Análisis de Cluster

Estudio R

Noviembre de 2017

Contenido

Análisis de Cluster o agrupamiento

Objetivos del Cluster

Algunas aplicaciones

Medidas de similaridad

Algunas de las distancias utilizadas son:

Algunos de los coeficientes de asociación utilizados son:

Para las distancias se cumple que:

Métodos de agrupamiento

Los métodos jerárquicos se clasifican en:

Algunos de los métodos jerárquicos aglomerativos son:

Ejemplo e implementación en R

Lectura de datos:

# Load the measurements from "Gorriones.csv"; `dec = ","` declares the comma
# as the decimal mark.
# `stringsAsFactors = TRUE` is spelled out because R >= 4.0.0 no longer
# converts text columns to factors by default. The recorded output shows
# `Sexo` and `Sobrevivio` as factors, and later steps were written against
# factor columns.
datos <- read.csv(file = "Gorriones.csv", dec = ",", stringsAsFactors = TRUE)
# Preview the first five rows.
head(datos, n = 5)
##   Sexo Edad Sobrevivio LongitudTotal ExteAlas Peso LonPicoCabe LonHumero LonFemur LonTibTarso
## 1    m    a         SI           154      241 24.5        31.2     0.687    0.668       1.022
## 2    m    a         NO           165      240 26.5        31.0     0.738    0.704       1.095
## 3    m    a         NO           160      245 26.1        32.0     0.736    0.709       1.109
## 4    m    a         SI           160      252 26.9        30.8     0.736    0.709       1.180
## 5    m    a         SI           155      243 26.9        30.6     0.733    0.704       1.151
##   AncCraneo LonQuilla
## 1     0.587     0.830
## 2     0.606     0.847
## 3     0.611     0.842
## 4     0.602     0.841
## 5     0.602     0.846

Dendrograma (enlace completo o vecino más lejano)

# Centre and scale columns 4 to 12 (the numeric measurements) so that every
# variable contributes on a comparable scale to the distances.
datosstd <- data.frame(
  scale(datos[, 4:12], center = TRUE, scale = TRUE)
)
# Hierarchical clustering with complete linkage (the `hclust` default, stated
# explicitly here) on the default distance returned by `dist`.
cluster1 <- hclust(dist(datosstd), method = "complete")
# Draw the dendrogram.
plot(cluster1)

Asignando grupos (del cluster1) a la base de datos inicial

# Cut the dendrogram into five clusters and store each row's cluster label
# in a new `grupo` column of the original data frame.
datos[["grupo"]] <- cutree(cluster1, k = 5)
# Preview the first ten rows, now including the cluster label.
head(datos, 10)
##    Sexo Edad Sobrevivio LongitudTotal ExteAlas Peso LonPicoCabe LonHumero LonFemur LonTibTarso
## 1     m    a         SI           154      241 24.5        31.2     0.687    0.668       1.022
## 2     m    a         NO           165      240 26.5        31.0     0.738    0.704       1.095
## 3     m    a         NO           160      245 26.1        32.0     0.736    0.709       1.109
## 4     m    a         SI           160      252 26.9        30.8     0.736    0.709       1.180
## 5     m    a         SI           155      243 26.9        30.6     0.733    0.704       1.151
## 6     m    a         NO           161      249 25.6        32.3     0.743    0.718       1.128
## 7     m    a         SI           154      245 24.3        31.7     0.741    0.688       1.146
## 8     m    a         NO           162      246 25.9        32.3     0.738    0.709       1.135
## 9     m    a         SI           156      247 24.1        31.5     0.715    0.706       1.129
## 10    m    a         NO           163      250 25.5        32.5     0.752    0.731       1.197
##    AncCraneo LonQuilla grupo
## 1      0.587     0.830     1
## 2      0.606     0.847     2
## 3      0.611     0.842     3
## 4      0.602     0.841     3
## 5      0.602     0.846     2
## 6      0.602     0.828     3
## 7      0.584     0.839     2
## 8      0.607     0.869     3
## 9      0.575     0.821     2
## 10     0.623     0.888     4

Resumen numérico para cada grupo

# Split the measurement columns (4 to 12) by cluster label and print a
# `summary()` for each cluster.
lapply(
  X = split(x = datos[, 4:12], f = datos[["grupo"]]),
  FUN = summary
)
## $`1`
##  LongitudTotal      ExteAlas          Peso        LonPicoCabe      LonHumero         LonFemur     
##  Min.   :152.0   Min.   :230.0   Min.   :22.60   Min.   :30.10   Min.   :0.6590   Min.   :0.6530  
##  1st Qu.:153.0   1st Qu.:234.2   1st Qu.:23.10   1st Qu.:30.38   1st Qu.:0.6800   1st Qu.:0.6635  
##  Median :154.5   Median :237.0   Median :23.30   Median :30.55   Median :0.6860   Median :0.6725  
##  Mean   :154.3   Mean   :236.2   Mean   :23.61   Mean   :30.64   Mean   :0.6871   Mean   :0.6739  
##  3rd Qu.:155.2   3rd Qu.:238.2   3rd Qu.:24.52   3rd Qu.:30.98   3rd Qu.:0.6957   3rd Qu.:0.6837  
##  Max.   :157.0   Max.   :241.0   Max.   :24.70   Max.   :31.20   Max.   :0.7060   Max.   :0.7020  
##   LonTibTarso      AncCraneo        LonQuilla     
##  Min.   :1.011   Min.   :0.5510   Min.   :0.7340  
##  1st Qu.:1.037   1st Qu.:0.5840   1st Qu.:0.7698  
##  Median :1.045   Median :0.5875   Median :0.7770  
##  Mean   :1.069   Mean   :0.5847   Mean   :0.7805  
##  3rd Qu.:1.107   3rd Qu.:0.5905   3rd Qu.:0.7943  
##  Max.   :1.156   Max.   :0.5990   Max.   :0.8300  
## 
## $`2`
##  LongitudTotal      ExteAlas          Peso        LonPicoCabe      LonHumero         LonFemur     
##  Min.   :153.0   Min.   :235.0   Min.   :23.20   Min.   :30.30   Min.   :0.6890   Min.   :0.6620  
##  1st Qu.:156.0   1st Qu.:240.0   1st Qu.:24.20   1st Qu.:31.00   1st Qu.:0.7150   1st Qu.:0.6947  
##  Median :158.5   Median :244.0   Median :24.75   Median :31.40   Median :0.7260   Median :0.7050  
##  Mean   :158.6   Mean   :242.7   Mean   :24.88   Mean   :31.27   Mean   :0.7246   Mean   :0.7045  
##  3rd Qu.:161.0   3rd Qu.:246.0   3rd Qu.:25.55   3rd Qu.:31.50   3rd Qu.:0.7330   3rd Qu.:0.7130  
##  Max.   :166.0   Max.   :251.0   Max.   :27.50   Max.   :32.40   Max.   :0.7520   Max.   :0.7350  
##   LonTibTarso      AncCraneo        LonQuilla     
##  Min.   :1.073   Min.   :0.5750   Min.   :0.7810  
##  1st Qu.:1.104   1st Qu.:0.5907   1st Qu.:0.8027  
##  Median :1.123   Median :0.6000   Median :0.8305  
##  Mean   :1.120   Mean   :0.5991   Mean   :0.8288  
##  3rd Qu.:1.131   3rd Qu.:0.6082   3rd Qu.:0.8492  
##  Max.   :1.175   Max.   :0.6200   Max.   :0.8920  
## 
## $`3`
##  LongitudTotal      ExteAlas          Peso        LonPicoCabe      LonHumero         LonFemur     
##  Min.   :155.0   Min.   :239.0   Min.   :24.00   Min.   :30.80   Min.   :0.7090   Min.   :0.6990  
##  1st Qu.:158.0   1st Qu.:245.0   1st Qu.:25.25   1st Qu.:31.75   1st Qu.:0.7310   1st Qu.:0.7097  
##  Median :160.0   Median :247.0   Median :25.95   Median :32.05   Median :0.7390   Median :0.7170  
##  Mean   :159.8   Mean   :246.7   Mean   :25.80   Mean   :32.01   Mean   :0.7387   Mean   :0.7207  
##  3rd Qu.:162.0   3rd Qu.:248.0   3rd Qu.:26.30   3rd Qu.:32.30   3rd Qu.:0.7445   3rd Qu.:0.7332  
##  Max.   :165.0   Max.   :253.0   Max.   :27.60   Max.   :33.00   Max.   :0.7660   Max.   :0.7510  
##   LonTibTarso      AncCraneo        LonQuilla     
##  Min.   :1.102   Min.   :0.5890   Min.   :0.7870  
##  1st Qu.:1.133   1st Qu.:0.5970   1st Qu.:0.8225  
##  Median :1.149   Median :0.6045   Median :0.8405  
##  Mean   :1.148   Mean   :0.6044   Mean   :0.8469  
##  3rd Qu.:1.163   3rd Qu.:0.6092   3rd Qu.:0.8652  
##  Max.   :1.227   Max.   :0.6300   Max.   :0.9270  
## 
## $`4`
##  LongitudTotal      ExteAlas          Peso        LonPicoCabe      LonHumero         LonFemur     
##  Min.   :158.0   Min.   :245.0   Min.   :24.20   Min.   :31.40   Min.   :0.7060   Min.   :0.7110  
##  1st Qu.:161.0   1st Qu.:250.0   1st Qu.:26.20   1st Qu.:31.80   1st Qu.:0.7520   1st Qu.:0.7310  
##  Median :163.0   Median :251.5   Median :26.85   Median :32.00   Median :0.7585   Median :0.7405  
##  Mean   :162.8   Mean   :251.2   Mean   :27.03   Mean   :32.10   Mean   :0.7571   Mean   :0.7392  
##  3rd Qu.:165.0   3rd Qu.:253.0   3rd Qu.:27.85   3rd Qu.:32.38   3rd Qu.:0.7658   3rd Qu.:0.7488  
##  Max.   :167.0   Max.   :256.0   Max.   :31.00   Max.   :33.40   Max.   :0.7800   Max.   :0.7670  
##   LonTibTarso      AncCraneo        LonQuilla     
##  Min.   :1.120   Min.   :0.5880   Min.   :0.8300  
##  1st Qu.:1.153   1st Qu.:0.6070   1st Qu.:0.8572  
##  Median :1.175   Median :0.6150   Median :0.8765  
##  Mean   :1.171   Mean   :0.6164   Mean   :0.8755  
##  3rd Qu.:1.189   3rd Qu.:0.6285   3rd Qu.:0.8910  
##  Max.   :1.230   Max.   :0.6400   Max.   :0.9230  
## 
## $`5`
##  LongitudTotal      ExteAlas          Peso        LonPicoCabe      LonHumero         LonFemur     
##  Min.   :156.0   Min.   :236.0   Min.   :23.60   Min.   :29.80   Min.   :0.6900   Min.   :0.6660  
##  1st Qu.:158.2   1st Qu.:239.8   1st Qu.:23.98   1st Qu.:29.90   1st Qu.:0.7045   1st Qu.:0.6680  
##  Median :159.0   Median :244.5   Median :25.40   Median :30.25   Median :0.7100   Median :0.6720  
##  Mean   :159.0   Mean   :243.5   Mean   :25.17   Mean   :30.30   Mean   :0.7100   Mean   :0.6775  
##  3rd Qu.:159.8   3rd Qu.:247.0   3rd Qu.:26.07   3rd Qu.:30.68   3rd Qu.:0.7155   3rd Qu.:0.6820  
##  Max.   :162.0   Max.   :250.0   Max.   :26.80   Max.   :30.90   Max.   :0.7300   Max.   :0.7030  
##   LonTibTarso      AncCraneo        LonQuilla     
##  Min.   :1.067   Min.   :0.5630   Min.   :0.7490  
##  1st Qu.:1.082   1st Qu.:0.5763   1st Qu.:0.8117  
##  Median :1.091   Median :0.5835   Median :0.8200  
##  Mean   :1.088   Mean   :0.5828   Mean   :0.8235  
##  3rd Qu.:1.097   3rd Qu.:0.5893   3rd Qu.:0.8290  
##  Max.   :1.103   Max.   :0.6020   Max.   :0.9110

Componentes principales + Cluster

# Principal component analysis on the standardised measurements, using the
# correlation matrix.
acp <- princomp(x = datosstd, cor = TRUE)
# Append the scores of the first three components to the data (which already
# carries the `grupo` column).
datos2 <- data.frame(datos, acp[["scores"]][, 1:3])
# Preview the first ten rows with the component scores.
head(datos2, 10)
##    Sexo Edad Sobrevivio LongitudTotal ExteAlas Peso LonPicoCabe LonHumero LonFemur LonTibTarso
## 1     m    a         SI           154      241 24.5        31.2     0.687    0.668       1.022
## 2     m    a         NO           165      240 26.5        31.0     0.738    0.704       1.095
## 3     m    a         NO           160      245 26.1        32.0     0.736    0.709       1.109
## 4     m    a         SI           160      252 26.9        30.8     0.736    0.709       1.180
## 5     m    a         SI           155      243 26.9        30.6     0.733    0.704       1.151
## 6     m    a         NO           161      249 25.6        32.3     0.743    0.718       1.128
## 7     m    a         SI           154      245 24.3        31.7     0.741    0.688       1.146
## 8     m    a         NO           162      246 25.9        32.3     0.738    0.709       1.135
## 9     m    a         SI           156      247 24.1        31.5     0.715    0.706       1.129
## 10    m    a         NO           163      250 25.5        32.5     0.752    0.731       1.197
##    AncCraneo LonQuilla grupo     Comp.1     Comp.2      Comp.3
## 1      0.587     0.830     1  3.8880757 -1.3522616  0.41603544
## 2      0.606     0.847     2  0.1575123 -1.4096787  0.40722779
## 3      0.611     0.842     3 -0.3388752 -0.4201252  0.77208305
## 4      0.602     0.841     3 -0.7918721 -0.3448695 -0.78499927
## 5      0.602     0.846     2  0.6448564  0.2402161 -0.09755562
## 6      0.602     0.828     3 -0.8439208 -0.0350315 -0.01554790
## 7      0.584     0.839     2  1.1956308  0.8519471 -0.96170268
## 8      0.607     0.869     3 -1.0570477 -0.5649736  0.29651557
## 9      0.575     0.821     2  1.6455690  0.4574013 -1.38797615
## 10     0.623     0.888     4 -2.9531850  0.3580806  0.29672513

Proyecciones en el plano con los Cluster

# Scatter plot of the observations on the first two principal components:
# colour encodes sex, plotting symbol encodes the cluster, and every point is
# labelled with its survival status.

# Colours are keyed by the values of `Sexo` ("f", "m") and looked up by name,
# so the mapping is the same whether `Sexo` is a factor or a character column.
# Indexing an unnamed vector with a character column would return NA colours
# and the points would not be drawn.
colores <- c(f = "forestgreen", m = "blue")
# One plotting symbol per cluster (1 to 5).
simbolos <- c(15, 16, 17, 18, 20)
with(datos2, plot(Comp.1, Comp.2, col = colores[as.character(Sexo)],
                  pch = simbolos[grupo],
                  xlab = "Componente principal 1", ylab = "Componente principal 2",
                  main = "Grupos de aves sobre componentes principales (sobrevivencia[SI, NO])",
                  cex = 1.7))
# Key for the cluster symbols.
legend("topright", legend = 1:5, pch = simbolos, col = "black", ncol = 2, cex = 1.2)
# Key for the sex colours, in the same order as `colores` (f, then m).
legend("topleft", legend = c("Hembra", "Macho"), col = unname(colores), cex = 1,
       lty = 1, lwd = 2)
# Reference axes through the origin.
abline(h = 0, col = "brown")
abline(v = 0, col = "brown")
# Variable loadings drawn as arrows from the origin, scaled by 5; the labels
# sit at 5.2 so they fall just beyond the arrow tips.
arrows(0, 0, acp$loadings[, 1] * 5, acp$loadings[, 2] * 5, col = "red", lwd = 2)
text(acp$loadings[, 1] * 5.2, acp$loadings[, 2] * 5.2, row.names(acp$loadings),
     col = "red")
# Survival status written to the right of each point.
text(datos2$Comp.1, datos2$Comp.2, labels = datos2$Sobrevivio, pos = 4, cex = 0.6)

Sobrevivencia de aves por grupo

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Count the observations for every combination of cluster, sex and survival
# status; the result stays grouped by those three columns.
datos3 <- count(group_by(datos2, grupo, Sexo, Sobrevivio))
# Order the combinations from most to least frequent.
datos3 <- arrange(datos3, desc(n))

datos3
## # A tibble: 19 x 4
## # Groups:   grupo, Sexo, Sobrevivio [19]
##    grupo   Sexo Sobrevivio     n
##    <int> <fctr>     <fctr> <int>
##  1     3      m         SI    18
##  2     2      m         NO    14
##  3     2      m         SI    14
##  4     4      m         SI    14
##  5     2      f         SI    11
##  6     4      m         NO    10
##  7     2      f         NO     9
##  8     3      m         NO     9
##  9     3      f         NO     7
## 10     3      f         SI     6
## 11     1      f         NO     5
## 12     4      f         NO     4
## 13     1      m         SI     3
## 14     5      f         NO     3
## 15     1      f         SI     2
## 16     1      m         NO     2
## 17     4      f         SI     2
## 18     5      m         SI     2
## 19     5      m         NO     1