Principal Component Analysis
<- function(data, scale=T, center=T) {
pca
<- data[complete.cases(data),]
data
if (center) mat <- apply(data, 2, function(x) x - mean(x)) else mat <- data
if (scale) for(i in 1:ncol(mat)) mat[,i] <- mat[,i] / sd(data[,i])
<- svd(mat)
decomp <- (decomp$d^2)/(nrow(mat)-1)
var <- decomp$v
loadings
colnames(loadings) <- paste0("PC", 1:ncol(loadings))
rownames(loadings) <- colnames(mat)
return(list(var=var, loadings=loadings))
}
Testing the function:
# Dataset of English constituency vote shares from UK GE2019
library(parlitools)
<- subset(bes_2019,
df subset=bes_2019$country=="England",
select=c("con_19", "lab_19", "ld_19", "brexit_19", "green_19", "other_19"))
<- apply(df, 2, function(x) ifelse(is.na(x), 0, x))
df
# With centering and rescaling
<- prcomp(df, scale.=T)
pc $sdev^2 pc
## [1] 2.372841e+00 1.121676e+00 1.022484e+00 8.585543e-01 6.244452e-01
## [6] 3.622418e-31
$rotation pc
## PC1 PC2 PC3 PC4 PC5 PC6
## con_19 -0.54332983 0.07627559 -0.05082277 -0.56942560 0.1384571 0.5941178
## lab_19 0.59823413 -0.07011487 -0.19959561 0.02259843 -0.4083210 0.6558389
## ld_19 -0.39181511 -0.36512555 0.04767122 0.74424233 0.1153703 0.3790577
## brexit_19 0.43609277 -0.25251232 0.24198884 -0.11374592 0.8065703 0.1549459
## green_19 0.04764488 0.75339583 -0.43749954 0.30622933 0.3617630 0.1186169
## other_19 0.03070171 0.47390033 0.83984620 0.12096531 -0.1391588 0.1874472
pca(df)
## $var
## [1] 2.372841e+00 1.121676e+00 1.022484e+00 8.585543e-01 6.244452e-01
## [6] 3.628914e-31
##
## $loadings
## PC1 PC2 PC3 PC4 PC5 PC6
## con_19 -0.54332983 0.07627559 -0.05082277 -0.56942560 0.1384571 -0.5941178
## lab_19 0.59823413 -0.07011487 -0.19959561 0.02259843 -0.4083210 -0.6558389
## ld_19 -0.39181511 -0.36512555 0.04767122 0.74424233 0.1153703 -0.3790577
## brexit_19 0.43609277 -0.25251232 0.24198884 -0.11374592 0.8065703 -0.1549459
## green_19 0.04764488 0.75339583 -0.43749954 0.30622933 0.3617630 -0.1186169
## other_19 0.03070171 0.47390033 0.83984620 0.12096531 -0.1391588 -0.1874472
# Without centering and rescaling
<- prcomp(df, center=F, scale.=F)
pc $sdev^2 pc
## [1] 3601.582679 528.759516 102.026004 26.105736 14.712369 9.887987
$rotation pc
## PC1 PC2 PC3 PC4 PC5
## con_19 -0.79601894 0.54348891 0.26225689 -0.02744832 0.02841558
## lab_19 -0.57199009 -0.81268134 -0.05235555 -0.01872792 -0.07167662
## ld_19 -0.18775368 0.19101255 -0.96262869 0.03485081 0.01576254
## brexit_19 -0.03580223 -0.08665429 0.01318959 0.10999656 0.93237245
## green_19 -0.04785633 -0.01272207 0.01544479 0.07000400 -0.34354616
## other_19 -0.01891353 0.00349730 0.03759933 0.99029363 -0.08040029
## PC6
## con_19 -0.02518186
## lab_19 -0.06439723
## ld_19 0.01207155
## brexit_19 0.33108554
## green_19 0.93508548
## other_19 -0.10521720
pca(df, center=F, scale=F)
## $var
## [1] 3601.582679 528.759516 102.026004 26.105736 14.712369 9.887987
##
## $loadings
## PC1 PC2 PC3 PC4 PC5
## con_19 -0.79601894 0.54348891 0.26225689 -0.02744832 0.02841558
## lab_19 -0.57199009 -0.81268134 -0.05235555 -0.01872792 -0.07167662
## ld_19 -0.18775368 0.19101255 -0.96262869 0.03485081 0.01576254
## brexit_19 -0.03580223 -0.08665429 0.01318959 0.10999656 0.93237245
## green_19 -0.04785633 -0.01272207 0.01544479 0.07000400 -0.34354616
## other_19 -0.01891353 0.00349730 0.03759933 0.99029363 -0.08040029
## PC6
## con_19 -0.02518186
## lab_19 -0.06439723
## ld_19 0.01207155
## brexit_19 0.33108554
## green_19 0.93508548
## other_19 -0.10521720