Hi. I am new in R. I was trying to figure out how to incorporate categorical variables in PCA. prcomp and ePCA only compute numeric variables. But I want to analyze the countries as well. any help, please?
#dataset #592 obs. of 17 variables
head(UrbanAreaLog)
ID AREA GCPNT_LAT GCPNT_LON COUNTRY GR UC_NAME
1 2486 49 57.0410899168 9.92426287095 Denmark Northern Europe Aalborg
2 2574 96 5.10886858483 7.35086200977 Nigeria Western Africa Aba
3 1675 431 5.34956326151 -4.00269599617 Côte d'Ivoire Western Africa Abidjan
4 2565 337 9.06189891144 7.43349518867 Nigeria Western Africa Abuja
5 1910 846 5.62108507064 -0.215586874558 Ghana Western Africa Accra
6 4237 135 37.0034193536 35.2831339317 Turkey Western Asia Adana
H00_NBR H00_AREA B15 BUCAP15 E_GR_AV14 E_GR_AH14 E_GR_AT14 SDG_LUE9015
1 1 3.891820 3.333719 5.635509 -0.7241529 3.498985 3.890039 -0.8076205
2 1 4.418841 3.569795 3.390930 -1.1212652 3.019277 4.553592 -2.2733364
3 1 5.894403 5.441951 3.926418 -1.3018480 3.628108 6.056978 -1.3642453
4 1 4.718499 4.906370 4.534689 -1.0133702 4.246865 5.811337 -1.0336720
5 2 6.513230 6.204348 4.719880 -1.2254961 2.680334 6.732362 -1.4974861
6 1 4.836282 4.174412 4.478802 -1.4558008 2.454523 4.910071 6.3459896
SDG_OS15MX POP_DEN_15
1 4.212276 5.604330
2 4.143293 7.370860
3 3.837946 5.818301
4 4.092677 5.786284
5 3.725693 7.675128
6 3.948355 5.071417
pca method 1
UrbanArea_pca <- UrbanAreaLog %>%
filter(GR %in% c("Australia/New Zealand","Caribbean","Central America","Eastern Africa","Eastern Europe",
"Middle Africa","Northern Africa","Northern America","Northern Europe","South America",
"South-Central Asia","Southern Africa","Southern Europe","Western Africa","Western Asia",
"Western Europe")) %>%
dplyr::select(COUNTRY,UC_NAME,GR,ID,H00_AREA,B15,BUCAP15,E_GR_AV14,E_GR_AH14,E_GR_AT14,SDG_LUE9015,SDG_OS15MX,POP_DEN_15) %>%
unite("continent_country_city", c(COUNTRY,GR,UC_NAME,ID)) %>%
column_to_rownames("continent_country_city")
UrbanArea.pca2 <- prcomp(na.omit(UrbanArea_pca, scale=TRUE))
#the above script does work but it combines classes into one
pca method 2
UrbanF.pca <- epPCA(na.omit(UrbanAreaLog[-1:-8], graph=FALSE))
fviz_pca_ind(UrbanF.pca,
geom.ind = "point", # show points only (nbut not "text")
col.ind.sup = (UrbanAreaLog$UC_NAME), # color by groups
palette = c("rainbow"),
addEllipses = TRUE, # Concentration ellipses
legend.title = "Geographic Regions")
#this method 2 does not show classes by groups