# Principal Component Analysis and Factor Analysis in R
# Copyright 2013 by Ani Katchova
# Load the data set from CSV into a data frame.
mydata <- read.csv("C:/Econometrics/Data/pca_gsp.csv")

# Define variables
# Bind the 13 sector columns into one matrix. with() looks the column names up
# inside mydata only, so attach() is not needed: the search path is left
# untouched, re-running the script does not stack another copy of the data,
# and a same-named object in the workspace cannot silently mask a column.
X <- with(
  mydata,
  cbind(
    Ag, Mining, Constr, Manuf, Manuf_nd, Transp, Comm, Energy, TradeW, TradeR,
    RE, Services, Govt
  )
)
# Descriptive statistics
summary(X)
cor(X)

# Principal component analysis
# cor = TRUE runs the analysis on the correlation matrix rather than the
# covariance matrix; scores = TRUE keeps the component scores in pca1$scores.
pca1 <- princomp(X, scores = TRUE, cor = TRUE)
summary(pca1)

# Loadings of principal components
loadings(pca1)
# pca1$loadings

# Scree plot of eigenvalues
plot(pca1)
screeplot(pca1, type = "line", main = "Scree Plot")

# Biplot of score variables
biplot(pca1)

# Scores of the components (first 10 observations)
# head() returns at most 10 rows, so this also works when the data set has
# fewer than 10 observations; pca1$scores[1:10, ] would stop with
# "subscript out of bounds" in that case.
head(pca1$scores, 10)

# Rotation
# princomp objects keep their loadings in $loadings ($rotation is the
# component name used by prcomp objects), so rotate the loadings of the
# components you retain, e.g. the first three:
# varimax(pca1$loadings[, 1:3])
# promax(pca1$loadings[, 1:3])
# Factor analysis (maximum likelihood, 3 factors)
# factanal() applies a varimax rotation by default, so fa1 and fa2 below are
# the same fit; pass rotation = "none" to obtain the unrotated solution.
# Results can differ from other software, which may use a different
# estimation method.
# The argument is spelled out as `factors` instead of relying on partial
# matching of `factor`.
fa1 <- factanal(X, factors = 3)
fa1

# Same model with the varimax rotation requested explicitly
fa2 <- factanal(X, factors = 3, rotation = "varimax")
fa2

# Varimax rotation plus regression-method factor scores (kept in fa3$scores)
fa3 <- factanal(X, factors = 3, rotation = "varimax", scores = "regression")
fa3
> # Principal Component Analysis and Factor Analysis in R
> # Copyright 2013 by Ani Katchova
>
> mydata<- read.csv("C:/Econometrics/Data/pca_gsp.csv")
> attach(mydata)
>
> # Define variables
> X <- cbind(Ag, Mining, Constr, Manuf, Manuf_nd, Transp, Comm, Energy, TradeW,
TradeR, RE, Services, Govt)
>
> # Descriptive statistics
> summary(X)
Ag Mining Constr Manuf Manuf_nd
Min. : 0.500 Min. : 0.000 Min. :2.900 Min. : 0.800 Min. : 1.700
1st Qu.: 1.025 1st Qu.: 0.200 1st Qu.:3.825 1st Qu.: 6.250 1st Qu.: 4.500
Median : 1.800 Median : 0.450 Median :4.200 Median :10.400 Median : 7.150
Mean : 2.480 Mean : 2.624 Mean :4.338 Mean : 9.784 Mean : 7.696
3rd Qu.: 2.525 3rd Qu.: 1.650 3rd Qu.:4.675 3rd Qu.:12.375 3rd Qu.:10.500
Max. :10.600 Max. :31.600 Max. :8.400 Max. :21.400 Max. :16.700
Transp Comm Energy TradeW TradeR
Min. : 1.500 Min. :1.300 Min. :1.000 Min. :2.900 Min. : 6.000
1st Qu.: 2.650 1st Qu.:1.900 1st Qu.:2.500 1st Qu.:5.825 1st Qu.: 8.600
Median : 3.200 Median :2.100 Median :2.950 Median :6.300 Median : 8.900
Mean : 3.476 Mean :2.398 Mean :3.112 Mean :6.348 Mean : 9.002
3rd Qu.: 3.875 3rd Qu.:2.875 3rd Qu.:3.600 3rd Qu.:7.275 3rd Qu.: 9.850
Max. :12.100 Max. :5.700 Max. :7.500 Max. :9.100 Max. :11.500
RE Services Govt
Min. :10.40 Min. : 9.60 Min. : 9.00
1st Qu.:13.15 1st Qu.:16.15 1st Qu.:10.90
Median :16.20 Median :18.40 Median :12.25
Mean :17.09 Mean :18.71 Mean :12.93
3rd Qu.:19.15 3rd Qu.:20.77 3rd Qu.:14.55
Max. :35.40 Max. :32.30 Max. :21.30
> cor(X)
Ag Mining Constr Manuf Manuf_nd Transp
Ag 1.00000000 -0.06446456 0.08498040 0.03208436 -0.14533029 0.27917762
Mining -0.06446456 1.00000000 -0.02146761 -0.42367130 -0.13794386 0.61153483
Constr 0.08498040 -0.02146761 1.00000000 -0.12993364 -0.31780734 0.07516000
Manuf 0.03208436 -0.42367130 -0.12993364 1.00000000 0.20372851 -0.35693665
Manuf_nd -0.14533029 -0.13794386 -0.31780734 0.20372851 1.00000000 -0.17641840
Transp 0.27917762 0.61153483 0.07516000 -0.35693665 -0.17641840 1.00000000
Comm -0.18418380 -0.19271386 -0.02310018 -0.31738049 -0.09988080 -0.04911955
Energy 0.04325752 0.39044648 0.01300031 -0.05083012 0.07091362 -0.05572106
TradeW 0.24539204 -0.55305518 -0.08691544 0.27073259 0.03900687 -0.21353750
TradeR 0.09464267 -0.39599385 0.40113015 0.19462642 -0.12082255 -0.14778210
RE -0.30129373 -0.40633100 -0.25294548 -0.18205552 -0.13333291 -0.50348731
Services -0.32191590 -0.45971557 0.32364308 -0.15904326 -0.45771026 -0.42168031
Govt 0.11033873 0.23067866 0.18104989 -0.41051203 -0.23707649 0.42750613
Comm Energy TradeW TradeR RE Services
Ag -0.18418380 0.04325752 0.24539204 0.09464267 -0.30129373 -0.3219159
Mining -0.19271386 0.39044648 -0.55305518 -0.39599385 -0.40633100 -0.4597156
Constr -0.02310018 0.01300031 -0.08691544 0.40113015 -0.25294548 0.3236431
Manuf -0.31738049 -0.05083012 0.27073259 0.19462642 -0.18205552 -0.1590433
Manuf_nd -0.09988080 0.07091362 0.03900687 -0.12082255 -0.13333291 -0.4577103
Transp -0.04911955 -0.05572106 -0.21353750 -0.14778210 -0.50348731 -0.4216803
Comm 1.00000000 -0.16859364 0.33018068 0.12467627 0.11971090 0.3091459
Energy -0.16859364 1.00000000 -0.26707721 0.02953892 -0.37884901 -0.3138348
TradeW 0.33018068 -0.26707721 1.00000000 0.16636809 0.04049752 0.2390834
TradeR 0.12467627 0.02953892 0.16636809 1.00000000 -0.30862993 0.2021658
RE 0.11971090 -0.37884901 0.04049752 -0.30862993 1.00000000 0.5192887
Services 0.30914590 -0.31383479 0.23908339 0.20216576 0.51928874 1.0000000
Govt 0.19333215 0.04539830 -0.34283068 0.28658841 -0.35064424 -0.1795611
Govt
Ag 0.1103387
Mining 0.2306787
Constr 0.1810499
Manuf -0.4105120
Manuf_nd -0.2370765
Transp 0.4275061
Comm 0.1933322
Energy 0.0453983
TradeW -0.3428307
TradeR 0.2865884
RE -0.3506442
Services -0.1795611
Govt 1.0000000
>
> # Principal component analysis
> pca1 <- princomp(X, scores=TRUE, cor=TRUE)
> summary(pca1)
Importance of components:
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
Standard deviation 1.7987525 1.4954801 1.3999420 1.1663403 1.07583525 0.93184458
Proportion of Variance 0.2488854 0.1720354 0.1507567 0.1046423 0.08903242 0.06679495
Cumulative Proportion 0.2488854 0.4209209 0.5716776 0.6763199 0.76535232 0.83214726
Comp.7 Comp.8 Comp.9 Comp.10 Comp.11 Comp.12
Standard deviation 0.85116719 0.78471605 0.5641253 0.4851322 0.38943836 0.36945813
Proportion of Variance 0.05572966 0.04736764 0.0244798 0.0181041 0.01166633 0.01049995
Cumulative Proportion 0.88787692 0.93524456 0.9597244 0.9778285 0.98949478 0.99999473
Comp.13
Standard deviation 8.279806e-03
Proportion of Variance 5.273476e-06
Cumulative Proportion 1.000000e+00
>
> # Loadings of principal components
> loadings(pca1)
Loadings:
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
Comp.11
Ag 0.135 -0.385 -0.373 0.411 0.245 -0.433 -0.277 0.152 -0.217
Mining 0.470 0.260 0.164 0.276 0.148 -0.116 -0.475
Constr 0.393 -0.257 0.350 0.196 0.370 -0.499 0.371 -0.141
Manuf -0.183 -0.376 -0.375 0.147 0.111 -0.198 0.151 0.500 0.387 0.138
Manuf_nd -0.459 -0.465 -0.217 0.102 -0.592 -0.102 0.142
Transp 0.418 0.147 -0.365 0.143 -0.169 0.302 -0.264 -0.108 0.507
Comm -0.152 0.316 -0.343 -0.550 0.267 0.145 0.101 0.436 -0.383
Energy 0.247 -0.138 0.416 -0.202 0.689 -0.199 0.116 0.282
TradeW -0.315 -0.290 -0.442 0.353 0.254 -0.253 0.455 -0.322
TradeR 0.261 -0.507 0.227 -0.252 -0.143 -0.146 0.100 -0.515 -0.432 -0.181
RE -0.363 0.447 0.173 -0.359 -0.106 -0.169 -0.127
Services -0.380 0.384 0.127 0.183 0.125 0.101 0.133 -0.222 0.204 0.458
Govt 0.289 0.369 -0.295 -0.306 -0.428 0.121 0.171 0.548
Comp.12 Comp.13
Ag -0.286 0.206
Mining -0.282 0.500
Constr 0.256
Manuf 0.406
Manuf_nd -0.122 0.338
Transp 0.407 0.144
Comm
Energy 0.286
TradeW 0.210 0.112
TradeR 0.106
RE 0.496 0.451
Services -0.460 0.320
Govt 0.238
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
Proportion Var 0.077 0.077 0.077 0.077 0.077 0.077 0.077 0.077 0.077 0.077
Cumulative Var 0.077 0.154 0.231 0.308 0.385 0.462 0.538 0.615 0.692 0.769
Comp.11 Comp.12 Comp.13
SS loadings 1.000 1.000 1.000
Proportion Var 0.077 0.077 0.077
Cumulative Var 0.846 0.923 1.000
> #pca1$loadings
>
> # Scree plot of eigenvalues
> plot(pca1)
> screeplot(pca1, type="line", main="Scree Plot")
>
> # Biplot of score variables
> biplot(pca1)
>
> # Scores of the components
> pca1$scores[1:10,]
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
[1,] 0.4896329 -0.2840153 -0.92052695 0.08785947 -1.7536053 -0.25729127 -0.3122104
[2,] 6.6852235 1.5422261 2.72534093 -2.09383081 0.9621524 -2.42578094 1.2416981
[3,] -0.7517456 1.4884587 -0.86440944 1.23947821 0.7013320 -0.40238064 0.2571765
[4,] 1.0000343 -1.2527044 -1.79705334 -0.15952525 -0.6286758 0.31143230 -0.2804304
[5,] -1.8143951 0.3083173 1.07282748 -0.62096975 0.5873437 -0.19317967 -0.3766092
[6,] -1.1231352 2.7966578 0.12662810 -0.83439457 -1.5916798 0.58129969 1.0428510
[7,] -2.4543289 -0.8838887 2.26111646 -0.17696795 1.0153673 0.09693667 -0.2158679
[8,] -0.9918322 -2.9538499 4.02639673 0.22036149 0.4781388 -1.24008960 -1.5630880
[9,] -1.5476850 2.2030680 -0.14284640 0.17108744 -0.1473670 0.46797314 -0.1884953
[10,] -0.9920176 0.2998176 -0.05468377 -2.18146390 -1.8755534 0.87740143 0.9428300
Comp.8 Comp.9 Comp.10 Comp.11 Comp.12 Comp.13
[1,] -0.1332644 0.16656810 0.12841084 -0.14054526 -0.11795741 -0.0022081279
[2,] 0.8350926 -0.02638485 -0.03282346 0.60577408 0.42502678 0.0038008406
[3,] 0.1730944 0.13204988 -0.33229286 -0.67623158 0.53472823 -0.0060317043
[4,] -0.2515513 -0.26955035 -0.91416815 0.62811995 0.03117104 0.0028593823
[5,] 0.6847873 -0.60181782 -0.15025628 -0.05043902 -0.57785734 -0.0072707745
[6,] 0.1619501 1.59879806 -1.52538907 -0.02066718 -0.20311745 -0.0025178811
[7,] 0.5350483 0.06689563 -0.09648740 -0.09564494 0.11018733 0.0001849203
[8,] -2.3471516 0.38662079 -0.46928579 -0.24839536 0.84153331 -0.0004708541
[9,] 0.0521831 -1.26973408 -0.64521367 -0.42080320 0.29586008 -0.0079586042
[10,] -0.1629925 0.11552867 -0.08515134 -0.14312824 0.31799226 -0.0020137002
>
> # Rotation
> #varimax(pca1$rotation)
> #promax(pca1$rotation)
>
>
> # Factor analysis - different results from other softwares and no rotation
> fa1 <- factanal(X, factor=3)
> fa1
Call:
factanal(x = X, factors = 3)
Uniquenesses:
Ag Mining Constr Manuf Manuf_nd Transp Comm Energy TradeW
0.863 0.005 0.550 0.518 0.533 0.536 0.874 0.782 0.653
TradeR RE Services Govt
0.420 0.005 0.284 0.755
Loadings:
Factor1 Factor2 Factor3
Ag 0.154 0.335
Mining 0.825 -0.551 -0.108
Constr 0.116 0.656
Manuf -0.117 0.641 -0.238
Manuf_nd 0.401 -0.552
Transp 0.659 -0.144
Comm -0.198 0.294
Energy 0.461
TradeW -0.344 0.471
TradeR 0.527 0.547
RE -0.847 -0.498 -0.171
Services -0.613 -0.191 0.551
Govt 0.337 0.363
Factor1 Factor2 Factor3
SS loadings 2.747 1.799 1.674
Proportion Var 0.211 0.138 0.129
Cumulative Var 0.211 0.350 0.479
Test of the hypothesis that 3 factors are sufficient.
The chi square statistic is 393.94 on 42 degrees of freedom.
The p-value is 1.01e-58
>
> fa2 <- factanal(X, factor=3, rotation="varimax")
> fa2
Call:
factanal(x = X, factors = 3, rotation = "varimax")
Uniquenesses:
Ag Mining Constr Manuf Manuf_nd Transp Comm Energy TradeW
0.863 0.005 0.550 0.518 0.533 0.536 0.874 0.782 0.653
TradeR RE Services Govt
0.420 0.005 0.284 0.755
Loadings:
Factor1 Factor2 Factor3
Ag 0.154 0.335
Mining 0.825 -0.551 -0.108
Constr 0.116 0.656
Manuf -0.117 0.641 -0.238
Manuf_nd 0.401 -0.552
Transp 0.659 -0.144
Comm -0.198 0.294
Energy 0.461
TradeW -0.344 0.471
TradeR 0.527 0.547
RE -0.847 -0.498 -0.171
Services -0.613 -0.191 0.551
Govt 0.337 0.363
Factor1 Factor2 Factor3
SS loadings 2.747 1.799 1.674
Proportion Var 0.211 0.138 0.129
Cumulative Var 0.211 0.350 0.479
Test of the hypothesis that 3 factors are sufficient.
The chi square statistic is 393.94 on 42 degrees of freedom.
The p-value is 1.01e-58
>
> fa3 <- factanal(X, factors=3, rotation="varimax", scores="regression")
> fa3
Call:
factanal(x = X, factors = 3, scores = "regression", rotation = "varimax")
Uniquenesses:
Ag Mining Constr Manuf Manuf_nd Transp Comm Energy TradeW
0.863 0.005 0.550 0.518 0.533 0.536 0.874 0.782 0.653
TradeR RE Services Govt
0.420 0.005 0.284 0.755
Loadings:
Factor1 Factor2 Factor3
Ag 0.154 0.335
Mining 0.825 -0.551 -0.108
Constr 0.116 0.656
Manuf -0.117 0.641 -0.238
Manuf_nd 0.401 -0.552
Transp 0.659 -0.144
Comm -0.198 0.294
Energy 0.461
TradeW -0.344 0.471
TradeR 0.527 0.547
RE -0.847 -0.498 -0.171
Services -0.613 -0.191 0.551
Govt 0.337 0.363
Factor1 Factor2 Factor3
SS loadings 2.747 1.799 1.674
Proportion Var 0.211 0.138 0.129
Cumulative Var 0.211 0.350 0.479
Test of the hypothesis that 3 factors are sufficient.
The chi square statistic is 393.94 on 42 degrees of freedom.
The p-value is 1.01e-58