%Candace Metoyer %Stat 135 Discussion %Chapter 8 Demos %Example 8.3 pg 439 %Summarizing sample variability with two sample principal components. %Scree Plot clc; clear; format compact format short g load T8_5.dat; x = T8_5; [n p] = size(x) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %Compute summary statistics %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% xbar = mean(x)' for i = 1:p d(:,i) = x(:,i) - xbar(i)*ones(n,1); end for i = 1:p for j = 1:p S(i,j) = d(:,i)'*d(:,j)/(n-1); end end S %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %Compute the coefficients for the principal components %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% [eigVEC, eigVAL] = eig(S); %The eigenvalues of S are stored in the diagonals of matrix "eigVAL" so %that lambda_i = eigVAL(i,i). %NOTE: The eigenvalues are listed in increasing order %We will reorder the lambdas so that they are in decreasing order. %Additionally, we must reorder the corresponding eigenvectors %METHOD 1: Directly reorder lambda(1) = eigVAL(5,5); lambda(2) = eigVAL(4,4); lambda(3) = eigVAL(3,3); lambda(4) = eigVAL(2,2); lambda(5) = eigVAL(1,1); ReigVEC(:,1) = eigVEC(:,5); ReigVEC(:,2) = eigVEC(:,4); ReigVEC(:,3) = eigVEC(:,3); ReigVEC(:,4) = eigVEC(:,2); ReigVEC(:,5) = eigVEC(:,1); lambda; ReigVEC; %METHOD 2: Use a for loop to reorder clear('lambda') clear('ReigVEC') lambda = zeros(p,1); ReigVEC = zeros(size(eigVEC)); j = 1; for i = p:-1:1 %Decrease from p to 1 in steps of 1 lambda(i) = eigVAL(j,j); ReigVEC(:,i) = eigVEC(:,j); %re-ordered eigenvector matrix j = j+1; end lambda ReigVEC %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %Compute the Correlation Coefficients %for the first two sample principal %components %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %Use the equation at the bottom of page 438 ehat1 = ReigVEC(:,1) ehat2 = ReigVEC(:,2) %For the first principal component, we have: for k = 1:p r(k,1) = ehat1(k)*sqrt(lambda(1))/sqrt(S(k,k)); end %For the second principal component, we have: for k = 1:p r(k,2) = ehat2(k)*sqrt(lambda(2))/sqrt(S(k,k)); end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %Compute the cumulative percentage of the total variance %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% for i = 1:p cpercent(i) = sum(lambda(1:i))/sum(lambda); end cpercent %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %Construct the scree plot %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% figure %Set up the axes nicely (optional) minx = 0; maxx = p+0.5; miny = 0; maxy = ceil(max(lambda)); hold on axis([minx maxx miny maxy]) plot(1:p, lambda, '.-', 'MarkerSize', 20, 'LineWidth', 1.25) xlabel('i, eigenvalue number') text(-0.5,4,'{}_{^{\fontsize{10}\lambda_i}}^{\^}') title('Scree Plot for the Census-Tract Data') %The "text" command places a label at the selected coordinate (-0.5, 4) %In particular, this label is placed near the y-axis. %You can play with the location of this label by changing the coordinates.