library(MASS) #loads up some useful functions # To read data not located in your working directory. turtledata <- read.table("C:/Documents and Settings/cnmetoye/Desktop/turtledata.txt") X = turtledata x1 = turtledata[,1] x2 = turtledata[,2] x3 = turtledata[,3] #Compute the dimensions of the data matrix size = dim(X) n = size[1] p = size[2] #Compute the mean of each x variable Xbar[1] = mean(x1) Xbar[2] = mean(x2) Xbar[3] = mean(x3) #Obtain the deviation vectors dd1 = x1 - Xbar[1]*matrix(1, n, 1) dd2 = x2 - Xbar[2]*matrix(1, n, 1) dd3 = x3 - Xbar[3]*matrix(1, n, 1) #Compute the sample variance matrix, S #To take the transpose of a vector or matrix, use t() #To multiply two vectors, use %*% #First, create a p by p matrix, S S = matrix(0, p, p) S[1,1] = t(dd1)%*%dd1/(n-1) S[1,2] = t(dd1)%*%dd2/(n-1) S[1,3] = t(dd1)%*%dd3/(n-1) S[2,1] = S[1,2] S[2,2] = t(dd2)%*%dd2/(n-1) S[2,3] = t(dd2)%*%dd3/(n-1) S[3,1] = S[1,3] S[3,2] = S[2,3] S[3,3] = t(dd3)%*%dd3/(n-1) #Compute the generalized sample variance gsamplevariance = det(S) #Compute the total sample variance #The trace of a matrix is equal to the #sum of the eigenvalues #The function eigen(Sm) calculates #the eigenvalues and eigenvectors of a #symmetric matrix, Sm. #We only need the eigenvalues. evals = eigen(S)$values tsamplevariance = sum(evals) #Compute the sample correlation matrix, R R = matrix(0, p, p) R[1,1] = 1 R[1,2] = S[1,2]/sqrt(S[1,1]*S[2,2]) R[1,3] = S[1,3]/sqrt(S[1,1]*S[3,3]) R[2,1] = R[1,2] R[2,2] = 1 R[2,3] = S[2,3]/sqrt(S[2,2]*S[3,3]) R[3,1] = R[1,3] R[3,2] = R[2,3] R[3,3] = 1 #Find the eigenvalues and eigenvectors of R evalsR = eigen(R)$values evecsR = eigen(R)$vectors #Compute histograms for each variable hist(x1) hist(x2) hist(x3) #Construct normal probability plots par(pty="s") # arrange for a square figure region qqnorm(x1, main = "Normal Q-Q Plot for x1") qqline(x1) par(pty="s") # arrange for a square figure region qqnorm(x2, main = "Normal Q-Q Plot for x2") qqline(x2) par(pty="s") # arrange for a square figure region qqnorm(x3, main = "Normal Q-Q Plot for x3") qqline(x3) #Construct chi-square plot da = X #"da" is the data nr = dim(da)[1] nc = dim(da)[2] cm = matrix(colMeans(da),1,nc) #Obtain the sample means o = matrix(rep(1,nr),nr,1) dev = da-kronecker(o,cm) #Remove sample means from data s = S #Sample covariance matrix si = ginverse(S) dev = as.matrix(dev) #convert the data frame to matrix form d2 = sort(diag(dev%*%si%*%t(dev))) #Compute and sort the squared distance prob = (c(1:nr)-0.5)/nr q1 = qchisq(prob,nr) #Obtain quantiles of Chi-square dist. plot(q1,d2, main = "Chi-Square plot for the turtle data") #May add a straightline to the plot fit = lsfit(q1,d2) fitted = d2-fit$residuals lines(q1,fitted)