# BST 226 Discussion February 12, 2007 setwd("c:/Program Files/R/R-2.4.0/library/estrogen/extdata") library(Biobase) library(affy) library(affydata) library(hgu95av2cdf) library(vsn) library(hgu95av2) # Importing and Exploring Affymetrix Arrays # List *.celfiles in working directory list.celfiles() # Load one file Low10 <- ReadAffy(filename="low10-1.cel") Low10 # To load all files in working directory with phenotype data (i.e., information # on experiment generating each array pd <- read.phenoData("estrogen.txt", header=TRUE, row.names=1) pData(pd) EstArrays <-ReadAffy(filenames=rownames(pData(pd)),phenoData=pd, verbose=TRUE) EstArrays EstArrays[,1] class(EstArrays[,1]) #Look at elements of an array slotNames(EstArrays[,1]) help(AffyBatch) EstArrays[,1]@cdfName EstArrays[,1]@annotation EstArrays[,1]@nrow EstArrays[,1]@ncol 640*640 EstArrays[,1]@exprs pData(EstArrays[,1]@phenoData) # Returns vector of probeset IDs with ith entry corresponds to the ith row of the PM matrix # PM consists of probe matches probeNames(Low10) # Returns vector of genes that are included in the array geneNames(Low10) #To see intensities of probes in a given probeset pm(Low10, probeNames(Low10)[1]) # Graphical display of raw intensities # Can be used to assess differences between arrays boxplot(EstArrays) hist(log2(intensity(Low10))) image(Low10) par(mfrow=c(2,4)) MAplot(EstArrays) # Background adjustment Low10.bg.rma <- bg.correct(Low10, method="rma") # Can look at the before and after side by side split.screen(c(1,2)) screen(1) image(Low10) screen(2) image(Low10.bg.rma) # Can do all arrays at once Est.rma <- bg.correct(EstArrays, method="rma") # Normalization # Use several normalization procedures and compare Low10.constant <- normalize(Low10.bg.rma, method="constant") Low10.nl <- normalize(Low10.bg.rma, method="invariantset") Low10.quant <- normalize(Low10.bg.rma, method="quantiles") # Compare normalized with background adjusted split.screen(c(2,2)) screen(1) image(Low10.bg.rma, main="Background Adjusted Only") screen(2) image(Low10.constant, main="Scaling") screen(3) image(Low10.nl, main="Non-linear") screen(4) image(Low10.quant, main="Quantile") # Can do all arrays at once Est.norm <- normalize(Est.rma, method="constant") # Compare with preprocssed data boxplot(Est.norm) # Summarization Processed <- expresso(EstArrays, bgcorrect.method="rma", normalize.method="constant", summary.method="avgdiff", pmcorrect.method="pmonly") # Compare with preprocessed data boxplot(data.frame(log2(exprs(Processed)))) #Generate summary statistics for Array 1 summary(exprs(Processed[,1])) # Gene Filtering library(genefilter) # Recall experimental set up pData(Processed@phenoData) # First let's generate summary statistics for each array summary(exprs(Processed)) # Non-specific filter example # Create filter. Want genes with expression levels greater than 500 for both samples # Criterion for filter Big500 <- kOverA(2,500) # Create filter function Fn500 <- filterfun(Big500) # Apply filter function to matrices of expressions # Returns logical vector for each gene of whether it meets the criterion High500 <- genefilter(exprs(Processed[,3:4]), Fn500) # Number that meet criterion sum(High500) # List of genes (i.e., probesets) that meet criterion High500[High500] # Applying more than 1 non-specific filter at a time # Genes with expression levels greater than 6 on log2 scale and # with an IQR of at least 0.5 on log2 scale across samples # Looking at expressions at 10 hours for low and high estrogen f1 <- kOverA(2, 10) f2 <- function(x) (IQR(x)>0.5) Filter <- filterfun(f1, f2) TheGenes <- genefilter(log2(exprs(Processed[,1:4])), Filter) # Specific Filter Example # Compare expression levels between high and low estrogen at both times combined # Simple t-test with significance at 0.001 # Create filter criterion tf1 <- ttest(Processed$estrogen, 0.001) tFilter <- filterfun(tf1) tGenes <- genefilter(exprs(Processed), tFilter) sum(tGenes)