# Two-way factorial ANOVA # by Aaron Albin # Script began: 2/28/2012 ########################### # WHEN TO USE THIS SCRIPT # ########################### # Use this script if all of the following is true: # * You have measurements that you wish to compare between two sets of different non-numeric 'conditions'. # * Your data are stored in three separate columns - one with the measurements and two others with the labels for the 'conditions'. # An example: Investigating the relationship between college students' year in school and academic major (different sets of non-numeric 'conditions') on the average number of hours slept per night. # Notes: If you don't have approximately the same number of data points inside each combination of conditions (e.g. because of several NA data points), the results must be taken with care, since the statistical procedures underlying R's ANOVA function 'aov()' were designed for 'balanced designs'. ############### # SELECT DATA # ############### # Run this to have R tell you the names for each of the columns that it ended up with after importing the data frame: #=====[1]===== colnames(Dataframe) #============= # Now, pick out which of these columns has your number data for the ANOVA and which has the identifier labels. # Type the names of those columns between the double-quotes below. # It doesn't matter which is Label Column 1 and which is Label Column 2. #=====[2]===== Name_NumberColumn = "NumberColumn1" Name_LabelColumn1 = "LabelColumn1" Name_LabelColumn2 = "LabelColumn2" # Name_NumberColumn = "VOT1"; Name_LabelColumn1 = "CONSONAT"; Name_LabelColumn2 = "SYLLABIF" #============= # Now run this to extract out the specific columns data you selected. # The function 'as.factor()' ensures that your conditions will be treated as conditions (e.g. not numbers if they are just numbers (1,2,3,...) etc.) #=====[3]===== NumberData = Dataframe[,Name_NumberColumn] ConditionSet1 = as.factor(Dataframe[,Name_LabelColumn1]) ConditionSet2 = as.factor(Dataframe[,Name_LabelColumn2]) #============= ################ # RUN ANALYSES # ################ # In the output, it gives you a confidence interval. R defaults to a 95% confidence interval, as this is common practice in the literature, but you can adjust the percentage here if you like (to 99%, 99.9% etc.). # You must specify this in decimal form, i.e. 0.95, 0.99, 0.999, etc.: #=====[4]===== PercentageForConfidenceInterval = 0.95 #============= # Run these four lines of code to conduct the analysis: #=====[5]===== FittedModel = aov(NumberData ~ ConditionSet1 + ConditionSet2 + ConditionSet1:ConditionSet2, qr=TRUE, contrasts=NULL) # Fit a linear 'analysis of variance' model to the data ResultsSummary = summary.aov(object=FittedModel,intercept=FALSE,keep.zero.df=TRUE) # Calculate various summary statistics of the model just fitted ConditionMeans = model.tables(x=FittedModel,type="means",se=FALSE) # Calculate the average of the measurements within each specific condition TukeyHSD = TukeyHSD(x=FittedModel,ordered=FALSE, conf.level=PercentageForConfidenceInterval) # Calculate Tukey's Honest Significant Differences #============= ####################### # INSPECT THE RESULTS # ####################### # Run the following code for various descriptive statistics of the data that were input into the analyses: #=====[6]===== { cat("\n\nDESCRIPTIVE STATISTICS\n~~~~~~~~~~~~~~~~~~~~~~\nOverall mean across the entire dataset = ",ConditionMeans\$tables\$'Grand mean',"\n",sep="") nConditions_Set1 = length(ConditionMeans\$tables\$ConditionSet1) if(!is.null(ConditionMeans\$tables\$ConditionSet1)){ cat("\nMean for each condition in '",Name_LabelColumn1,"':\n",sep="")} for(EachCondition in 1:nConditions_Set1){ ExtractedNSet = ConditionMeans\$n if(identical(names(ExtractedNSet),c("ConditionSet1","ConditionSet2","ConditionSet1:ConditionSet2"))&!is.list(ExtractedNSet)){ExtractedN=ExtractedNSet["ConditionSet1"]}else{ExtractedN = ConditionMeans\$n\$ConditionSet1[EachCondition]} if(!is.na(ExtractedN)){CurrentN=ExtractedN}else{CurrentN=ConditionMeans\$n\$ConditionSet1[1]} cat(" * Condition '",names(ConditionMeans\$tables\$ConditionSet1)[EachCondition],"': ",ConditionMeans\$tables\$ConditionSet1[EachCondition]," (",CurrentN," observations)\n",sep="")} nConditions_Set2 = length(ConditionMeans\$tables\$ConditionSet2) if(!is.null(ConditionMeans\$tables\$ConditionSet2)){ cat("\nMean for each condition in '",Name_LabelColumn2,"':\n",sep="")} for(EachCondition in 1:nConditions_Set2){ ExtractedNSet = ConditionMeans\$n if(identical(names(ExtractedNSet),c("ConditionSet1","ConditionSet2","ConditionSet1:ConditionSet2"))&!is.list(ExtractedNSet)){ExtractedN=ExtractedNSet["ConditionSet2"]}else{ExtractedN = ConditionMeans\$n\$ConditionSet2[EachCondition]} if(!is.na(ExtractedN)){CurrentN=ExtractedN}else{CurrentN=ConditionMeans\$n\$ConditionSet2[1]} cat(" * Condition '",names(ConditionMeans\$tables\$ConditionSet2)[EachCondition],"': ",ConditionMeans\$tables\$ConditionSet2[EachCondition]," (",CurrentN," observations)\n",sep="")} if(!is.null(ConditionMeans\$tables\$'ConditionSet1:ConditionSet2')){ cat("\nMean for each combination of conditions:\n") for(EachCondition1 in 1:nConditions_Set1){for(EachCondition2 in 1:nConditions_Set2){ ExtractedNSet = ConditionMeans\$n if(identical(names(ExtractedNSet),c("ConditionSet1","ConditionSet2","ConditionSet1:ConditionSet2"))&!is.list(ExtractedNSet)){ExtractedN=ExtractedNSet["ConditionSet2"]}else{ExtractedN = ConditionMeans\$n\$'ConditionSet1:ConditionSet2'[EachCondition]} if(!is.na(ExtractedN)){CurrentN=ExtractedN}else{CurrentN=ConditionMeans\$n\$'ConditionSet1:ConditionSet2'[1]} cat(" * '",rownames(ConditionMeans\$tables\$'ConditionSet1:ConditionSet2')[EachCondition1],"' and '",colnames(ConditionMeans\$tables\$'ConditionSet1:ConditionSet2')[EachCondition2],"': ",ConditionMeans\$tables\$'ConditionSet1:ConditionSet2'[EachCondition1,EachCondition2]," (",CurrentN," observations)\n",sep="")}}} cat("\n\n") } #============= # Run the following for a description of the specific test/analysis that was run: #=====[7]===== cat("\n\nTEST THAT WAS RUN\n~~~~~~~~~~~~~~~~~\nA two-way ANOVA was run on '",Name_NumberColumn,"' as grouped by '", Name_LabelColumn1, "'\n and '",Name_LabelColumn2,"'.\n\n\n",sep="") #============= # Run the following for a description of the results obtained from the analysis: #=====[8]===== {cat("\n\nRESULTS\n~~~~~~~\n",sep="") nSet1Comparisons = nrow(TukeyHSD\$ConditionSet1) if(!is.null(nSet1Comparisons)){ cat("Overall across all conditions of '", Name_LabelColumn1,"':\n F(",ResultsSummary[[1]][1,"Df"],",",ResultsSummary[[1]][4,"Df"],") = ",ResultsSummary[[1]][1,"F value"],", p = ",ResultsSummary[[1]][1,"Pr(>F)"],"\n\nSpecific comparisons:\n",sep="") for(EachComparison in 1:nSet1Comparisons){cat(" * ",rownames(TukeyHSD\$ConditionSet1)[EachComparison],": Mean = ",TukeyHSD\$ConditionSet1[EachComparison,"diff"],", 95% confidence interval [",TukeyHSD\$ConditionSet1[EachComparison,"lwr"],", ",TukeyHSD\$ConditionSet1[EachComparison,"upr"],"], p = ",TukeyHSD\$ConditionSet1[EachComparison,"p adj"],"\n",sep="")} nSet2Comparisons = nrow(TukeyHSD\$ConditionSet2) if(!is.null(nSet2Comparisons)){ cat("\nOverall across all conditions of '", Name_LabelColumn2,"':\n F(",ResultsSummary[[1]][2,"Df"],",",ResultsSummary[[1]][4,"Df"],") = ",ResultsSummary[[1]][2,"F value"],", p = ",ResultsSummary[[1]][2,"Pr(>F)"],"\n\nSpecific comparisons:\n",sep="") for(EachComparison in 1:nSet2Comparisons){cat(" * ",rownames(TukeyHSD\$ConditionSet2)[EachComparison],": Mean = ",TukeyHSD\$ConditionSet2[EachComparison,"diff"],", ", 100*PercentageForConfidenceInterval, "% confidence interval [",TukeyHSD\$ConditionSet2[EachComparison,"lwr"],", ",TukeyHSD\$ConditionSet2[EachComparison,"upr"],"], p = ",TukeyHSD\$ConditionSet2[EachComparison,"p adj"],"\n",sep="")} if(!is.na(ResultsSummary[[1]][3,"Pr(>F)"])){cat("\nOverall across all interactions/combinations of conditions:\n F(",ResultsSummary[[1]][3,"Df"],",",ResultsSummary[[1]][4,"Df"],") = ",ResultsSummary[[1]][3,"F value"],", p = ",ResultsSummary[[1]][3,"Pr(>F)"],"\n\nSpecific interactions:\n",sep="")}} nInteractions = nrow(TukeyHSD\$'ConditionSet1:ConditionSet2') if(!is.null(nInteractions)){for(EachComparison in 1:nInteractions){cat(" * ",rownames(TukeyHSD\$'ConditionSet1:ConditionSet2')[EachComparison],": Mean = ",TukeyHSD\$'ConditionSet1:ConditionSet2'[EachComparison,"diff"],", 95% confidence interval [",TukeyHSD\$'ConditionSet1:ConditionSet2'[EachComparison,"lwr"],", ",TukeyHSD\$'ConditionSet1:ConditionSet2'[EachComparison,"upr"],"], p = ",TukeyHSD\$'ConditionSet1:ConditionSet2'[EachComparison,"p adj"],"\n",sep="")}} cat("\n")}} #============= # The following is R's default way of displaying the results of the various tests performed above: #=====[9]===== show(ConditionMeans) show(ResultsSummary) show(TukeyHSD) #=============