# Two-way repeated measures factorial ANOVA # by Aaron Albin # Script began: 4/1/2012 ########################### # WHEN TO USE THIS SCRIPT # ########################### # Use this script if all of the following is true: # * You have measurements that you wish to compare between two sets of different non-numeric 'conditions'. # * Multiple individual things (e.g. participants in an experiment) were measured over the course of data collection, and you have multiple measurements from each individual. # * Your data are stored in four separate columns - one with the measurements, two others with the labels for the 'conditions', and a fourth with some sort of identifier that keeps track of which measurement belongs to which individual. # An example: Investigating the relationship between college students' year in school and academic major (different sets of non-numeric 'conditions') on the number of hours slept every night over the course of a month. # Notes: If you don't have approximately the same number of data points inside each combination of conditions (e.g. because of several NA data points), the results must be taken with care, since the statistical procedures underlying R's ANOVA function 'aov()' were designed for 'balanced designs'. ############### # SELECT DATA # ############### # Run this to have R tell you the names for each of the columns that it ended up with after importing the data frame: #=====[1]===== colnames(Dataframe) #============= # Now, pick out which of these columns has your number data for the ANOVA and which has the identifier labels. # Type the names of those columns between the double-quotes below. # It doesn't matter which is Label Column 1 and which is Label Column 2. #=====[2]===== Name_NumberColumn = "NumberColumn" Name_LabelColumn1 = "LabelColumn1" Name_LabelColumn2 = "LabelColumn2" Name_SubjectColumn = "SubjectColumn" # Name_NumberColumn = "VOT1"; Name_LabelColumn1 = "CONSONAT"; Name_LabelColumn2 = "SYLLABIF"; Name_SubjectColumn = "SPKR" #============= # Now run this to extract out the specific columns data you selected. # The function 'as.factor()' ensures that your conditions will be treated as conditions (e.g. not numbers if they are subject numbers (1,2,3,...) etc.) #=====[3]===== NumberData = Dataframe[,Name_NumberColumn] ConditionSet1 = as.factor(Dataframe[,Name_LabelColumn1]) ConditionSet2 = as.factor(Dataframe[,Name_LabelColumn2]) Subjects = as.factor(Dataframe[,Name_SubjectColumn]) #============= ################ # RUN ANALYSES # ################ # In the output, it gives you a confidence interval. R defaults to a 95% confidence interval, as this is common practice in the literature, but you can adjust the percentage here if you like (to 99%, 99.9% etc.). # You must specify this in decimal form, i.e. 0.95, 0.99, 0.999, etc.: #=====[4]===== PercentageForConfidenceInterval = 0.95 #============= # Run these four lines of code to conduct the analysis: #=====[5]===== FittedModel_FullDataset=aov(NumberData ~ ConditionSet1*ConditionSet2 + Error(Subjects/(ConditionSet1*ConditionSet2)), qr=TRUE, contrasts=NULL) ResultsSummary = summary(object=FittedModel_FullDataset,intercept=FALSE,keep.zero.df=TRUE) AggregatedData = aggregate(NumberData, by=list(Subjects=Subjects,ConditionSet1=ConditionSet1,ConditionSet2=ConditionSet2), FUN=mean) FittedModel_AggregatedData=aov(x ~ ConditionSet1*ConditionSet2, qr=TRUE, contrasts=NULL, data=AggregatedData) ConditionMeans = model.tables(x=FittedModel_AggregatedData,type="means",se=FALSE) TukeyHSD = TukeyHSD(x=FittedModel_AggregatedData,ordered=FALSE, conf.level=PercentageForConfidenceInterval) #============= ####################### # INSPECT THE RESULTS # ####################### # Run the following code for various descriptive statistics of the data that were input into the analyses: #=====[6]===== {cat("\n\nDESCRIPTIVE STATISTICS\n~~~~~~~~~~~~~~~~~~~~~~\nOverall mean across the entire dataset = ",ConditionMeans\$tables\$'Grand mean',"\n",sep="") nConditions_Set1 = length(ConditionMeans\$tables\$ConditionSet1); if(!is.null(ConditionMeans\$tables\$ConditionSet1)){ cat("\nMean for each condition in '",Name_LabelColumn1,"':\n",sep=""); for(EachCondition in 1:nConditions_Set1){ ExtractedNSet = ConditionMeans\$n;if(identical(names(ExtractedNSet),c("ConditionSet1","ConditionSet2","ConditionSet1:ConditionSet2"))&!is.list(ExtractedNSet)){ExtractedN=ExtractedNSet["ConditionSet1"]}else{ExtractedN = ConditionMeans\$n\$ConditionSet1[EachCondition]} if(!is.na(ExtractedN)){CurrentN=ExtractedN}else{CurrentN=ConditionMeans\$n\$ConditionSet1[1]}; cat(" * Condition '",names(ConditionMeans\$tables\$ConditionSet1)[EachCondition],"': ",ConditionMeans\$tables\$ConditionSet1[EachCondition]," (",CurrentN," observations)\n",sep="")}; nConditions_Set2 = length(ConditionMeans\$tables\$ConditionSet2) } if(!is.null(ConditionMeans\$tables\$ConditionSet2)){ cat("\nMean for each condition in '",Name_LabelColumn2,"':\n",sep=""); for(EachCondition in 1:nConditions_Set2){ ExtractedNSet = ConditionMeans\$n;if(identical(names(ExtractedNSet),c("ConditionSet1","ConditionSet2","ConditionSet1:ConditionSet2"))&!is.list(ExtractedNSet)){ExtractedN=ExtractedNSet["ConditionSet1"]}else{ExtractedN = ConditionMeans\$n\$ConditionSet1[EachCondition]} if(!is.na(ExtractedN)){CurrentN=ExtractedN}else{CurrentN=ConditionMeans\$n\$ConditionSet2[1]}; cat(" * Condition '",names(ConditionMeans\$tables\$ConditionSet2)[EachCondition],"': ",ConditionMeans\$tables\$ConditionSet2[EachCondition]," (",CurrentN," observations)\n",sep="")}} if(!is.null(ConditionMeans\$tables\$'ConditionSet1:ConditionSet2')){ cat("\nMean for each combination of conditions:\n"); for(EachCondition1 in 1:nConditions_Set1){for(EachCondition2 in 1:nConditions_Set2){ ExtractedNSet = ConditionMeans\$n;if(identical(names(ExtractedNSet),c("ConditionSet1","ConditionSet2","ConditionSet1:ConditionSet2"))&!is.list(ExtractedNSet)){ExtractedN=ExtractedNSet["ConditionSet1:ConditionSet2"]}else{ConditionMeans\$n\$'ConditionSet1:ConditionSet2'[EachCondition1,EachCondition2]} cat(" * '",rownames(ConditionMeans\$tables\$'ConditionSet1:ConditionSet2')[EachCondition1],"' and '",colnames(ConditionMeans\$tables\$'ConditionSet1:ConditionSet2')[EachCondition2],"': ",ConditionMeans\$tables\$'ConditionSet1:ConditionSet2'[EachCondition1,EachCondition2]," (",ExtractedN," observations)\n",sep="")}}}; cat("\n\n")} #============= # Run the following for a description of the specific test/analysis that was run: #=====[7]===== cat("\n\nTEST THAT WAS RUN\n~~~~~~~~~~~~~~~~~\nA two-way factorial repeated measures ANOVA was run on '",Name_NumberColumn,"' as grouped by\n '", Name_LabelColumn1,"' and '", Name_LabelColumn2, "', with '",Name_SubjectColumn,"' used as the error term for\n individual subjects.\n\n\n",sep=""); #============= # Run the following lines of code for a description of the results obtained from the analysis: #=====[8]===== { cat("\n\nRESULTS\n~~~~~~~\nOverall across all conditions of '", Name_LabelColumn1,"':\n F(",ResultsSummary[["Error: Subjects:ConditionSet1"]][[1]]["ConditionSet1","Df"],",",ResultsSummary[["Error: Subjects:ConditionSet1"]][[1]]["Residuals","Df"],") = ",ResultsSummary[["Error: Subjects:ConditionSet1"]][[1]]["ConditionSet1","F value"],", p = ",ResultsSummary[["Error: Subjects:ConditionSet1"]][[1]]["ConditionSet1","Pr(>F)"],"\n\nSpecific comparisons:\n",sep=""); nSet1Comparisons = nrow(TukeyHSD\$ConditionSet1); for(EachComparison in 1:nSet1Comparisons){cat(" * ",rownames(TukeyHSD\$ConditionSet1)[EachComparison],": Mean = ",TukeyHSD\$ConditionSet1[EachComparison,"diff"],", ", 100*PercentageForConfidenceInterval, "% confidence interval [",TukeyHSD\$ConditionSet1[EachComparison,"lwr"],", ",TukeyHSD\$ConditionSet1[EachComparison,"upr"],"], p = ",TukeyHSD\$ConditionSet1[EachComparison,"p adj"],"\n",sep="")}; cat("\nOverall across all conditions of '", Name_LabelColumn2,"':\n F(",ResultsSummary[["Error: Subjects:ConditionSet2"]][[1]]["ConditionSet2","Df"],",",ResultsSummary[["Error: Subjects:ConditionSet2"]][[1]]["Residuals","Df"],") = ",ResultsSummary[["Error: Subjects:ConditionSet2"]][[1]]["ConditionSet2","F value"],", p = ",ResultsSummary[["Error: Subjects:ConditionSet2"]][[1]]["ConditionSet2","Pr(>F)"],"\n\nSpecific comparisons:\n",sep=""); nSet2Comparisons = nrow(TukeyHSD\$ConditionSet2); for(EachComparison in 1:nSet2Comparisons){cat(" * ",rownames(TukeyHSD\$ConditionSet2)[EachComparison],": Mean = ",TukeyHSD\$ConditionSet2[EachComparison,"diff"],", ", 100*PercentageForConfidenceInterval, "% confidence interval [",TukeyHSD\$ConditionSet2[EachComparison,"lwr"],", ",TukeyHSD\$ConditionSet2[EachComparison,"upr"],"], p = ",TukeyHSD\$ConditionSet2[EachComparison,"p adj"],"\n",sep="")}; if(!is.na(ResultsSummary[["Error: Subjects:ConditionSet1:ConditionSet2"]][[1]]["ConditionSet1:ConditionSet2","Pr(>F)"])){cat("\nOverall across all interactions/combinations of conditions:\n F(",ResultsSummary[["Error: Subjects:ConditionSet1:ConditionSet2"]][[1]]["ConditionSet1:ConditionSet2","Df"],",",ResultsSummary[["Error: Subjects:ConditionSet1:ConditionSet2"]][[1]]["Residuals","Df"],") = ",ResultsSummary[["Error: Subjects:ConditionSet1:ConditionSet2"]][[1]]["ConditionSet1:ConditionSet2","F value"],", p = ",ResultsSummary[["Error: Subjects:ConditionSet1:ConditionSet2"]][[1]]["ConditionSet1:ConditionSet2","Pr(>F)"],"\n\nSpecific interactions:\n",sep=""); nInteractions = nrow(TukeyHSD\$'ConditionSet1:ConditionSet2'); for(EachComparison in 1:nInteractions){cat(" * ",rownames(TukeyHSD\$'ConditionSet1:ConditionSet2')[EachComparison],": Mean = ",TukeyHSD\$'ConditionSet1:ConditionSet2'[EachComparison,"diff"],", ", 100*PercentageForConfidenceInterval, "% confidence interval [",TukeyHSD\$'ConditionSet1:ConditionSet2'[EachComparison,"lwr"],", ",TukeyHSD\$'ConditionSet1:ConditionSet2'[EachComparison,"upr"],"], p = ",TukeyHSD\$'ConditionSet1:ConditionSet2'[EachComparison,"p adj"],"\n",sep="")}; cat("\nBetween-subjects effect: F(",ResultsSummary[["Error: Subjects"]][[1]]["ConditionSet1","Df"],",",ResultsSummary[["Error: Subjects"]][[1]]["Residuals","Df"],") = ",ResultsSummary[["Error: Subjects"]][[1]]["ConditionSet1","F value"],", p = ",ResultsSummary[["Error: Subjects"]][[1]]["ConditionSet1","Pr(>F)"],"\n\n",sep="")}; cat("\n") } #============= # The following is R's default way of displaying the results of the various tests performed above: #=====[9]===== show(ConditionMeans) show(ResultsSummary) show(TukeyHSD) #=============