# Two-way multiple analysis of variance (MANOVA) (i.e. with two predictors)
# by Aaron Albin
# Script began: 3/7/2012
###########################
# WHEN TO USE THIS SCRIPT #
###########################
# Use this script if all of the following is true:
# * You have multiple (i.e. two or more) sets of measurements that you wish to simultaneously compare between two different sets of non-numeric 'conditions'.
# * Your data are all stored in separate columns - two with the identifier labels and several more with the measurements.
# Example: You are investigating the impact of gender and profession (professor, construction worker, CEO, etc.) on average hours sleep per night and body weight.
# Note: If you don't have approximately the same number of data points inside each condition (e.g. because of several NA data points), the results must be taken with care, since the statistical procedures underlying R's ANOVA function 'aov()' were designed for 'balanced designs'.
###############
# SELECT DATA #
###############
# Run this to have R tell you the names for each of the columns that it ended up with after importing the data frame:
#=====[1]=====
colnames(Dataframe)
#=============
# Now, pick out which two of these columns have the different sets of identifier labels (the 'conditions') for the MANOVA. Type the name of those columns between the double-quotes below.
#=====[2]=====
Name_LabelColumn1 = "LabelColumn1"
Name_LabelColumn2 = "LabelColumn2"
# Name_LabelColumn1 = "SYLLABIF"; Name_LabelColumn2 = "CONSONAT"
#=============
# Below, list the names of the various columns that contain numbers for the analysis, separated by commas.
# You can have any number of them (as long as they are all separated by commas.
# Each of the names must be surrounded by double-quotes (").
# Here we put these inside the 'c()' ('combine') function, which stores multiple things inside one object in R.
#=====[3]=====
Names_NumberColumns = c("NumberColumn1","NumberColumn2")
#Names_NumberColumns = c("VOT1","ANS_B")
#=============
# Now run this to extract out the specific columns data you selected.
#=====[4]=====
NumberData = as.matrix( Dataframe[,c(Names_NumberColumns)] )
Conditions1 = Dataframe[,Name_LabelColumn1]
Conditions2 = Dataframe[,Name_LabelColumn2]
#=============
################
# RUN ANALYSES #
################
# Now, finally, run this code to fit a linear 'multiple analysis of variance' model to the data using this formula.
#=====[5]=====
FittedModel = aov(NumberData ~ Conditions1*Conditions2, qr=TRUE, contrasts=NULL)
#=============
# Calculate various summary statistics of the model just fitted.
# For advanced users: The line of code here calculates the Pillai–Bartlett statistic. See [5b], [5c] and [5d] below for three alternatives (the Wilks' Labmda, Hotelling-Lawley Trace, and Roy's Greatest Root statistics, respectively).
#=====[6a]=====
ResultsSummary = summary.manova(object=FittedModel, test="Pillai", intercept=FALSE, tol = 1e-7, keep.zero.df=TRUE)
#=============
#######################
# INSPECT THE RESULTS #
#######################
# Run the following code for a reformatted, easy-to-read representation of these various results, synthesized:
#=====[7]=====
cat("\n\nTEST THAT WAS RUN\n~~~~~~~~~~~~~~~~~\nUsing the grouping conditions in ", Name_LabelColumn1, " and ", Name_LabelColumn2, ",\n a two-way MANOVA was run on: ",paste(Names_NumberColumns,collapse=" and "),"\n \n\nRESULTS\n~~~~~~~\nMain effect of ", Name_LabelColumn1, ": F(",ResultsSummary$stats["Conditions1","Df"],",",ResultsSummary$stats["Residuals","Df"],") = ",ResultsSummary$stats["Conditions1","approx F"],", p = ",ResultsSummary$stats["Conditions1","Pr(>F)"],"\n\nMain effect of ", Name_LabelColumn2, ": F(",ResultsSummary$stats["Conditions2","Df"],",",ResultsSummary$stats["Residuals","Df"],") = ",ResultsSummary$stats["Conditions2","approx F"],", p = ",ResultsSummary$stats["Conditions2","Pr(>F)"],"\n\n",Name_LabelColumn1,"-",Name_LabelColumn2," interaction: F(",ResultsSummary$stats["Conditions1:Conditions2","Df"],",",ResultsSummary$stats["Residuals","Df"],") = ",ResultsSummary$stats["Conditions1:Conditions2","approx F"],", p = ",ResultsSummary$stats["Conditions1:Conditions2","Pr(>F)"],"\n\n\n",sep="")
#=============
# The following is R's default display of the output:
#=====[8]=====
show(ResultsSummary)
#=============
# All of the above code utilizes the Pillai–Bartlett Trace statistic, which is recommended by Hand, D. J. and Taylor, C. C. (1987). Multivariate Analysis of Variance and Repeated Measures. Chapman and Hall.
# Here are three other alternate ways of running the MANOVA based on different approximation methods. Run this instead of code line [6a] above, then run line [7] and/or [8] again.
# The Wilks' Labmda statistic ('popular in the literature')
#=====[6b]=====
ResultsSummary = summary.manova(object=FittedModel, test="Wilks", intercept=FALSE, tol = 1e-7, keep.zero.df=TRUE)
#=============
# The Hotelling-Lawley Trace statistic
#=====[6c]=====
ResultsSummary = summary.manova(object=FittedModel, test="Hotelling-Lawley", intercept=FALSE, tol = 1e-7, keep.zero.df=TRUE)
#=============
# (Upper bound of) Roy's Greatest Root statistic
#=====[6d]=====
ResultsSummary = summary.manova(object=FittedModel, test="Roy",intercept=FALSE, tol = 1e-7, keep.zero.df=TRUE)
#=============