# One-way ANOVA
# by Aaron Albin
# Script began: 2/28/2012
###########################
# WHEN TO USE THIS SCRIPT #
###########################
# Use this script if all of the following is true:
# * You have measurements that you wish to compare between one set of different non-numeric 'conditions'.
# * Your data are stored in two separate columns - one with the measurements and another with the labels for the 'conditions'.
# An example: Investigating the relationship between college students' year in school (a set of non-numeric 'conditions') on the average number of hours slept per night.
# Note: If you don't have approximately the same number of data points inside each condition (e.g. because of several NA data points), the results must be taken with care, since the statistical procedures underlying R's ANOVA function 'aov()' were designed for 'balanced designs'.
###############
# SELECT DATA #
###############
# Run this to have R tell you the names for each of the columns that it ended up with after importing the data frame:
#=====[1]=====
colnames(Dataframe)
#=============
# Now, pick out which of these columns has your number data for the ANOVA and which has the identifier labels. Type the name of those columns between the double-quotes below.
#=====[2]=====
Name_NumberColumn = "NumberColumn1"
Name_LabelColumn = "LabelColumn1"
# Name_NumberColumn = "Replays_x2"; Name_LabelColumn = "voiceless"
#=============
# Now run this to extract out the specific columns data you selected.
# The function 'as.factor()' ensures that your conditions will be treated as conditions (e.g. not numbers if they are just numbers (1,2,3,...) etc.)
#=====[3]=====
NumberData = Dataframe[,Name_NumberColumn]
Conditions = as.factor(Dataframe[,Name_LabelColumn])
#=============
################
# RUN ANALYSES #
################
# In the output, it gives you a confidence interval. R defaults to a 95% confidence interval, as this is common practice in the literature, but you can adjust the percentage here if you like (to 99%, 99.9% etc.).
# You must specify this in decimal form, i.e. 0.95, 0.99, 0.999, etc.:
#=====[4]=====
PercentageForConfidenceInterval = 0.95
#=============
# Run these four lines of code to conduct the analysis:
#=====[5]=====
FittedModel = aov(NumberData ~ Conditions, qr=TRUE, contrasts=NULL) # fit a linear 'analysis of variance' model to the data
ResultsSummary = summary.aov(object=FittedModel,intercept=FALSE,keep.zero.df=TRUE) # Calculate various summary statistics of the model just fitted
ConditionMeans = model.tables(x=FittedModel,type="means",se=FALSE) # Calculate the average of the measurements within each specific condition
TukeyHSD = TukeyHSD(x=FittedModel,ordered=FALSE, conf.level=PercentageForConfidenceInterval) # Calculate Tukey's Honest Significant Differences
#=============
#######################
# INSPECT THE RESULTS #
#######################
# Run the following code for a reformatted, easy-to-read synthesis of the various results:
#=====[6]=====
cat("\n\nDESCRIPTIVE STATISTICS\n~~~~~~~~~~~~~~~~~~~~~~\nOverall mean across the entire dataset = ",ConditionMeans$tables$'Grand mean',"\n\nMean for each condition:\n",sep="");nConditions = length(ConditionMeans$tables$Conditions);for(EachCondition in 1:nConditions){ExtractedN = ConditionMeans$n$Conditions[EachCondition];if(!is.na(ExtractedN)){CurrentN=ExtractedN}else{CurrentN=ConditionMeans$n$Conditions[1]};cat(" * Condition '",names(ConditionMeans$tables$Conditions)[EachCondition],"': ",ConditionMeans$tables$Conditions[EachCondition]," (",CurrentN," observations)\n",sep="")};cat("\n\nTEST THAT WAS RUN\n~~~~~~~~~~~~~~~~~\nA one-way ANOVA was run on '",Name_NumberColumn,"' as grouped by '", Name_LabelColumn, "'.\n\n\nRESULTS\n~~~~~~~\nOverall across all conditions of '", Name_LabelColumn, "':\n F(",ResultsSummary[[1]]["Conditions","Df"],",",ResultsSummary[[1]]["Residuals","Df"],") = ",ResultsSummary[[1]]["Conditions","F value"],", p = ",ResultsSummary[[1]]["Conditions","Pr(>F)"],"\n\n",sep="");nComparisons = nrow(TukeyHSD$Conditions);for(EachComparison in 1:nComparisons){cat("Comparison '",rownames(TukeyHSD$Conditions)[EachComparison],"': Mean = ",TukeyHSD$Conditions[EachComparison,"diff"],", ", 100*PercentageForConfidenceInterval, "% confidence interval [",TukeyHSD$Conditions[EachComparison,"lwr"],", ",TukeyHSD$Conditions[EachComparison,"upr"],"], p = ",TukeyHSD$Conditions[EachComparison,"p adj"],"\n",sep="")};cat("\n")
#=============
# The following is R's default way of displaying the results of the various tests performed above:
#=====[7]=====
show(ConditionMeans)
show(ResultsSummary)
show(TukeyHSD)
#=============