################################################################################ ################# MODELS FOR L2 AND PARALLEL TEXT PARAMETERS ################### ################################################################################ #Author: Chris Bentz #System: Linux Ubuntu 14.04 (64-bit) #R Version: 3.1.2 #------------------------------- LOAD PACKAGES --------------------------------- library(lme4) library(ggplot2) library(gridExtra) library(MuMIn) #------------------------------- LOAD DATA ------------------------------------ LDT_l2=read.csv(file.choose())#choose file from folder LDT_l2=LDT_l2[LDT_l2$iso!="epo",] #excluding Esperanto LDT_l2=LDT_l2[LDT_l2$iso!="san",] #excluding Sanskrit #------------------------------ MIXED EFFECTS MODEL------------------------------- #SIMPLE LINEAR MODEL LDT.lm=lm(LDTscaled~log(RatioL2),data=LDT_l2) summary(LDT.lm) AIC(LDT.lm) #MIXED-EFFECTS MODEL STEPWISE BUILDING LDT.lmer1=lmer(LDTscaled~(1|Stock),data=LDT_l2) summary(LDT.lmer1) AIC(LDT.lmer1) LDT.lmer2=lmer(LDTscaled~(1|Stock)+(1|Region),data=LDT_l2) summary(LDT.lmer2) AIC(LDT.lmer2) LDT.lmer2=lmer(LDTscaled~(1|Stock)+(1|Region),data=LDT_l2) summary(LDT.lmer2) AIC(LDT.lmer2) LDT.lmer3=lmer(LDTscaled~(1|Stock)+(1|Region)+(1|text),data=LDT_l2) summary(LDT.lmer3) AIC(LDT.lmer3) LDT.lmer3=lmer(LDTscaled~(1|Stock)+(1|Region)+(1|text),data=LDT_l2) summary(LDT.lmer3) AIC(LDT.lmer3) LDT.lmer4=lmer(LDTscaled~(1|Stock)+(1|Region)+(1|text)+(1|measure),data=LDT_l2) summary(LDT.lmer4) AIC(LDT.lmer4) LDT.lmer3=lmer(LDTscaled~(1|Stock)+(1|Region)+(1|text),data=LDT_l2) summary(LDT.lmer3) AIC(LDT.lmer3) LDT.lmer5=lmer(LDTscaled~(1|Stock)+(0+log(RatioL2)|Stock)+(1|Region)+(1|text),data=LDT_l2) summary(LDT.lmer5) AIC(LDT.lmer5) LDT.lmer5=lmer(LDTscaled~(1|Stock)+(0+log(RatioL2)|Stock)+(1|Region)+(1|text),data=LDT_l2) summary(LDT.lmer5) AIC(LDT.lmer5) LDT.lmer6=lmer(LDTscaled~(1|Stock)+(0+log(RatioL2)|Stock)+(1|Region)+(0+log(RatioL2)|Region)+(1|text),data=LDT_l2) summary(LDT.lmer6) AIC(LDT.lmer6) LDT.lmer6=lmer(LDTscaled~(1|Stock)+(0+log(RatioL2)|Stock)+(1|Region)+(0+log(RatioL2)|Region)+(1|text),data=LDT_l2) summary(LDT.lmer6) AIC(LDT.lmer6) LDT.lmer7=lmer(LDTscaled~(1|Stock)+(0+log(RatioL2)|Stock)+(1|Region)+(0+log(RatioL2)|Region)+(1|text)+(0+log(RatioL2)|text),data=LDT_l2) summary(LDT.lmer7) AIC(LDT.lmer7) LDT.lmer7=lmer(LDTscaled~(1|Stock)+(0+log(RatioL2)|Stock)+(1|Region)+(0+log(RatioL2)|Region)+(1|text)+(0+log(RatioL2)|text),data=LDT_l2) summary(LDT.lmer7) AIC(LDT.lmer7) LDT.lmer8=lmer(LDTscaled~(1+log(RatioL2)|Stock)+(1|Region)+(0+log(RatioL2)|Region)+(1|text)+(0+log(RatioL2)|text),data=LDT_l2) summary(LDT.lmer8) AIC(LDT.lmer8) LDT.lmer8=lmer(LDTscaled~(1+log(RatioL2)|Stock)+(1|Region)+(0+log(RatioL2)|Region)+(1|text)+(0+log(RatioL2)|text),data=LDT_l2) summary(LDT.lmer8) AIC(LDT.lmer8) LDT.lmer9=lmer(LDTscaled~(1+log(RatioL2)|Stock)+(1+log(RatioL2)|Region)+(1|text)+(0+log(RatioL2)|text),data=LDT_l2) summary(LDT.lmer9) AIC(LDT.lmer9) LDT.lmer8=lmer(LDTscaled~(1+log(RatioL2)|Stock)+(1|Region)+(0+log(RatioL2)|Region)+(1|text)+(0+log(RatioL2)|text),data=LDT_l2) summary(LDT.lmer8) AIC(LDT.lmer8) LDT.lmer10=lmer(LDTscaled~(1+log(RatioL2)|Stock)+(1|Region)+(0+log(RatioL2)|Region)+(1+log(RatioL2)|text),data=LDT_l2) summary(LDT.lmer10) AIC(LDT.lmer10) LDT.lmer10=lmer(LDTscaled~(1+log(RatioL2)|Stock)+(1|Region)+(0+log(RatioL2)|Region)+(1+log(RatioL2)|text),data=LDT_l2,REML=F) summary(LDT.lmer10) AIC(LDT.lmer10) LDT.lmer11=lmer(LDTscaled~log(RatioL2)+(1+log(RatioL2)|Stock)+(1|Region)+(0+log(RatioL2)|Region)+(1+log(RatioL2)|text),data=LDT_l2,REML=F) summary(LDT.lmer11) AIC(LDT.lmer11) #OPTIMAL MODEL LDT.lmer.opt=lmer(LDTscaled~log(RatioL2)+(1|Stock)+(0+log(RatioL2)|Stock)+(1|Region)+(0+log(RatioL2)|Region)+(1|text)+(0+log(RatioL2)|text)+(1|measure)+(0+log(RatioL2)|measure)+(1|iso_639_3),data=LDT_l2) summary(LDT.lmer.opt) AIC(LDT.lmer.opt) #P-VALUE (according to BAAYEN 2008) 2*pt(-abs(-2.079),df=426-1) # Rsquared for GLMM with package MuMIn r.squaredGLMM(LDT.lmer.opt) #--------------------- GGPLOTS --------------------- #ALL frame_all=ggplot(LDT_l2,aes(x=log(RatioL2),y=LDTscaled)) Figure6=frame_all+ geom_point(size=2,alpha=0.5)+ geom_smooth(method="lm",alpha=0.3,size=1)+ labs(x="log(Ratio L2)",y="LDT(scaled)")+ theme(axis.title.x=element_text(size=17), axis.title.y=element_text(size=17), title=element_text(size=17))+ theme(axis.text.y=element_text(size=7), axis.text.x=element_text(size=7), axis.title.x=element_text(size=8), axis.title.y=element_text(size=8), legend.text=element_text(size=6), legend.title=element_text(size=8), legend.position=c(0.8,0.7)) Figure6 #ggsave("fileName", Figure6, width = 100, height = 70, units = "mm", dpi = 300, scale = 1) #BY STOCK frame_stock=ggplot(LDT_l2,aes(x=log(RatioL2),y=LDTscaled,colour=Stock_coarse)) Figure7=frame_stock+ geom_point(size=2,alpha=0.5)+geom_smooth(method="lm",alpha=0.1,size=1)+ labs(x="log(Ratio L2)",y="LDT")+ theme(axis.title.x=element_text(size=17), axis.title.y=element_text(size=17), title=element_text(size=17))+ scale_colour_discrete(name="Family")+ facet_wrap(~Stock_coarse)+ theme(axis.title.x=element_text(size=20), axis.title.y=element_text(size=20))+ theme_bw()+ theme(legend.position="none") Figure7 #ggsave("fileName", Figure7, width = 160, height = 130, units = "mm", dpi = 300, scale = 1) #BY REGION frame_region=ggplot(LDT_l2,aes(x=log(RatioL2),y=LDTscaled,colour=Region_coarse)) Figure8=frame_region+ geom_point(size=2,alpha=0.5)+ geom_smooth(method="lm",alpha=0.1,size=1)+ labs(x="log(Ratio L2)",y="LDT")+ theme(axis.title.x=element_text(size=17), axis.title.y=element_text(size=17), title=element_text(size=17))+ scale_colour_discrete(name="Region")+ facet_wrap(~Region_coarse)+ theme(axis.title.x=element_text(size=20), axis.title.y=element_text(size=20))+ theme_bw()+ theme(legend.position="none") Figure8 #ggsave("fileName", Figure8, width = 160, height = 130, units = "mm", dpi = 300, scale = 1) #BY MEASURE frame_measure=ggplot(LDT_l2,aes(x=log(RatioL2),y=LDTscaled,colour=measure)) Figure9=frame_measure+ geom_point(size=2,alpha=0.5)+ geom_smooth(method="lm",alpha=0.1,size=1)+ labs(x="log(Ratio L2)",y="LDT")+ theme(axis.title.x=element_text(size=17), axis.title.y=element_text(size=17), title=element_text(size=17))+ scale_colour_discrete(name="measure")+ facet_wrap(~measure)+ theme(axis.title.x=element_text(size=20), axis.title.y=element_text(size=20))+ theme_bw()+ theme(legend.position="none") Figure9 #ggsave("fileName", Figure9, width = 160, height = 80, units = "mm", dpi = 300, scale = 1) #BY TEXT frame_text=ggplot(LDT_l2,aes(x=log(RatioL2),y=LDTscaled,colour=text)) Figure10=frame_text+ geom_point(size=2,alpha=0.5)+ geom_smooth(method="lm",alpha=0.1,size=1)+ labs(x="log(Ratio L2)",y="LDT")+ theme(axis.title.x=element_text(size=17), axis.title.y=element_text(size=17), title=element_text(size=17))+ scale_colour_discrete(name="text")+ facet_wrap(~text)+ theme(axis.title.x=element_text(size=20), axis.title.y=element_text(size=20))+ theme_bw()+ theme(legend.position="none") Figure10 #ggsave("fileName", Figure10, width = 160, height = 80, units = "mm", dpi = 300, scale = 1) #---------------------------- CHECKING MODEL ASSUMPTIONS ---------------------------------- #### SIMPLE LINEAR MODEL #### #LINEARITY frame_all=ggplot(LDT_l2,aes(x=log(RatioL2),y=LDTscaled)) plot_lin_simple=frame_all+ geom_point(size=4,alpha=0.5)+ geom_smooth(method="lm",alpha=0.2,size=1.25,colour="black")+ geom_smooth(method="loess",alpha=0.2,size=1.25,linetype=2,colour="red")+ labs(x="log(L2 ratio)",y="LDT (scaled)")+ theme(axis.title.x=element_text(size=17), axis.title.y=element_text(size=17), title=element_text(size=17)) plot_lin_simple #NORMALITY LDT.lm.residuals=resid(LDT.lm) LDT.lm.fitted=fitted(LDT.lm) LDT.lm.df=data.frame(LDT.lm.residuals,LDT.lm.fitted) plot_norm_simple=ggplot(LDT.lm.df,aes(x=LDT.lm.residuals))+ geom_histogram(aes(y=..density..),fill="grey",binwidth=0.1)+ geom_line(aes(y=..density..),stat='density',colour="red",size=1)+ stat_function(fun=dnorm,args=with(LDT.lm.df, c(mean=mean(LDT.lm.residuals), sd=sd(LDT.lm.residuals))), colour="blue",size=1)+ labs(x="Residuals",y="Density") plot_norm_simple #HETEROSCEDASTICITY plot_hsced_simple=ggplot(LDT.lm.df,aes(x=LDT.lm.fitted,y=LDT.lm.residuals))+ geom_point(size=4,alpha=0.5)+ geom_smooth(method="lm",alpha=0.2,size=1.25,colour="red",linetype=2)+ labs(x="Fitted",y="Residuals") plot_hsced_simple #### MIXED EFFECTS #### #NORMALITY LDT.lmer.opt.residuals=resid(LDT.lmer.opt) LDT.lmer.opt.fitted=fitted(LDT.lmer.opt) LDT.lmer.opt.df=data.frame(LDT.lmer.opt.residuals,LDT.lmer.opt.fitted) plot_norm_mixed=ggplot(LDT.lmer.opt.df,aes(x=LDT.lmer.opt.residuals))+ geom_histogram(aes(y=..density..),fill="grey",binwidth=0.1)+ geom_line(aes(y=..density..),stat='density',colour="red",size=1)+ stat_function(fun=dnorm,args=with(LDT.lmer.opt.df, c(mean=mean(LDT.lmer.opt.residuals), sd=sd(LDT.lmer.opt.residuals))), colour="blue",size=1)+ labs(x="Residuals",y="Density") plot_norm_mixed #HETEROSCEDASTICITY plot_hsced_mixed=ggplot(LDT.lmer.opt.df,aes(x=LDT.lmer.opt.fitted,y=LDT.lmer.opt.residuals))+ geom_point(size=4,alpha=0.5)+ geom_smooth(method="lm",alpha=0.2,size=1.25,colour="red",linetype=2)+ labs(x="Fitted",y="Residuals") plot_hsced_mixed