# Quantifying abdominal pigmentaiton # Alexander W. Shingleton # Last updated: October 26th, 2016 #This script can be used to estimate the number of repeat images that need to be taken at the begining of each session to effectivly control for session effects. #The script uses repeated measurements of the same tergites over five sessions to estimate variance due to session effects, differences between indivduals tergites and residual measurement error. require(lme4) require(data.table) dt<-Representative.Results sim.loop<-function(data,trait,sample_size,overlap,no_sess){ dt<-data trait <- deparse(substitute(trait)) #First we calculate the variance due to session effects, differences between tergites and measurement error sds<-as.data.frame(VarCorr(lmer(dt[,trait]~(1|session)+(1|id), data=dt,REML=T))) #We then use these values as the parameters for our simulation. mean<-mean(dt[,trait]) sd_session<-sds[2,5] sd_ind<-sds[1,5] sd_meas<-sds[3,5] sample_size<-sample_size overlap<-overlap # This is the number of specimens that are repeat-measured from one session to the next no_sess<-no_sess # This is the number of sessions # We now simulate a sample of specimens measured without measurement error or session effects. session1<-as.data.frame(rnorm(sample_size,mean,sd_ind)) names(session1)<-c("trait") # We then add both variation due to session effects and measurement error. session1a<-session1+rnorm(sample_size,0,sd_meas)+rnorm(1,0,sd_session) session1a$session<-rep("session1",length(session1a)) # We then select the specimens that will be repeat measured in the next session. repeatmeas1<-tail(session1, overlap) # We then generate a second session of data, again measured without measurement error or session effects. session2<-as.data.frame(rnorm(sample_size-overlap,mean,sd_ind)) names(session2)<-c("trait") # We then add to this second session the data from the specimens that are repeat-measured, without measurement error or session effects. session2<-rbind(repeatmeas1,session2) # We again add both variation due to session effects and measurement error. session2a<-session2+rnorm(sample_size,0,sd_meas)+rnorm(1,0,sd_session) session2a$session<-rep("session2",length(session2a)) # This process is repeated to generate five sessions of data. repeatmeas2<-tail(session2, overlap) session3<-as.data.frame(rnorm(sample_size-overlap,mean,sd_ind)) names(session3)<-c("trait") session3<-rbind(repeatmeas2,session3) session3a<-session3+rnorm(sample_size,0,sd_meas)+rnorm(1,0,sd_session) session3a$session<-rep("session3",length(session3a)) repeatmeas3<-tail(session3, overlap) session4<-as.data.frame(rnorm(sample_size-overlap,mean,sd_ind)) names(session4)<-c("trait") session4<-rbind(repeatmeas3,session4) session4a<-session4+rnorm(sample_size,0,sd_meas)+rnorm(1,0,sd_session) session4a$session<-rep("session4",length(session4a)) repeatmeas4<-tail(session4, overlap) session5<-as.data.frame(rnorm(sample_size-overlap,mean,sd_ind)) names(session5)<-c("trait") session5<-rbind(repeatmeas4,session5) session5a<-session5+rnorm(sample_size,0,sd_meas)+rnorm(1,0,sd_session) session5a$session<-rep("session5",length(session5a)) simdata<-rbind(session1a,session2a,session3a,session4a,session5a) # We now have a complete simulated data set, which we can analyze. # First we subset the data by session. session1<-subset(simdata,session=="session1") session2<-subset(simdata,session=="session2") session3<-subset(simdata,session=="session3") session4<-subset(simdata,session=="session4") session5<-subset(simdata,session=="session5") # We then select the repeat-measured specimens from session 1 and session 2 and calculate their mean difference. session1tail<-tail(session1,overlap) session2head<-head(session2,overlap) mdiff<-mean(session1tail$trait-session2head$trait) # This mean difference is used to adjust the data from session 2. session2$trait<-session2$trait + mdiff # This process is repeated for the remaining sessions. session2tail<-tail(session2,overlap) session3head<-head(session3,overlap) mdiff<-mean(session2tail$trait-session3head$trait) session3$trait<-session3$trait + mdiff session3tail<-tail(session3,overlap) session4head<-head(session4,overlap) mdiff<-mean(session3tail$trait-session4head$trait) session4$trait<-session4$trait + mdiff session4tail<-tail(session4,overlap) session5head<-head(session5,overlap) mdiff<-mean(session4tail$trait-session5head$trait) session5$trait<-session5$trait + mdiff simdata.adj<-rbind(session1,session2,session3,session4,session5) names(simdata.adj)<-c("trait.adj") # We now have our corrected data, which we add back to our simulated data set. simdata<-cbind(simdata,simdata.adj$trait.adj) names(simdata)<-c("trait","session","trait.adj") # We now test whether session effects are eliminated after the correction. # Because lmer models need to have random factors, we add a random dummy variable to allow lmer comparison simdata$dummy <- sample(LETTERS[1:2],nrow(simdata), replace=TRUE ) raw<-lmer(trait~(1|dummy),data=simdata,REML=F) raw2<-lmer(trait~(1|dummy)+(1|session), data=simdata, REML=F) adj<-lmer(trait.adj~(1|dummy),data=simdata,REML=F) adj2<-lmer(trait.adj~(1|dummy)+(1|session), data=simdata,REML=F) return(list(anova(raw,raw2),anova(adj,adj2))) } sim.loop(dt,Pmax,50,15,5) # replicate same iteration, many times require(plyr) P.values<-replicate(100,sim.loop(dt,Pmax,50,20,5)[[2]]$`Pr(>Chisq)`[2]) hist(P.values) sum(P.values<0.05) # loop through different overlap sizes ldply(1:50,function(x) mean(replicate(100,sim.loop(dt,100,x,5)[[2]]$`Pr(>F)`[1])))