# Emergence-timing bubble plot.
#
# Prompts for a CSV file that has a `Date` column (parsed as month/day/year)
# and a `Time` column (parsed as hours:minutes:seconds), bins every record
# into fixed-width time-of-day zones (10 minutes by default) within its day,
# and draws one open circle per (day, zone) whose area is proportional to the
# number of records in that bin.

# install.packages("lubridate")
# install.packages("plyr")
# install.packages("scales")
library(lubridate) # date/time parsing: mdy(), hms(), as.duration()
library(plyr)      # count()
library(ggplot2)   # plotting
library(scales)    # alpha()

# ---- Input ------------------------------------------------------------------

# Keep the full path returned by file.choose() so the file can live in any
# directory; the script no longer changes the working directory to a
# hard-coded, user-specific folder.
path <- file.choose()
name <- basename(path)

# Upper y limits of the shaded "ramp" rectangles drawn on the plot.
# NOTE(review): with a value of 0 the rectangle spans y = 0..0 and is therefore
# invisible; presumably this is meant to be set by hand to the last day that
# should be shaded. Confirm with the author.
Fristymax <- 0
Lastymax <- 0

data <- read.csv(path, header = TRUE)
missingColumns <- setdiff(c("Date", "Time"), names(data))
if (length(missingColumns) > 0) {
  stop(
    "Input file is missing required column(s): ",
    paste(missingColumns, collapse = ", "),
    call. = FALSE
  )
}

# lubridate parses character input, so convert explicitly before parsing.
data$Date <- mdy(as.character(data$Date))
if (anyNA(data$Date)) {
  stop(
    "Some values in the Date column could not be parsed as month/day/year.",
    call. = FALSE
  )
}
data$Time <- hms(as.character(data$Time))
data$Hours <- hour(data$Time)     # hour component of Time
data$Minutes <- minute(data$Time) # minute component of Time

# Day number of each record, counting the earliest date in the file as day 1.
firstDate <- min(data$Date)
data$Days <- as.numeric(difftime(data$Date, firstDate, units = "days")) + 1

# ---- Binning parameters -----------------------------------------------------

period <- 600                       # bin width in seconds (10 minutes)
interval <- seq(0, 86400, period)   # bin edges over 24 hours (86400 seconds)
numberofintervals <- length(interval)

# ---- Sort and bin the records -----------------------------------------------

# Sort by day, then by time of day. The ordering uses plain seconds rather
# than the Period object so it does not depend on how order() treats that
# class.
secondsOfDay <- as.numeric(as.duration(data$Time))
dataSort <- data[order(data$Days, secondsOfDay), ]
dataSort$Duration <- as.duration(dataSort$Time) # time of day as a duration

# Zone = index of the bin the record falls in (1 = first `period` seconds of
# the day). ceiling() maps a record at exactly 00:00:00 to zone 0, which the
# `Zones > 0` filter below would silently discard, so clamp it into zone 1.
dataSort$Zones <- pmax(ceiling(as.numeric(dataSort$Duration) / period), 1)

UniqueDays <- unique(dataSort$Days)
# Use the maximum day rather than the number of distinct days: the data set
# may have gaps, and the grid below must still contain every day.
numberofdays <- max(UniqueDays)
daysqu <- seq_len(numberofdays)

# Plot title: the file name without its extension. file_path_sans_ext() strips
# only the final extension, so names that contain other dots are kept intact.
titleB <- tools::file_path_sans_ext(name)

# ---- Full day x interval grid -----------------------------------------------

# One row for every (day, interval) combination, so bins without any records
# are still present after the merge (their Counts will be NA).
totalDays <- rep(daysqu, each = numberofintervals)
totalInterval <- rep(interval, times = numberofdays)
TableLongFormat <- data.frame(Day = totalDays, Intervals = totalInterval)
TableLongFormat$Zones <- TableLongFormat$Intervals / period

# Number of records per (day, zone).
TableCounts <- plyr::count(dataSort, c("Days", "Zones"))
colnames(TableCounts) <- c("Day", "Zones", "Counts")

# Attach the counts to the grid using the shared Day and Zones columns.
MergedTable <- merge(
  TableLongFormat, TableCounts,
  all = TRUE, by = c("Day", "Zones")
)
MergedTable <- subset(MergedTable, Zones > 0) # drop the grid's zone-0 rows

# Clock time corresponding to each row's Intervals value, as zero-padded HH:MM.
MergedTable$Hours <- MergedTable$Intervals / 3600
MergedTable$HoursWhole <- floor(MergedTable$Hours)
MergedTable$Minutes <- round(
  60 * (MergedTable$Hours - MergedTable$HoursWhole),
  digits = 0
)
MergedTable$Time <- sprintf(
  "%02.0f:%02.0f", MergedTable$HoursWhole, MergedTable$Minutes
)

# ---- Shaded ramp windows ----------------------------------------------------

# Two windows, 07:00-08:00 and 19:00-20:00, expressed in zone units.
zonesPerHour <- 3600 / period
Ramp <- c("A", "B")
Start <- c(7, 19) * zonesPerHour
End <- c(8, 20) * zonesPerHour
RampTable <- data.frame(Ramp, Start, End)

# ---- Plot -------------------------------------------------------------------

# X-axis ticks every four hours. Zones is numeric, so a continuous scale with
# explicit breaks is used; the 00:00 tick sits on zone 1, the first zone drawn.
hourBreaks <- seq(0, 20, by = 4)
zoneBreaks <- pmax(hourBreaks * zonesPerHour, 1)
zoneLabels <- c("00:00", "04:00", "08:00", "12:00", "16:00", "20:00")

ggplot(MergedTable, aes(Zones, Day)) +
  geom_point(aes(size = Counts), shape = 1) +
  scale_size_area(max_size = 20) +
  scale_x_continuous(breaks = zoneBreaks, labels = zoneLabels) +
  scale_y_continuous(
    breaks = seq_len(numberofdays),
    labels = seq_len(numberofdays)
  ) +
  xlab("Time in hours") +
  ylab("Day") +
  # geom_hline(aes(yintercept = MedianDay, color = "red")) +
  # geom_vline(aes(xintercept = MedianZones, color = "red")) +
  geom_rect(
    aes(xmin = Start, xmax = End, fill = Ramp),
    ymin = 0, ymax = Fristymax,
    data = RampTable, inherit.aes = FALSE
  ) +
  # geom_rect(
  #   aes(xmin = Start, xmax = End, fill = Ramp),
  #   ymin = 0, ymax = Lastymax,
  #   data = RampTable, inherit.aes = FALSE
  # ) +
  scale_fill_manual(values = alpha(c("blue", "blue"), 0.2), guide = "none") +
  ggtitle(titleB) +
  theme(
    # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
    # lines; `size` is kept so the script still runs on older installations.
    axis.line.x = element_line(color = "black", size = .5),
    axis.line.y = element_line(color = "black", size = .5),
    panel.background = element_rect(fill = "white"),
    axis.text.x = element_text(color = "black", face = "bold"),
    axis.title.x = element_text(color = "black", face = "bold"),
    axis.text.y = element_text(color = "black", face = "bold"),
    axis.title.y = element_text(color = "black", face = "bold")
  )

# The warning "Removed N rows containing missing values (geom_point)" only
# reports the grid rows whose Counts is NA (bins with no records); it can be
# ignored.
# write.csv(data, file = "data.csv")