# Make the Desktop folder the working directory for the rest of the script.
# NOTE(review): this is a machine-specific absolute path and will fail on any
# computer that does not have this folder.
setwd("/Users/meghanbennett/Desktop")
files <- list.files() # names of the files found in the working directory
#install.packages("lubridate")
#install.packages("plyr")
#install.packages("scales")
library(lubridate) # load the lubridate package
library(plyr) # load the plyr package
library(ggplot2) # load the ggplot2 package
library(scales)
# Ask the user to pick the input CSV. The full path returned by file.choose()
# is what gets read, so the file does not have to sit in the current working
# directory; `name` keeps only the file name (it is used for the plot title).
inputPath <- file.choose()
name <- basename(inputPath)
# Upper y limit of the shaded "ramp" rectangles on the plot. `Lastymax` is only
# referenced by a plot layer that is currently commented out.
Fristymax <- 0
Lastymax <- 0
data <- read.csv(inputPath, header = TRUE) # needs at least a Date and a Time column
# Parse the Date and Time columns and derive the day index of every record.
# as.character() guards against the columns having been read as factors.
data$Date <- mdy(as.character(data$Date)) # month/day/year text -> Date
if (anyNA(data$Date)) {
  # A single unparsed date would make the first date (and therefore every day
  # index) NA, which otherwise only fails later with an obscure message.
  stop("Some values in the Date column could not be parsed as month/day/year.",
       call. = FALSE)
}
data$Time <- hms(as.character(data$Time)) # hours:minutes:seconds text -> lubridate Period
data$Hours <- hour(data$Time) # hour component of Time
data$Minutes <- minute(data$Time) # minute component of Time
# Day index counted from the earliest date in the file, starting at day 1.
# (The earliest date is no longer stored in a variable called `min`, which
# shadowed base::min.)
firstDate <- min(data$Date)
data$Days <- as.numeric(difftime(data$Date, firstDate, units = "days")) + 1
# Sort by day, then by time of day expressed as plain seconds, so the ordering
# does not rely on how order() treats Period objects.
dataSort <- data[order(data$Days, as.numeric(as.duration(data$Time))), ]
dataSort$Duration <- as.duration(dataSort$Time) # time of day as a duration in seconds
UniqueDays <- unique(dataSort$Days) # the distinct day indices present in the data
# Plot title: the chosen file's name without its extension. Only the final
# extension is stripped, so a name such as "site.A.csv" becomes "site.A"
# rather than being cut at the first dot.
titleB <- tools::file_path_sans_ext(name)
# ---- Build the empty day x interval grid ----
# max() rather than length(): a day may be missing from the data set.
numberofdays <- max(UniqueDays)
daysqu <- seq_len(numberofdays) # every day from 1 to the last one, gaps included
period <- 600 # interval width in seconds (10 minutes)
interval <- seq(from = 0, to = 86400, by = period) # boundaries across 24 hours (86400 s)
numberofintervals <- length(interval) # total number of interval boundaries
# Long-format skeleton: each day repeated once per interval (kept grouped by
# day), paired with the complete interval list repeated for every day.
totalDays <- rep(daysqu, each = numberofintervals)
totalInterval <- rep(interval, times = numberofdays)
TableLongFormat <- data.frame(Day = totalDays, Intervals = totalInterval)
# ---- Count records per day and per zone (one zone = one `period`) ----
# Zone z covers the seconds ((z - 1) * period, z * period]. A record stamped at
# exactly 00:00:00 would round to zone 0 and then be discarded together with
# the zone-0 grid rows removed below, so it is placed in zone 1 instead.
dataSort$Zones <- pmax(ceiling(as.numeric(dataSort$Duration) / period), 1)
Zones <- interval / period # zone index of every interval boundary, starting at 0
#MedianDay = median(dataSort$Day)
#MedianZones = median(dataSort$Zones)
TableLongFormat$Zones <- TableLongFormat$Intervals / period
# plyr::count() returns one row per observed Days/Zones pair with its frequency;
# the explicit namespace keeps another attached package from masking it.
TableCounts <- plyr::count(dataSort, c("Days", "Zones"))
colnames(TableCounts) <- c("Day", "Zones", "Counts")
# Full outer join onto the grid: day/zone cells with no records get Counts = NA.
MergedTable <- merge(TableLongFormat, TableCounts, all = TRUE, by = c("Day", "Zones"))
MergedTable <- subset(MergedTable, Zones > 0) # drop the zone-0 grid rows
# Clock time corresponding to each row's Intervals value (seconds).
MergedTable$Hours <- MergedTable$Intervals / 3600
MergedTable$HoursWhole <- floor(MergedTable$Hours)
MergedTable$Minutes <- round(60 * (MergedTable$Hours - MergedTable$HoursWhole), digits = 0)
# Zero-padded "HH:MM" label (plain paste() produced strings such as "7:0").
MergedTable$Time <- sprintf("%02d:%02d", MergedTable$HoursWhole, MergedTable$Minutes)
# Two shaded "ramp" windows, A and B, expressed in zone units:
# 60 / (period / 60) is the number of zones per hour, so the windows run from
# hour 7 to hour 8 and from hour 19 to hour 20.
Ramp <- c("A", "B")
Start <- c(7, 19) * (60 / (period / 60))
End <- c(8, 20) * (60 / (period / 60))
RampTable <- data.frame(Ramp = Ramp, Start = Start, End = End)
# Bubble chart of record counts: x = zone of the day, y = day index, circle
# area proportional to the number of records in that day/zone cell.
# The plot is stored and printed explicitly so that it is also drawn when the
# script is run through source(), where top-level values are not auto-printed.
bubblePlot <- ggplot(MergedTable, aes(x = Zones, y = Day)) +
  geom_point(aes(size = Counts), shape = 1) +
  scale_size_area(max_size = 20) +
  # Zones is numeric, so the axis is continuous; ticks stay at the same zone
  # positions and carry the same clock-time labels as before.
  scale_x_continuous(breaks = c(1, 24, 48, 72, 96, 120),
                     labels = c("00:00", "04:00", "08:00", "12:00", "16:00", "20:00")) +
  scale_y_continuous(breaks = seq_len(numberofdays),
                     labels = seq_len(numberofdays)) +
  xlab("Time in hours") +
  ylab("Day") +
  #geom_hline(aes(yintercept  = MedianDay, color = "red")) +
  #geom_vline(aes(xintercept = MedianZones, color = "red")) +
  # Shaded ramp windows. RampTable has no Zones/Day columns, so the plot-level
  # aesthetics are not inherited by this layer.
  # NOTE(review): Fristymax is 0 in this script, so the rectangles span y = 0
  # to 0 and have no visible height - confirm the intended upper limit.
  geom_rect(aes(xmin = Start, xmax = End, fill = Ramp), data = RampTable,
            ymin = 0, ymax = Fristymax, inherit.aes = FALSE) +
  #geom_rect(aes(NULL, NULL, xmin = Start, xmax = End, fill = Ramp), ymin = 0, ymax = Lastymax, data = RampTable) +
  scale_fill_manual(values = alpha(c("blue", "blue"), 0.2), guide = "none") +
  ggtitle(titleB) +
  theme(
    axis.line.x = element_line(color = "black", size = .5),
    axis.line.y = element_line(color = "black", size = .5),
    panel.background = element_rect(fill = "white"),
    axis.text.x = element_text(color = "black", face = "bold"),
    axis.title.x = element_text(color = "black", face = "bold"),
    axis.text.y = element_text(color = "black", face = "bold"),
    axis.title.y = element_text(color = "black", face = "bold")
  )
print(bubblePlot)
# The warning "Removed 2271 rows containing missing values (geom_point)" just reflects the NAs in the Counts column; don't worry about it.
#write.csv(data, file ="data.csv")