#### 1. Setup ####
# 1.1. Turn off scientific notation
options(scipen = 999)

# 1.2. Load required packages
library("Rfacebook")
library("dplyr")
# tidyr provides spread(), which section 8 calls to reshape the monthly counts.
# It is attached before igraph so that igraph's exports keep precedence where
# the two packages share a function name.
library("tidyr")
library("igraph")
library("openxlsx")

# 1.3 Load retention and user data
jfh.retention <- read.csv("~/Desktop/jfh.members.retention.csv", header = TRUE)
jfh.users <- read.xlsx("~/Desktop/jfh.users.xlsx")
# Attach columns 1 and 3 of the retention table to every user, matching the
# users' "ID" against the retention "id".
# NOTE(review): column 3 is presumably membership.length (it is read from
# jfh.posts under that name later on) - confirm against the CSV.
jfh.users <- left_join(
  jfh.users,
  jfh.retention[, c(1, 3)],
  by = c("ID" = "id")
)

#### 2. Get Facebook data ####
# Note: due to changes Facebook made to its Graph API, all functions of the
# package now require a working app that has undergone App Review.
# more details are available here: https://developers.facebook.com/docs/pages/

# 2.2. Create access token
#fb_oauth <- fbOAuth(app_id = "app_id", app_secret = "app_secret", extended_permissions = TRUE)

# 2.3. Get Facebook group posts data
# The download itself is commented out; the posts are read back from the
# spreadsheet that the commented write.xlsx() call would have produced.
#jfh.posts <- getGroup(groupid, token = fb_oauth, n = 1000, since = "2015/01/01", until = "2015/11/05", feed = TRUE, api = NULL)
#write.xlsx(jfh.posts, "~/Desktop/jfh.posts.xlsx")
jfh.posts <- read.xlsx("~/Desktop/jfh.posts.xlsx")
# Add the author's user record to each post (matched on the display name).
jfh.posts <- left_join(jfh.posts, jfh.users, by = c("from_name" = "Name"))

# 2.4.
# Get Facebook group post likes data
# The original Graph API download is kept below as commented-out code; the
# live code reads the spreadsheet that download wrote.
#getFBpostlikes <- function(x){
#  postIDlist <- x
#  postslikes <- lapply(1:nrow(postIDlist), function(x) as.data.frame(getPost(postIDlist[x,], token=fb_oauth)$likes))
#  postIDs <- lapply(1:nrow(postIDlist), function(x) data.frame(postid = rep(postIDlist[x,], nrow(postslikes[[x]]))))
#  postslikes <- lapply(1:nrow(postIDlist), function(x) cbind(postslikes[[x]], postIDs[[x]]))
#  postslikes <- do.call(rbind,postslikes)
#  postslikes$from_id <- paste("'", postslikes$from_id, sep = "")
#  return(postslikes)
#}
#postIDlist <- data.frame(posts.id = jfh.posts$id)
#jfh.postlikes <- getFBpostlikes(postIDlist)
#write.xlsx(jfh.postlikes, "~/Desktop/jfh.postlikes.xlsx")

# Read the likes and pull in three columns of the liked post; column 7 of
# jfh.posts is the join key "id".
jfh.postlikes <- read.xlsx("~/Desktop/jfh.postlikes.xlsx") %>%
  left_join(jfh.posts[, c(7, 2, 4)], by = c("postid" = "id"))

# Column 1 becomes the sender of the like, column 4 its recipient.
# NOTE(review): presumably column 4 is the post author's name brought in by
# the join above - confirm against the spreadsheet layout.
names(jfh.postlikes)[c(1, 4)] <- c("from_name", "to_name")

# Look up the user record for both ends of the interaction.
jfh.postlikes <- jfh.postlikes %>%
  left_join(jfh.users, by = c("from_name" = "Name")) %>%
  left_join(jfh.users, by = c("to_name" = "Name"))

# The two lookups occupy columns 6 to 11: sender fields first, then recipient.
names(jfh.postlikes)[6:11] <- c(
  "from.anonid", "from.role", "from.ret",
  "to.anonid", "to.role", "to.ret"
)

# 2.5.
# Get Facebook group comments data
# The original Graph API download is kept below as commented-out code; the
# live code reads the spreadsheet that download wrote.
#getFBpostcomms <- function(x){
#  postIDlist <- x
#  postscomments <- lapply(1:nrow(postIDlist), function(x) as.data.frame(getPost(postIDlist[x,], token=fb_oauth)$comments))
#  postIDs <- lapply(1:nrow(postIDlist), function(x) data.frame(postid = rep(postIDlist[x,], nrow(postscomments[[x]]))))
#  postscomments <- lapply(1:nrow(postIDlist), function(x) cbind(postscomments[[x]], postIDs[[x]]))
#  postscomments <- do.call(rbind, postscomments)
#  postscomments$from_id <- paste("'", postscomments$from_id, sep = "")
#  return(postscomments)
#}
#jfh.postscomments <- getFBpostcomms(postIDlist)
#write.xlsx(jfh.postscomments, "~/Desktop/jfh.comments.xlsx")

# Read the comments and pull in two columns of the commented post; column 7
# of jfh.posts is the join key "id".
jfh.comments <- read.xlsx("~/Desktop/jfh.comments.xlsx") %>%
  left_join(jfh.posts[, c(7, 2)], by = c("postid" = "id"))

# Column 2 becomes the commenter, column 9 the recipient of the comment.
# NOTE(review): presumably column 9 is the post author's name brought in by
# the join above - confirm against the spreadsheet layout.
names(jfh.comments)[c(2, 9)] <- c("from_name", "to_name")

# Look up the user record for both ends of the interaction.
jfh.comments <- jfh.comments %>%
  left_join(jfh.users, by = c("from_name" = "Name")) %>%
  left_join(jfh.users, by = c("to_name" = "Name"))

# The two lookups occupy columns 10 to 15: sender fields first, then recipient.
names(jfh.comments)[10:15] <- c(
  "from.anonid", "from.role", "from.ret",
  "to.anonid", "to.role", "to.ret"
)

# 2.6.
# Get Facebook group comment likes data
# The original Graph API download is kept below as commented-out code; the
# live code reads the spreadsheet that download wrote.
# NOTE(review): the third lapply() in the commented-out helper binds
# commIDlist[[x]] where the sibling helpers bind their per-item ID frame
# (here that would be commentIDs[[x]]) - check before re-enabling it.
#commIDlist <- data.frame(comments.id = jfh.postscomments$comments.id)
#getFBcommentlikes <- function(x){
#  commIDlist <- x
#  commentslikes <- lapply(1:nrow(commIDlist), function(x) as.data.frame(getLikes(commIDlist[x,], n = 1000, token=fb_oauth)))
#  commentIDs <- lapply(1:nrow(commIDlist), function(x) data.frame(commid = rep(commIDlist[x,], nrow(commentslikes[[x]]))))
#  commentslikes <- lapply(1:nrow(commIDlist), function(x) cbind(commentslikes[[x]], commIDlist[[x]]))
#  commentslikes <- do.call(rbind, commentslikes)
#  commentslikes$from_id <- paste("'", commentslikes$from_id, sep = "")
#  return(commentslikes)
#}
#jfh.commentslikes <- getFBcommentlikes(commIDlist)
#write.xlsx(jfh.commentslikes, "~/Desktop/jfh.commentlikes.xlsx")

# Read the likes and pull in three columns of the liked comment; column 7 of
# jfh.comments is the join key "id".
jfh.commentlikes <- read.xlsx("~/Desktop/jfh.commentlikes.xlsx") %>%
  left_join(jfh.comments[, c(7, 2, 4)], by = c("commentid" = "id"))

# Column 1 becomes the sender of the like, column 4 its recipient.
# NOTE(review): presumably column 4 is the comment author's name brought in
# by the join above - confirm against the spreadsheet layout.
names(jfh.commentlikes)[c(1, 4)] <- c("from_name", "to_name")

# Look up the user record for both ends of the interaction.
jfh.commentlikes <- jfh.commentlikes %>%
  left_join(jfh.users, by = c("from_name" = "Name")) %>%
  left_join(jfh.users, by = c("to_name" = "Name"))

# The two lookups occupy columns 6 to 11: sender fields first, then recipient.
names(jfh.commentlikes)[6:11] <- c(
  "from.anonid", "from.role", "from.ret",
  "to.anonid", "to.role", "to.ret"
)

# 2.7.
# Combine posts, post likes, comments, and comment likes data into one dataframe
# Every source table is mapped onto the same 15 columns and the four pieces
# are stacked in the order posts, comments, post likes, comment likes.
# "week" and "month" start out as the placeholder "x".

# Reduce a Facebook timestamp to a compact date string: everything from the
# first "T" onwards is cut off, then the dashes are removed.
fb.date <- function(stamp) gsub("-", "", gsub("T.*", "", stamp))

jfh.data <- do.call(rbind, list(
  # Posts have no recipient, so every to.* field is NA.
  data.frame(
    num = 1:nrow(jfh.posts),
    int.id = NA,
    type = "posts",
    from.anonid = jfh.posts$ID,
    from.role = jfh.posts$Role,
    to.anonid = NA,
    to.role = NA,
    from.ret = jfh.posts$membership.length,
    to.ret = NA,
    date = fb.date(jfh.posts$created_time),
    week = "x",
    month = "x",
    text = jfh.posts$message,
    comments = jfh.posts$comments_count,
    likes = jfh.posts$likes_count,
    stringsAsFactors = FALSE
  ),
  # Comments carry text and a like count but no comment count.
  data.frame(
    num = 1:nrow(jfh.comments),
    int.id = NA,
    type = "comments",
    from.anonid = jfh.comments$from.anonid,
    from.role = jfh.comments$from.role,
    to.anonid = jfh.comments$to.anonid,
    to.role = jfh.comments$to.role,
    from.ret = jfh.comments$from.ret,
    to.ret = jfh.comments$to.ret,
    date = fb.date(jfh.comments$created_time),
    week = "x",
    month = "x",
    text = jfh.comments$message,
    comments = NA,
    likes = jfh.comments$likes_count,
    stringsAsFactors = FALSE
  ),
  # Likes have neither text nor counts of their own.
  data.frame(
    num = 1:nrow(jfh.postlikes),
    int.id = NA,
    type = "postlikes",
    from.anonid = jfh.postlikes$from.anonid,
    from.role = jfh.postlikes$from.role,
    to.anonid = jfh.postlikes$to.anonid,
    to.role = jfh.postlikes$to.role,
    from.ret = jfh.postlikes$from.ret,
    to.ret = jfh.postlikes$to.ret,
    date = fb.date(jfh.postlikes$created_time),
    week = "x",
    month = "x",
    text = NA,
    comments = NA,
    likes = NA,
    stringsAsFactors = FALSE
  ),
  data.frame(
    num = 1:nrow(jfh.commentlikes),
    int.id = NA,
    type = "commentlikes",
    from.anonid = jfh.commentlikes$from.anonid,
    from.role = jfh.commentlikes$from.role,
    to.anonid = jfh.commentlikes$to.anonid,
    to.role = jfh.commentlikes$to.role,
    from.ret = jfh.commentlikes$from.ret,
    to.ret = jfh.commentlikes$to.ret,
    date = fb.date(jfh.commentlikes$created_time),
    week = "x",
    month = "x",
    text = NA,
    comments = NA,
    likes = NA,
    stringsAsFactors = FALSE
  )
))

# 2.8. Add monthly breakdown (i.e.
# Month 1 to 8)
# The cut-offs are 28 days apart. A date before the first cut-off is month 1,
# and a date on or after cut-off k falls into month k + 1. Dates are stored as
# "YYYYMMDD" strings, so they are converted to numbers before comparing.
month.starts <- c(
  20150429, 20150527, 20150624, 20150722, 20150819, 20150916, 20151014
)
jfh.data$month <- findInterval(as.numeric(jfh.data$date), month.starts) + 1

# 2.9. Create dataframe of user data
# One row per distinct (user, role) pair; the role is coded Staff = 1,
# Client = 2, anything else = 3, and users with a missing role are dropped.
jfh.users.data <- jfh.data[, 4:5] %>%
  distinct() %>%
  arrange(from.anonid) %>%
  mutate(
    role = ifelse(from.role == "Staff", 1,
           ifelse(from.role == "Client", 2, 3))
  ) %>%
  filter(!is.na(role))

#### 3. Calculate Facebook activity by each client ####
# 3.1. Calculate number of posts, comments, post likes and comment likes made by each client
jfh.activities.from <- jfh.data %>% count(type, from.anonid)
jfh.activities.from <- split(jfh.activities.from, jfh.activities.from$type)

# 3.2. Calculate number of posts, comments, post likes and comment likes received by each client
jfh.activities.to <- jfh.data %>% count(type, to.anonid)
jfh.activities.to <- split(jfh.activities.to, jfh.activities.to$type)

# 3.3 Join counts of facebook activities to member retention data
# Each count column is named "<type>.made" / "<type>.recieved" before it is
# joined, so the result does not depend on how many columns jfh.retention
# already has. The "recieved" spelling is kept because later steps look the
# columns up by these exact names.
for (i in seq_along(jfh.activities.from)) {
  colnames(jfh.activities.from[[i]])[2] <- "id"
  colnames(jfh.activities.to[[i]])[2] <- "id"

  activity <- names(jfh.activities.from)[i]
  made <- jfh.activities.from[[i]][, c(2, 3)]
  recieved <- jfh.activities.to[[i]][, c(2, 3)]
  colnames(made)[2] <- paste(activity, "made", sep = ".")
  colnames(recieved)[2] <- paste(activity, "recieved", sep = ".")

  jfh.retention <- left_join(jfh.retention, made, by = "id")
  jfh.retention <- left_join(jfh.retention, recieved, by = "id")
}

# 3.4 Calculate difference between posts and comments with likes and no likes

# Count, per user, the items of one type that did and did not attract
# interactions.
#
# x: name of the jfh.data column identifying the user (e.g. "from.anonid").
# y: name of the jfh.data column holding the interaction count
#    (e.g. "likes" or "comments").
# z: value of jfh.data$type selecting which items to look at.
#
# Returns a data frame with one row per user who has at least one item of
# type z: "id", "int" (items whose y is non-zero), "noint" (items whose y is
# zero) and "likes.diff" (int - noint). Items with a missing user or a
# missing count are ignored. Users whose items all have zero interactions are
# included, with int = 0.
diffcounter <- function(x, y, z) {
  items <- jfh.data[jfh.data$type == z, c(x, y)]
  items <- items[!is.na(items[[x]]) & !is.na(items[[y]]), , drop = FALSE]

  ids <- sort(unique(items[[x]]))
  interacted <- items[[y]] != 0

  # Tabulating against the full id list gives an explicit 0 for users who
  # have no items in one of the two groups.
  jfh.diff <- data.frame(
    id = ids,
    int = as.vector(table(factor(items[[x]][interacted], levels = ids))),
    noint = as.vector(table(factor(items[[x]][!interacted], levels = ids))),
    stringsAsFactors = FALSE
  )
  jfh.diff$likes.diff <- jfh.diff$int - jfh.diff$noint
  jfh.diff
}

differences.likes <- lapply(
  c("posts", "comments"),
  function(x) diffcounter("from.anonid", "likes", x)
)
colnames(differences.likes[[1]]) <- c("id", "post.likes", "post.nolikes", "post.likes.diff")
colnames(differences.likes[[2]]) <- c("id", "comment.likes", "comment.nolikes", "comment.likes.diff")

# 3.5 Calculate difference between posts with comments and no comments
differences.comments <- diffcounter("from.anonid", "comments", "posts")
colnames(differences.comments) <- c("id", "post.comments", "post.nocomments", "post.comments.diff")

# 3.6 Join likes difference data to retention data
jfh.retention <- left_join(jfh.retention, differences.likes[[1]], by = "id")
jfh.retention <- left_join(jfh.retention, differences.likes[[2]], by = "id")

# 3.7 Join comments difference data to retention data
jfh.retention <- left_join(jfh.retention, differences.comments, by = "id")

# 3.8 Calculate all likes made by each client
# The count columns are still NA for users without that kind of activity, so
# the totals are taken with na.rm = TRUE; a plain "+" would turn a user with
# only one kind of like into NA.
jfh.retention$all.likes.made <- rowSums(
  jfh.retention[, c("postlikes.made", "commentlikes.made")],
  na.rm = TRUE
)

# 3.9 Calculate all likes received by each client
jfh.retention$all.likes.recieved <- rowSums(
  jfh.retention[, c("postlikes.recieved", "commentlikes.recieved")],
  na.rm = TRUE
)

# 3.10 Identify which users did not participate in Facebook group (no activity)
# 1 = the user made nothing at all, 0 = the user made at least one item.
# Missing counts are treated as zero so that inactive users are flagged.
made.cols <- grepl("made", colnames(jfh.retention))
made.total <- rowSums(jfh.retention[, made.cols, drop = FALSE], na.rm = TRUE)
jfh.retention$no.activity <- ifelse(made.total > 0, 0, 1)

#### 4. Social Network Analysis ####
# 4.1.
# Create edge list
# Every non-post row with a known sender and recipient is one interaction;
# "relname" labels the directed pair.
edges.all <- jfh.data[jfh.data$type != "posts", c(4, 6)] %>%
  mutate(relname = paste0(from.anonid, " to ", to.anonid)) %>%
  filter(!is.na(from.anonid), !is.na(to.anonid))

# One row per distinct pair, weighted by how often that pair occurs.
edges <- distinct(edges.all) %>%
  left_join(count(edges.all, relname), by = "relname")
names(edges) <- c("from", "to", "relname", "weight")

# 4.2. Create vertex list

# Build a vertex table from an edge table.
#
# x: edge data frame.
# a, b: indices (or names) of the two endpoint columns of x.
#
# Returns one row per distinct endpoint id (coerced to integer) with "weight",
# the number of times that id appears in either endpoint column.
make.sna.vertex <- function(x, a, b) {
  endpoints <- data.frame(id = c(as.integer(x[, a]), as.integer(x[, b])))
  tally <- distinct(count(endpoints, id))
  colnames(tally)[2] <- "weight"
  tally
}

vertexes <- make.sna.vertex(edges, 1, 2)

# 4.3. Create graph and graph matrix objects
graph <- graph.data.frame(edges, directed = TRUE, vertices = vertexes)
graph.m <- get.adjacency(graph, sparse = FALSE)

# 4.4. Calculate network stats for all interactions and join to retention data
# NOTE(review): the edges carry a "weight" attribute; check whether
# betweenness() should treat interaction counts as path lengths here.
vertexes$degree <- degree(graph)
vertexes$betweenness <- betweenness(graph)
vertexes$id <- as.integer(vertexes$id)
jfh.retention <- left_join(jfh.retention, vertexes, by = "id")

# 4.5. Calculate network stats only for post-comment interactions
# Same construction restricted to comments; these edges are unweighted
# distinct pairs.
edges.postcomm <- jfh.data[jfh.data$type == "comments", c(4, 6)] %>%
  mutate(relname = paste0(from.anonid, " to ", to.anonid)) %>%
  filter(!is.na(from.anonid), !is.na(to.anonid)) %>%
  distinct()

vertexes.postcomm <- make.sna.vertex(edges.postcomm, 1, 2)
graph.postcomm <- graph.data.frame(
  edges.postcomm,
  directed = TRUE,
  vertices = vertexes.postcomm
)
graph.m.postcomm <- get.adjacency(graph.postcomm, sparse = FALSE)

vertexes.postcomm$degree.postcomm <- degree(graph.postcomm)
vertexes.postcomm$betweenness.postcomm <- betweenness(graph.postcomm)
vertexes.postcomm$id <- as.integer(vertexes.postcomm$id)
jfh.retention <- left_join(jfh.retention, vertexes.postcomm, by = "id")

#### 5. Conduct computerized linguistic analysis in LIWC ####
# 5.1.
# Export textual data along with user ID and text data type (post/comment) into a csv file to run through LIWC
# Columns 3, 4 and 13 of jfh.data are type, from.anonid and text.
write.csv(
  jfh.data[jfh.data$type == "posts" | jfh.data$type == "comments", c(3, 4, 13)],
  "~/Desktop/jfh.liwc.csv",
  row.names = FALSE
)

# 5.4. Import LIWC results file into dataframe
jfh.liwc.results <- read.csv(
  "~/Desktop/jfh.liwc.results.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# The first data row supplies the names of the first three columns (the
# exported type / from.anonid / text header); it is then dropped.
colnames(jfh.liwc.results)[1:3] <- unlist(
  jfh.liwc.results[1, 1:3],
  use.names = FALSE
)
jfh.liwc.results <- jfh.liwc.results[2:nrow(jfh.liwc.results), ]

# 5.5. Calculate total LIWC scores per user in posts and comments, and join to retention data
# One aggregate per LIWC column (columns 4 onwards), summed per user and type.
jfh.liwc <- lapply(
  4:ncol(jfh.liwc.results),
  function(x) aggregate(jfh.liwc.results[, x] ~ from.anonid + type, jfh.liwc.results, sum)
)

# Bind the score columns side by side: the first aggregate keeps its
# from.anonid and type keys, the key columns of all the others are removed.
jfh.liwc.peruser <- do.call(cbind, jfh.liwc[-1])
jfh.liwc.peruser <- cbind(
  jfh.liwc[1],
  jfh.liwc.peruser[, -c(which(grepl("type", colnames(jfh.liwc.peruser)) |
                              grepl("from.anonid", colnames(jfh.liwc.peruser))))]
)
colnames(jfh.liwc.peruser)[c(1, 3:ncol(jfh.liwc.peruser))] <-
  c("id", colnames(jfh.liwc.results)[4:ncol(jfh.liwc.results)])
jfh.liwc.peruser$id <- as.integer(jfh.liwc.peruser$id)

# split() orders the groups by sorted type value, so with the exported types
# "comments" and "posts" element 1 holds comments and element 2 holds posts.
jfh.liwc.peruser <- split(jfh.liwc.peruser, jfh.liwc.peruser$type)

# The "posts." / "comments." prefixes are applied to the score columns before
# each join. Renaming by fixed column position afterwards only works while
# jfh.retention has exactly the expected number of columns, and mislabels
# columns otherwise.
liwc.posts <- jfh.liwc.peruser[[2]][, c(1, 3:ncol(jfh.liwc.peruser[[2]]))]
colnames(liwc.posts)[-1] <- paste("posts", colnames(liwc.posts)[-1], sep = ".")
jfh.retention <- left_join(jfh.retention, liwc.posts, by = "id")

liwc.comments <- jfh.liwc.peruser[[1]][, c(1, 3:ncol(jfh.liwc.peruser[[1]]))]
colnames(liwc.comments)[-1] <- paste("comments", colnames(liwc.comments)[-1], sep = ".")
jfh.retention <- left_join(jfh.retention, liwc.comments, by = "id")

# 5.6.
# Calculate total LIWC scores per user in all textual data (post and comments combined), and join to retention data
jfh.liwc.all <- lapply(
  4:ncol(jfh.liwc.results),
  function(x) aggregate(jfh.liwc.results[, x] ~ from.anonid, jfh.liwc.results, sum)
)
# Bind the score columns side by side: the first aggregate keeps its
# from.anonid key, the key columns of all the others are removed.
jfh.liwc.all.peruser <- do.call(cbind, jfh.liwc.all[-1])
jfh.liwc.all.peruser <- cbind(
  jfh.liwc.all[1],
  jfh.liwc.all.peruser[, -c(which(grepl("from.anonid", colnames(jfh.liwc.all.peruser))))]
)
colnames(jfh.liwc.all.peruser)[c(1:ncol(jfh.liwc.all.peruser))] <-
  c("id", paste("all.", colnames(jfh.liwc.results)[4:ncol(jfh.liwc.results)], sep = ""))
jfh.liwc.all.peruser$id <- as.integer(jfh.liwc.all.peruser$id)
jfh.retention <- left_join(jfh.retention, jfh.liwc.all.peruser, by = "id")

# 5.7. Remove NAs from retention data dataframe
jfh.retention[is.na(jfh.retention)] <- 0

#### 6. Regression analysis ####
# 6.1. Define independent variables
IVs <- c(
  "commentlikes.recieved", "all.likes.recieved", "comment.likes.diff",
  "degree", "posts.we", "posts.achieve", "all.achieve"
)

# 6.2. Run regression analysis
# One simple regression of membership length per independent variable.
# data1: slope row of the coefficient table, data2: R squared,
# data3: slope of the same model fitted on standardised variables.
iv.values <- function(iv) jfh.retention[, match(iv, colnames(jfh.retention))]

# Each unstandardised model is fitted once and reused for data1 and data2.
raw.fits <- lapply(
  IVs,
  function(iv) summary(lm(jfh.retention$membership.length ~ iv.values(iv)))
)
data1 <- lapply(raw.fits, function(fit) as.data.frame(fit$coefficients)[2, ])
data2 <- lapply(raw.fits, function(fit) fit$r.squared)
data3 <- lapply(
  IVs,
  function(iv) as.data.frame(
    summary(lm(scale(jfh.retention$membership.length) ~ scale(iv.values(iv))))$coefficients
  )[2, 1]
)

# 6.3 Combine regression analysis results into one data.frame
data1 <- do.call(rbind, data1)
# The strictest threshold is tested first; testing "< 0.05" first would label
# every significant result "*" and never reach "**" or "***".
p.values <- data1$`Pr(>|t|)`
data1$pstar <- ifelse(p.values < 0.001, "***",
               ifelse(p.values < 0.01, "**",
               ifelse(p.values < 0.05, "*", "ns")))
data2 <- do.call(rbind, data2)
data3 <- do.call(rbind, data3)
# Rows follow the order of IVs.
regressions <- data.frame(
  B = round(data1$Estimate, 2),
  SE = round(data1$`Std. Error`, 2),
  Beta = paste(round(data3, 2), data1$pstar, sep = ""),
  R2 = round(data2, 2)
)

#### 7. Create Monthly SNA Maps ####
# 7.1. Prepare data frames for SNA Maps
# split() only sets up one list slot per month; each slot is then overwritten
# with the cumulative data up to and including month i.
jfh.data.split <- split(jfh.data, jfh.data$month)
for (i in seq_along(jfh.data.split)) {
  jfh.data.split[[i]] <- jfh.data[jfh.data$month < i + 1, ]
}

# 7.2. Create edge list based on monthly cumulative Facebook activity
# Only comment likes are used for the maps.
edges.all.split <- lapply(
  jfh.data.split,
  function(x) x[x$type == "commentlikes", c(4, 6)] %>%
    mutate(relname = paste(from.anonid, " to ", to.anonid, sep = "")) %>%
    filter(!is.na(from.anonid)) %>%
    filter(!is.na(to.anonid))
)
# Distinct pairs, with self-likes (sender == recipient) removed.
edges.split <- lapply(
  edges.all.split,
  function(x) x %>%
    distinct %>%
    mutate(relname = ifelse(from.anonid == to.anonid, "remove", as.character(relname))) %>%
    filter(!relname %in% "remove")
)
# Weight each pair by how often it occurs in the cumulative data.
for (i in seq_along(edges.split)) {
  edges.split[[i]] <- left_join(
    edges.split[[i]],
    edges.all.split[[i]] %>% count(relname),
    by = "relname"
  )
  colnames(edges.split[[i]]) <- c("from", "to", "relname", "weight")
}

# 7.3. Create vertex list based on monthly cumulative Facebook activity
vertexes.split <- lapply(edges.split, function(x) make.sna.vertex(x, 1, 2))
vertexes.split <- lapply(
  seq_along(vertexes.split),
  function(x) left_join(vertexes.split[[x]], jfh.users.data, by = c("id" = "from.anonid"))
)

# Vertex sizes are binned into five classes, each one fifth of the largest
# weight in the final cumulative month. The last list element is used rather
# than a fixed index so this also works when fewer than 8 months have data.
# The variable keeps its original name "max"; calls to max() are unaffected
# because R skips non-function objects when looking up a function.
max <- max(vertexes.split[[length(vertexes.split)]]$weight) / 5
for (i in seq_along(vertexes.split)) {
  # Class k covers weights in [(k - 1) * max, k * max); class 5 is open-ended.
  vertexes.split[[i]]$weight2 <- findInterval(vertexes.split[[i]]$weight, max * 1:4) + 1
  # Only vertices with role code 2 are labelled, using their id.
  vertexes.split[[i]]$label <- ifelse(vertexes.split[[i]]$role == 2, vertexes.split[[i]]$id, "")
}

# 7.4. Create graphs and graph matrices based on monthly cumulative Facebook activity
graph.split <- lapply(
  seq_along(edges.split),
  function(x) graph.data.frame(edges.split[[x]], vertexes.split[[x]], directed = TRUE)
)
graph.m.split <- lapply(graph.split, function(x) get.adjacency(x, sparse = FALSE))

# 7.5.
# Set layout of SNA maps based on cumulative Facebook activity
layout.split <- lapply(graph.split, function(x) layout.fruchterman.reingold(x))

# 7.6. Add colours based on user roles
# The numeric role code of each vertex indexes into this palette.
colrs <- c("#619CFF", "#F8766D", "#00BA38")
for (i in seq_along(graph.split)) {
  V(graph.split[[i]])$color <- colrs[V(graph.split[[i]])$role]
}

# 7.7. Create SNA maps and save to file
# One 5000 x 5000 PNG per cumulative month, named by the month index.
# NOTE(review): this is a user-specific absolute path, while every other file
# in the script lives under "~/Desktop" - confirm before running elsewhere.
for (i in seq_along(graph.split)) {
  png(
    filename = paste("/Users/muhammadiqbal/Desktop/SNA maps/", i, ".png", sep = ""),
    width = 5000,
    height = 5000
  )
  # The device is closed even if plotting fails, so that a failed plot does
  # not leave a PNG device open.
  tryCatch(
    plot(
      graph.split[[i]],
      layout = layout.split[[i]],
      edge.arrow.size = 3,
      edge.width = 6,
      vertex.size = V(graph.split[[i]])$weight2 * 4.5,
      vertex.frame.color = "black",
      vertex.label = V(graph.split[[i]])$label,
      vertex.label.cex = V(graph.split[[i]])$weight2 * 4,
      vertex.label.family = "Helvetica",
      vertex.label.color = "black"
    ),
    finally = dev.off()
  )
}

#### 8. Calculate monthly cumulative Facebook activity of the Facebook group ####
# 8.1. Calculate monthly cumulative Facebook activity by Staff, Clients, Other members of the Facebook group
# Posts and comments are merged into one category. Each cell reads
# "<count in month> (<running total>)" for one role/activity combination.
cumulative.fb.activity.role <- jfh.data %>%
  mutate(type = ifelse(type == "posts" | type == "comments", "posts and comments", as.character(type))) %>%
  count(month, from.role, type) %>%
  mutate(type = gsub("likes", " likes", type)) %>%
  mutate(from.role.type = paste(from.role, type, sep = " ")) %>%
  group_by(from.role.type) %>%
  mutate(n = paste(n, " (", cumsum(n), ")", sep = "")) %>%
  filter(!is.na(from.role)) %>%
  filter(!is.na(month)) %>%
  ungroup()

# Reshape to one row per role/activity and one column per month. spread() is
# called through its namespace because the script does not attach tidyr.
cumulative.fb.activity.role <- tidyr::spread(
  cumulative.fb.activity.role[, c("month", "n", "from.role.type")],
  month,
  n
)
colnames(cumulative.fb.activity.role)[-1] <- paste(
  "Month",
  colnames(cumulative.fb.activity.role)[-1],
  sep = " "
)

# 8.2.
# Calculate monthly cumulative Facebook activity by all members of the Facebook group
# Posts and comments are merged into one category. Each cell reads
# "<count in month> (<running total>)" for one activity type.
cumulative.fb.activity.all <- jfh.data %>%
  mutate(type = ifelse(type == "posts" | type == "comments", "posts and comments", as.character(type))) %>%
  count(month, type) %>%
  mutate(type = gsub("likes", " likes", type)) %>%
  mutate(from.role.type = type) %>%
  group_by(type) %>%
  mutate(n = paste(n, " (", cumsum(n), ")", sep = "")) %>%
  filter(!is.na(month)) %>%
  ungroup()

# Reshape to one row per activity type and one column per month. The columns
# are picked by name after ungrouping, so the grouping column "type" can be
# dropped safely. spread() is called through its namespace because the script
# does not attach tidyr.
cumulative.fb.activity.all <- tidyr::spread(
  cumulative.fb.activity.all[, c("month", "n", "from.role.type")],
  month,
  n
)
# The month labels are derived from this table's own columns, so they stay
# correct even if the per-role table covers a different set of months.
colnames(cumulative.fb.activity.all)[-1] <- paste(
  "Month",
  colnames(cumulative.fb.activity.all)[-1],
  sep = " "
)

# 8.3. Join monthly cumulative Facebook activity dataframes together
# Overall rows first, then the per-role rows.
cumulative.fb.activity <- bind_rows(cumulative.fb.activity.all, cumulative.fb.activity.role)