library(data.table)

# Number of resistant clones in the experiment
n_clones <- 3L

## Load the variants file for each resistant clone and add a sample label
## column. Clone i is read from data/variants_<i>.vcf and labelled
## "resistant_<i>".
## NOTE(review): only the path and label for clone 1 appeared in the original
## script; the pattern for the remaining clones is assumed -- confirm.
clones <- lapply(seq_len(n_clones), function(i) {
  clone <- read.table(
    sprintf("data/variants_%d.vcf", i),
    header = FALSE,
    sep = "\t"
  )
  clone$sample <- paste0("resistant_", i)
  clone
})

## Combine all clones into a single, tall data.table
all_variants <- rbindlist(clones)

## Count recurrent mutations: drop REJECT calls, group by coordinate and
## reference allele, and count the distinct samples seen in each group.
## Columns as used below:
##   V1: chr, V2: position, V4: ref allele, V5: alt allele, V7: KEEP/REJECT
## NOTE(review): `end` is taken from V2 (same as `start`), not V3 -- confirm
## this is intended for the input files.
## NOTE(review): alt is not part of the grouping key; the first alt allele in
## each group is reported, so different alts at one site are counted
## together -- confirm this is intended.
grouped <- all_variants[
  V7 != "REJECT",
  list(alt = V5[1], n_samples = length(unique(sample))),
  by = list(chr = V1, start = V2, end = V2, ref = V4)
]

## Keep only mutations that occur in every clone, and write them to file
filtered <- grouped[n_samples == n_clones, list(chr, start, end, ref, alt)]
write.table(
  filtered,
  file = "data/filtered_recurrent_variants.txt",
  row.names = FALSE,
  quote = FALSE
)