# ---- CD41 microparticles by DC / lactate group ----------------------------
# NOTE(review): the absolute paths below are machine-specific.
setwd("/home/jtextori/Documents/Medecine/projets/HIF/microparticules/")
library(Cairo)
library(doBy)
library(ggplot2)
# Restores a saved session; the code below expects it to provide `data`.
load("/data2/Documents/Medecine/projets/HIF/microparticules/workspace.R")

# Cross-tabulate the two variables that define the four groups.
table(data$DC, data$lact_factH0)

# Encode the four DC x lact_factH0 combinations as A-D. This is done BEFORE
# any plotting so that the plot never depends on a stale `m` or grouping
# column left over in the loaded workspace.
data$DC.lact.out[which(data$DC == "Non" & data$lact_factH0 == FALSE)] <- "A"
data$DC.lact.out[which(data$DC == "Non" & data$lact_factH0 == TRUE)] <- "B"
data$DC.lact.out[which(data$DC == "Oui" & data$lact_factH0 == FALSE)] <- "C"
data$DC.lact.out[which(data$DC == "Oui" & data$lact_factH0 == TRUE)] <- "D"
data$DC.lact.out <- as.factor(data$DC.lact.out)

# Plotting frame. NOTE(review): column 207 is selected by position and is
# presumably the CD41 measurement -- confirm against the data dictionary.
m <- data.frame(
  cd41 = data[, 207],
  Group = data$DC.lact.out
)
# Relabel the sorted factor levels (A, B, C, D) for display, in that order.
levels(m$Group) <- c("Yes\n< 2", "Yes\n> 2", "No\n< 2", "No\n> 2")

pt <- ggplot(data = m, aes(Group, cd41))
# `alpha` is a fixed setting, so it belongs outside aes(); inside aes() it is
# treated as a mapped variable (rescaled, with a spurious legend entry).
# `y = "log"` replaces the legacy `ytrans = "log"` argument name.
pt.box <- pt +
  geom_boxplot(aes(fill = Group), alpha = 0.3) +
  coord_trans(y = "log")
print(pt.box)
# Pass the plot explicitly rather than relying on the last plot displayed.
ggsave("pred_lact-DC2.pdf", plot = pt.box)
# ---- Package installation and code-tidying GUI ----------------------------
# Packages needed for the tidy GUI and for the MALDI analysis further down.
# install.packages() takes ONE character vector of package names; separate
# positional strings would be interpreted as the `lib` and `repos` arguments.
required.pkgs <- c(
  "formatR", "gWidgetsRGtk2",
  "readBrukerFlexData", "MALDIquant", "MALDIquantForeign"
)
# Install only what is missing, so re-running the script does not reinstall.
missing.pkgs <- required.pkgs[
  !vapply(required.pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing.pkgs) > 0) {
  # Select the repositories to install from before installing.
  setRepositories()
  install.packages(missing.pkgs)
}

# Attach the packages before calling the GUI that depends on them.
library(formatR)
library(gWidgetsRGtk2)
# NOTE(review): tidy.gui() may only exist in older formatR releases --
# confirm against the installed version.
tidy.gui()

# A dendrogram call on `score.dist` used to sit here, before `score.dist` and
# `sampleNames` are computed; it was removed because it can only run after
# the scoring step.
# ---- MALDI-TOF spectra: import and annotation -----------------------------
library(readBrukerFlexData) # reader for raw Bruker flex data
library(MALDIquant)
library(MALDIquantForeign)
setwd("/data/partage/MALDI/JoVE/data_18h/")

# Import every spectrum found under the working directory.
spectra <- importBrukerFlex("./")
length(spectra)
is(spectra[[1]])

# Flag spectra whose intensity is constant (min equals max), then count them.
v.empty <- lapply(spectra, function(sp) {
  min(sp@intensity) == max(sp@intensity)
})
length(which(unlist(v.empty) == TRUE))

# Sample name of each spectrum, taken from its metadata.
sampleNames <- as.factor(
  unlist(lapply(spectra, function(sp) sp@metaData$sampleName))
)
levels(sampleNames)

# Group labels are derived from the complete file name (with folder path):
# the path is replaced by a short label when it matches a known pattern.
group <- lapply(spectra, function(sp) sp@metaData$file)
group[grep(" IFNg ", group)] <- "IFNg"
group[grep("IL-4", group)] <- "IL4"
group[grep("NS", group)] <- "NS"
group <- as.factor(unlist(group))
levels(group)

plot(spectra[[1]])

# Square-root transform of the intensities.
spectra <- lapply(spectra, transformIntensity, fun = sqrt)
# Centred moving average of a numeric series.
#
# y      : numeric vector (or time series) to smooth.
# window : number of points averaged (default 5, the original fixed value).
#
# Returns what stats::filter() returns: a series of the same length as `y`
# in which positions without a full window on both sides are NA.
# stats::filter is called with its namespace so that attaching a package that
# exports another `filter` (e.g. dplyr) cannot change the behaviour.
movAvg <- function(y, window = 5) {
  stopifnot(is.numeric(window), length(window) == 1, window >= 1)
  stats::filter(y, rep(1, window) / window, sides = 2)
}
# ---- Smoothing, baseline removal and quality control ----------------------
spectra <- lapply(spectra, transformIntensity, fun = movAvg)
spectra <- lapply(spectra, removeBaseline, method = "SNIP")

# Exploratory pass with a more permissive threshold (SNR = 6): distribution
# of the number of detected peaks and count of spectra with fewer than 40.
pk <- lapply(spectra, detectPeaks, SNR = 6, halfWindowSize = 20)
nb.pk <- unlist(lapply(pk, function(p) length(p@mass)))
plot(density(nb.pk), xlim = c(0, 100))
length(which(nb.pk < 40))

# Setting retained for quality control: SNR = 10.
pk <- lapply(spectra, detectPeaks, SNR = 10, halfWindowSize = 20)
nb.pk <- unlist(lapply(pk, function(p) length(p@mass)))
plot(density(nb.pk), xlim = c(0, 100))
length(which(nb.pk < 40))

# Largest intensity of each spectrum, ignoring NA values (NA when a spectrum
# has no non-missing intensity).
max.intensities <- unlist(lapply(spectra, function(sp) {
  sort(as.numeric(sp@intensity), decreasing = TRUE)[1]
}))
plot(density(max.intensities))

# A spectrum fails QC when its maximum intensity is below 50 or when fewer
# than 40 peaks were detected.
bad.spectra <- which(max.intensities < 50 | nb.pk < 40)
length(bad.spectra)
bad.spectra

# Visual inspection of a few spectra. NOTE(review): the indices and titles
# are hard-coded from one particular data set.
plot(spectra[[4]], main = "Poor quality spectrum")
plot(spectra[[8]], main = "Poor quality spectrum")
plot(spectra[[1]], main = "Good quality spectrum")
plot(spectra[[20]], main = "Poor quality spectrum")

# Drop the failing spectra and keep the annotation vectors aligned.
# Positive indices are used on purpose: x[-which(cond)] would remove EVERY
# element when no spectrum fails, because x[-integer(0)] is empty.
keep <- setdiff(seq_along(spectra), bad.spectra)
spectra <- spectra[keep]
sampleNames <- as.factor(as.vector(sampleNames[keep]))
group <- as.factor(as.vector(group[keep]))
# ---- Visual check, normalisation and alignment ----------------------------
# Plot up to six randomly chosen spectra, coloured by group. The sample size
# is capped so that data sets with fewer than six spectra do not fail.
par(mfrow = c(2, 3))
ind <- sample.int(length(spectra), min(6L, length(spectra)))
for (i in ind) {
  plot(spectra[[i]], col = group[i])
}

spectra <- standardizeTotalIonCurrent(spectra)

# Reference peaks are built from a permissive detection (SNR = 4).
# NOTE(review): the positional arguments to referencePeaks() are presumably
# method, minFrequency and tolerance -- confirm against the MALDIquant
# documentation for the installed version.
pk.ref <- lapply(spectra, detectPeaks, SNR = 4, halfWindowSize = 20)
refPeaks <- referencePeaks(pk.ref, "strict", 0.6, 0.002)
par(mfrow = c(1, 1))
plot(refPeaks)

# Peaks used for the alignment itself (SNR = 6); `pk` keeps this value.
pk <- lapply(spectra, detectPeaks, SNR = 6, halfWindowSize = 20)
warpingFunctions <- determineWarpingFunctions(pk, reference = refPeaks)
pk.aligned <- warpMassPeaks(pk, warpingFunctions)
sp.aligned <- warpMassSpectra(spectra, warpingFunctions)

# Mass range shared by all aligned spectra, shrunk by one unit on each side.
mins <- unlist(lapply(sp.aligned, function(sp) min(sp@mass)))
maxs <- unlist(lapply(sp.aligned, function(sp) max(sp@mass)))
lim1 <- round(max(mins, na.rm = TRUE), 0) + 1
lim2 <- round(min(maxs, na.rm = TRUE), 0) - 1
lim1
lim2
## Convert a matrix of values into a presence/absence matrix.
##
## x : matrix (or vector) in which NA marks an absent value.
##
## Returns an object with the same dimensions and dimnames as `is.na(x)`,
## holding 0 where `x` is NA and 1 elsewhere. The result is always numeric,
## including for empty input (ifelse() would return logical(0) there).
as.binary.matrix <- function(x) {
  present <- !is.na(x)
  storage.mode(present) <- "double"
  present
}
## Pairwise similarity between the rows of a 0/1 matrix.
##
## x : numeric matrix, one row per item, 1 = present and 0 = absent.
##
## Returns a square matrix with one row and one column per row of `x`.
## Despite the name, the value returned is 2*n11 / (n01 + n10 + 2*n11), i.e.
## shared presences are counted twice; the plain Jaccard ratio
## n11 / (n01 + n10 + n11) is not what is computed. Two rows without any
## presence give 0/0 = NaN.
jaccard <- function(x) {
  absent <- 1 - x
  shared <- tcrossprod(x)             # present in both rows
  only.second <- tcrossprod(absent, x) # absent in row i, present in row j
  only.first <- tcrossprod(x, absent)  # present in row i, absent in row j
  twice.shared <- 2 * shared
  twice.shared / (only.second + only.first + twice.shared)
}
## Compute the pairwise "score" between peak lists.
##
## The score of two spectra is S = jac * cor, where `jac` is the value
## returned by jaccard() on the presence/absence of the binned peaks and
## `cor` is the Pearson correlation of the intensities of the peaks the two
## spectra have in common. Note that a Pearson coefficient lies in [-1, 1],
## so S is not guaranteed to be non-negative, and it is NA when the
## correlation cannot be computed for a pair.
##
## p         : list of peak objects (one per spectrum).
## tolerance : tolerance handed to binPeaks().
## range     : lower and upper mass limits handed to trim().
##
## Returns a square matrix of scores, one row/column per element of `p`.
computeModJacScoreOnPeaks <- function(p, tolerance=0.002, range=c(0, 20000)) {
  lower <- range[1]
  upper <- range[2]
  binned <- binPeaks(trim(p, lower, upper), method = "relaxed",
                     tolerance = tolerance)
  ## drop peaks that occur in a single spectrum only
  min.freq <- 2 / length(binned)
  kept <- filterPeaks(binned, minFrequency = min.freq)
  ## alternative: group-wise filtering
  #kept <- filterPeaks(binned, labels=group, minFrequency=2/3)
  intensities <- intensityMatrix(kept)
  presence.score <- jaccard(as.binary.matrix(intensities))
  intensity.cor <- cor(t(intensities), method = "pearson",
                       use = "pairwise.complete.obs")
  presence.score * intensity.cor
}
# Build `m3`, the intensity matrix of the aligned spectra restricted to the
# mass range [lim1, lim2] and collapsed to rounded mass values; the whole
# construction is timed.
system.time({
  m3 <- intensityMatrix(
    lapply(trim(sp.aligned, lim1, lim2), function(sp) {
      # Round each mass, then keep the median intensity per rounded mass.
      unit.mass <- round(mass(sp))
      sp@intensity <- unlist(lapply(split(intensity(sp), unit.mass), median))
      sp@mass <- unique(unit.mass)
      sp
    })
  )
})
# Pairwise score between the aligned peak lists, and its observed extremes.
score.mat <- computeModJacScoreOnPeaks(pk.aligned)
min(score.mat)
max(score.mat)

# Turn the similarity score into a dissimilarity (1 - score) for clustering.
score.dist <- as.dist(1 - score.mat)
min(score.dist)
max(score.dist)
# ---- Hierarchical clustering of the score distances -----------------------
# Stray console input that could not be parsed or evaluated (`score.dist²`,
# `score;dist*...`, `square`, a bare `hclust`) and repeated identical plots
# were removed so that the file can be sourced.
#
# "ward.D" is the current name of the method previously requested as "ward";
# the algorithm is the same.

# One dendrogram per linkage method.
for (linkage in c("ward.D", "average", "complete", "single", "median",
                  "mcquitty")) {
  plot(hclust(score.dist, method = linkage), labels = sampleNames, hang = -1)
}

# Same Ward tree with a positive `hang`, for comparison of the leaf layout.
plot(hclust(score.dist, method = "ward.D"), labels = sampleNames, hang = 1)

# Effect of squaring the distances before clustering.
# A "dist" object carries no dim attribute, so this yields NULL.
dim(score.dist^2)
h <- hclust(score.dist^2, method = "ward.D")
h$height
plot(h, labels = sampleNames, hang = -1)
h <- hclust(score.dist^2, method = "average")
h$height

# Final view: Ward linkage on the unsquared distances.
plot(hclust(score.dist, method = "ward.D"), labels = sampleNames, hang = -1)
