#!/bin/Rscript # *************************************************************** # Name: taxo_richness_plot.R # Purpose: This script generates multiple subplots for all the environmental # parameters against species richness in a single plot. # The species richness is rarefied to the minimum sample numbers and a # correlation test is performed between the rarefied richness and the # environmental parameter. The resulting correlation and their significance # is drawn on top of each subplot. Currently, it has support for three # correlation measures: # -Pearson # -Spearman # -Kendall # Furthermore this script also generates a log file that # contains the summary stats of regression of rarefied richness against # environmental parameters. The last column contains the P-values and if # significant, it indicates that the richness is affected by this environmental # parameter # Version: 0.2 # History: 0.1 to 0.2 Automatically remove NA entries # Authors: Umer Zeeshan Ijaz (Umer.Ijaz@glasgow.ac.uk) # http://userweb.eng.gla.ac.uk/umer.ijaz # Christopher Quince (Christopher.Quince@glasgow.ac.uk) # http://userweb.eng.gla.ac.uk/christopher.quince # Created: 2012-09-10 # License: Copyright (c) 2012 Computational Microbial Genomics Group, University of Glasgow, UK # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # **************************************************************/ suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("mgcv")) suppressPackageStartupMessages(library("vegan")) suppressPackageStartupMessages(library("RColorBrewer")) suppressPackageStartupMessages(library("gplots")) #specify desired options in a list option_list <- list( make_option("--sfile", action="store",default=NULL, help="Species abundance file"), make_option("--efile", action="store",default=NULL, help="environmental file"), make_option("--opath", action="store",default=NULL, help="Output path"), make_option("--fsize", action="store", default="1.2", help="Font size [default %default]"), make_option("--width", type="integer",default=800, help="Width of jpeg files [default %default]"), make_option("--height", type="integer",default=800, help="Height of jpeg files [default %default]"), make_option("--correlation", type="integer",default=1, help="Correlation to use: 1=pearson, 2=spearman, 3=kendall [default %default]"), make_option("--mreads", type="integer",default=200, help="Minimum reads [default %default]"), make_option("--ccol", type="integer",default=1, help="Minimum reads [default %default]") ) #get command line options opt<-parse_args(OptionParser(usage="%prog [options] file", option_list=option_list)) if(is.null(opt$sfile) || is.null(opt$efile)) quit() formatPvalues <- function(pvalue) { ra<-"" if(pvalue <= 0.1) ra<-"." if(pvalue <= 0.05) ra<-"*" if(pvalue <= 0.01) ra<-"**" if(pvalue <= 0.001) ra<-"***" return(ra) } #generate colors function generateColors <- function(siteNames,column) { #generate the indices colInd<-"" if(column==0){ colInd<-rep("blue",length(siteNames)) } else{ #first split the names with respect to underscore and construct a parsedString matrix for(i in seq(1:length(siteNames))) { tmp<-t(data.matrix(unlist(strsplit(siteNames[i],"_")))) if(column>length(tmp)){ column<-length(tmp) } if(i==1){ parsedString<-tmp } else{ parsedString<-rbind(parsedString,tmp) } } #generate template colors crp<- colorRampPalette(c("blue", "#007FFF", "cyan","#7FFF7F", "yellow", "#FF7F00", "red"),space = "Lab")(length(unique(parsedString[,column]))) for(i in seq(1:length(siteNames))) { colInd[i]<-crp[match(parsedString[i,column],unique(parsedString[,column]))] } } return(colInd) } #Importing data now AS_C <-read.csv(opt$sfile,header=TRUE,row.names=1) Env <- read.csv(opt$efile,header=TRUE,row.names=1) #transpose the data AS <- t(AS_C) #filter the data for reads smaller than a particular number AS<-AS[rowSums(AS)>opt$mreads,] Env<- Env[rownames(AS),] #use complete.cases function to filter out those rows that have NA entry in Env Env<-Env[complete.cases(Env),] AS<-AS[rownames(Env),] # Calculate species richness N <- rowSums(AS) S <- specnumber(AS) S.rar <-rarefy(AS, min(N)) S.lm <- lm(S.rar ~ ., data = Env) summarystats<-summary(S.lm) capture.output(summary(S.lm), file=paste(opt$opath,substr(basename(opt$sfile),1,nchar(basename(opt$sfile))-4),"_RP.txt",sep="")) method<-switch(opt$correlation,"pearson","spearman","kendall") jpeg(filename = paste(opt$opath,substr(basename(opt$sfile),1,nchar(basename(opt$sfile))-4),"_RP.jpg",sep=""),width = as.numeric(opt$width), height = as.numeric(opt$height), quality=100) par( mfrow=c(ceiling(ncol(Env)/4),4) ,mai=c(1,1,1,0.2) ,bg="white") #draw species richness plots for (e in seq(1,ncol(Env))){ correlationTest<-cor.test(Env[,e],S.rar,method=method) plot(Env[,e],S.rar, xlab = colnames(Env)[e], ylab = "richness", cex.axis=as.numeric(opt$fsize)*0.6, cex.lab=as.numeric(opt$fsize), main=paste(sprintf("%.3f\n", correlationTest$estimate),formatPvalues(pvalue=correlationTest$p.value),sep=""), cex=as.numeric(opt$fsize), cex.main=as.numeric(opt$fsize), col=generateColors(rownames(AS),opt$ccol),pch=16) } dev.off(which=dev.cur())