#!/bin/Rscript # *************************************************************** # Name: taxo_dissimilarity_plot.R # Purpose: Generates plot of dissimilarity measures between samples. # Magenta -> Low Dissimilarity (High Similarity) # Cyan -> High Dissimilarity (Low Similarity) # Datafile has the following organization: # Var_1 Var_2 Var_3 .. Var_R # Sample_1 # Sample_2 # Sample_3 # ... # Sample_N # # In the current version of the program, you can use the following dissimilarity measures: # -Bray-Curtis dissimilarity matrix on raw species data # -Bray-Curtis dissimilarity matrix on log-transformed abundances # -Chord distance matrix # -Hellinger distance matrix # -Chi-square pre-transformation followed by Euclidean distance # Version: 0.3 # History: 0.1 to 0.2 fixed the bug that produces exta spaces in the files by including sep="" in paste command # 0.2 to 0.3 included Q Mode and R Mode # 0.3 to 0.4 fixed png transparency bug # Authors: Umer Zeeshan Ijaz (Umer.Ijaz@glasgow.ac.uk) # http://userweb.eng.gla.ac.uk/umer.ijaz # Christopher Quince (Christopher.Quince@glasgow.ac.uk) # http://userweb.eng.gla.ac.uk/christopher.quince # Created: 2012-09-10 # License: Copyright (c) 2012 Computational Microbial Genomics Group, University of Glasgow, UK # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # **************************************************************/ suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("gclus")) suppressPackageStartupMessages(library("vegan")) #specify desired options in a list option_list <- list( make_option("--ifile", action="store",default=NULL, help="CSV file"), make_option("--opath", action="store",default=NULL, help="Output path"), make_option("--fsize", action="store", default="1.2", help="Font size [default %default]"), make_option("--width", type="integer",default=800, help="Width of jpeg files [default %default]"), make_option("--height", type="integer",default=800, help="Height of jpeg files [default %default]"), make_option("--method", type="integer",default=1, help="Correlation to use: 1=brayCurtis, 2=brayCurtisLogTransformed, 3=chord 4=hellinger 5=D16 [default %default]"), make_option("--ddisplay", action="store_true",default=FALSE, help="Diagonal display [default %default]"), make_option("--ordered", action="store_true",default=FALSE, help="Order dissimilarity matrix [default %default]"), make_option("--rmode", action="store_true",default=FALSE, help="Mode: TRUE=R mode, FALSE=Q mode [default %default]") ) #get command line options opt<-parse_args(OptionParser(usage="%prog [options] file", option_list=option_list)) if(is.null(opt$ifile)) quit() #Import data now data <- read.csv(opt$ifile,header=TRUE,row.names=1) #use complete.cases function to filter out those rows that have NA entry in Env data<-data[complete.cases(data),] #Transpose the data if R Mode if(opt$rmode){ data=t(data) } "plotDissimilarityMatrix" <- function(D, nc = 4, byrank = TRUE, diag = FALSE, ord=TRUE, cex=0.01) { require(gclus) if (max(D)>1) D <- D/max(D) if (byrank) { data.color = dmat.color(1-D, cm.colors(nc)) } else { data.color = dmat.color(1-D, byrank=FALSE, cm.colors(nc)) } data.o = order.single(1-D) datao.color = data.color[data.o,data.o] op = par(mfrow=c(1,1), pty="s", cex=cex) if (diag) { if(ord) { plotcolors(datao.color, rlabels=attributes(D)$Labels[data.o], dlabels=attributes(D)$Labels[data.o]) } else { plotcolors(data.color, rlabels=attributes(D)$Labels, dlabels=attributes(D)$Labels) } } else { if (ord) { plotcolors(datao.color, rlabels=attributes(D)$Labels[data.o]) } else { plotcolors(data.color, rlabels=attributes(D)$Labels) } } par(op) } if(opt$method == 1){ # Bray-Curtis dissimilarity matrix on raw species data data.dis <- vegdist(data) # Bray-Curtis dissimilarity (default) } else if (opt$method == 2){ # Bray-Curtis dissimilarity matrix on log-transformed abundances data.dis <- vegdist(log1p(data)) } else if (opt$method == 3){ # Chord distance matrix data.norm <- decostand(data, "nor") data.dis <- dist(data.norm) } else if (opt$method == 4){ # Hellinger distance matrix data.hel <- decostand(data, "hel") data.dis <- dist(data.hel) } else { # Chi-square pre-transformation followed by Euclidean distance data.chi <- decostand(data, "chi.square") data.dis <- dist(data.chi) } # Plot the data jpeg(filename = paste(opt$opath,substr(basename(opt$ifile),1,nchar(basename(opt$ifile))-4),"_DP.jpg",sep=""),width = as.numeric(opt$width), height = as.numeric(opt$height), quality=100) par(bg="white") plotDissimilarityMatrix(data.dis, diag=opt$ddisplay, ord=opt$ordered, cex=as.numeric(opt$fsize)) dev.off()