#!/bin/Rscript
# ***************************************************************
# Name:      taxo_hclus_plot.R
# Purpose:   Generates the hierarchical clustering plot
# Version:   0.2
# Authors:   Umer Zeeshan Ijaz (Umer.Ijaz@glasgow.ac.uk)
#                 http://userweb.eng.gla.ac.uk/umer.ijaz
#            Christopher Quince (Christopher.Quince@glasgow.ac.uk)
#                 http://userweb.eng.gla.ac.uk/christopher.quince
# Created:   2012-09-10
# License:   Copyright (c) 2012 Computational Microbial Genomics Group, University of Glasgow, UK
#
#            This program is free software: you can redistribute it and/or modify
#            it under the terms of the GNU General Public License as published by
#            the Free Software Foundation, either version 3 of the License, or
#            (at your option) any later version.
#
#            This program is distributed in the hope that it will be useful,
#            but WITHOUT ANY WARRANTY; without even the implied warranty of
#            MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#            GNU General Public License for more details.
#
#            You should have received a copy of the GNU General Public License
#            along with this program.  If not, see <http://www.gnu.org/licenses/>.
# **************************************************************/

suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("MASS"))
suppressPackageStartupMessages(library("vegan"))
suppressPackageStartupMessages(library("calibrate"))

# Command-line options ----
# Specify the desired options in a list.
option_list <- list(
  make_option("--ifile", action = "store", default = NULL,
              help = "CSV file"),
  make_option("--opath", action = "store", default = NULL,
              help = "Output path"),
  make_option("--fsize", action = "store", default = "1.2",
              help = "Font size [default %default]"),
  make_option("--width", type = "integer", default = 800,
              help = "Width of jpeg files [default %default]"),
  make_option("--height", type = "integer", default = 800,
              help = "Height of jpeg files [default %default]"),
  make_option("--mreads", type = "integer", default = 200,
              help = "Minimum reads [default %default]"),
  make_option("--dmethod", type = "integer", default = 6,
              help = "Distance measure: 1=euclidean, 2=manhattan, 3=gower, 4=altGower, 5=canberra, 6=bray, 7=kulczynski, 8=morisita,9=horn, 10=binomial, 11=cao [default %default]"),
  make_option("--cmethod", type = "integer", default = 1,
              help = "Clustering method: 1=ward, 2=single, 3=complete, 4=average, 5=mcquitty, 6=median, 7=centroid [default %default]"),
  make_option("--rmode", action = "store_true", default = FALSE,
              help = "Mode: TRUE=R mode, FALSE=Q mode [default %default]")
)

# Parse the command line. The parser is kept in a variable so that the usage
# text can be printed when a required option is missing.
parser <- OptionParser(usage = "%prog [options] file",
                       option_list = option_list)
opt <- parse_args(parser)

# --ifile is mandatory: show the usage and exit with a non-zero status rather
# than quitting silently, so that calling pipelines can detect the failure.
if (is.null(opt$ifile)) {
  print_help(parser)
  quit(save = "no", status = 1)
}

# Data import and preparation ----
# Import the table; the first column provides the row names.
AS_C <- read.csv(opt$ifile, header = TRUE, row.names = 1)

# Transpose the data so that the columns of the CSV become rows.
AS <- t(AS_C)

# Keep only the rows whose total count exceeds --mreads. drop = FALSE keeps
# the result a matrix even when a single row survives the filter.
AS <- AS[rowSums(AS) > opt$mreads, , drop = FALSE]

# Filter out any rows that have a zero total, since they cannot be converted
# to relative frequencies.
AS_Z <- AS[rowSums(AS) != 0, , drop = FALSE]

# Convert to relative frequencies (each row is divided by its own total).
ASP <- AS_Z / rowSums(AS_Z)

# Transpose the data if R mode was requested.
if (opt$rmode) {
  ASP <- t(ASP)
}

# Hierarchical clustering needs at least two objects; fail early with a clear
# message instead of an obscure error from the clustering step.
if (nrow(ASP) < 2) {
  stop(sprintf("Only %d row(s) left to cluster after filtering with --mreads=%s; at least 2 are required",
               nrow(ASP), opt$mreads),
       call. = FALSE)
}
# Clustering ----
# Names selectable through the 1-based --dmethod / --cmethod options; the
# order matches the numbering given in the option help texts.
# NOTE(review): recent R releases accept "ward" as an alias for "ward.D" and
# print a note about the renaming; the name is left unchanged here.
dmethods <- c("euclidean", "manhattan", "gower", "altGower", "canberra",
              "bray", "kulczynski", "morisita", "horn", "binomial", "cao")
cmethods <- c("ward", "single", "complete", "average", "mcquitty", "median",
              "centroid")

# Reject values outside the documented ranges up front; an out-of-range index
# would otherwise yield no method name and fail later with a cryptic error.
if (!isTRUE(opt$dmethod %in% seq_along(dmethods))) {
  stop(sprintf("--dmethod must be an integer between 1 and %d",
               length(dmethods)),
       call. = FALSE)
}
if (!isTRUE(opt$cmethod %in% seq_along(cmethods))) {
  stop(sprintf("--cmethod must be an integer between 1 and %d",
               length(cmethods)),
       call. = FALSE)
}
dmethod <- dmethods[opt$dmethod]
cmethod <- cmethods[opt$cmethod]

# vegdist() selects the dissimilarity index through its `method` argument.
# Passing it under any other name leaves the default index in effect, so the
# name must be spelled out for --dmethod to be honoured.
ASP.dist <- vegdist(ASP, method = dmethod)
ASP.hclust <- hclust(ASP.dist, method = cmethod)

# Plotting ----
# --fsize arrives as a string; convert it once and validate it.
fsize <- suppressWarnings(as.numeric(opt$fsize))
if (is.na(fsize)) {
  stop("--fsize must be numeric", call. = FALSE)
}

# The output file is named after the input file, with its extension removed
# (whatever its length) and "_HCLUS.jpg" appended. --opath is prepended
# verbatim, so it should end with a path separator.
input_stem <- tools::file_path_sans_ext(basename(opt$ifile))
jpeg(filename = paste0(opt$opath, input_stem, "_HCLUS.jpg"),
     width = as.numeric(opt$width),
     height = as.numeric(opt$height),
     quality = 100)
par(mai = c(2, 2, 1, 1), bg = "white")
plot(ASP.hclust,
     main = "",
     sub = NA,
     xlab = NA,
     cex = fsize * 0.6,
     cex.lab = fsize * 0.6,
     cex.axis = fsize * 0.6,
     cex.main = fsize,
     lwd = fsize * 1.5)
dev.off()