#!/bin/Rscript
# ***************************************************************
# Name:      taxo_hclus_plot.R
# Purpose:   Generates the hierarchical clustering plot
# Version:   0.2
# Authors:   Umer Zeeshan Ijaz (Umer.Ijaz@glasgow.ac.uk)
#                 http://userweb.eng.gla.ac.uk/umer.ijaz
#            Christopher Quince (Christopher.Quince@glasgow.ac.uk)
#                 http://userweb.eng.gla.ac.uk/christopher.quince
# Created:   2012-09-10
# License:   Copyright (c) 2012 Computational Microbial Genomics Group, University of Glasgow, UK
# 
#            This program is free software: you can redistribute it and/or modify
#            it under the terms of the GNU General Public License as published by
#            the Free Software Foundation, either version 3 of the License, or
#            (at your option) any later version.
#
#            This program is distributed in the hope that it will be useful,
#            but WITHOUT ANY WARRANTY; without even the implied warranty of
#            MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#            GNU General Public License for more details.
#
#            You should have received a copy of the GNU General Public License
#            along with this program.  If not, see <http://www.gnu.org/licenses/>.
# **************************************************************/
suppressPackageStartupMessages(library("optparse"))
suppressPackageStartupMessages(library("MASS"))
suppressPackageStartupMessages(library("vegan"))
suppressPackageStartupMessages(library("calibrate"))

# Command-line interface: one make_option() entry per flag understood by
# this script. The list is handed to OptionParser() further down.
option_list <- list(
  # Input table (read with read.csv) and the prefix for the output image.
  make_option("--ifile",
    action = "store", default = NULL,
    help = "CSV file"
  ),
  make_option("--opath",
    action = "store", default = NULL,
    help = "Output path"
  ),
  # Appearance of the plot. Note that the font size default is the string
  # "1.2"; it is converted with as.numeric() where it is used.
  make_option("--fsize",
    action = "store", default = "1.2",
    help = "Font size [default %default]"
  ),
  make_option("--width",
    type = "integer", default = 800,
    help = "Width of jpeg files [default %default]"
  ),
  make_option("--height",
    type = "integer", default = 800,
    help = "Height of jpeg files [default %default]"
  ),
  # Row-sum threshold applied to the transposed table before clustering.
  make_option("--mreads",
    type = "integer", default = 200,
    help = "Minimum reads [default %default]"
  ),
  # Numeric codes that are mapped to method names later in the script.
  make_option("--dmethod",
    type = "integer", default = 6,
    help = "Distance measure: 1=euclidean, 2=manhattan, 3=gower, 4=altGower, 5=canberra, 6=bray, 7=kulczynski, 8=morisita,9=horn, 10=binomial, 11=cao [default %default]"
  ),
  make_option("--cmethod",
    type = "integer", default = 1,
    help = "Clustering method: 1=ward, 2=single, 3=complete, 4=average, 5=mcquitty, 6=median, 7=centroid [default %default]"
  ),
  # Flag: when given, the relative-abundance table is transposed before
  # the distances are computed.
  make_option("--rmode",
    action = "store_true", default = FALSE,
    help = "Mode: TRUE=R mode, FALSE=Q mode [default %default]"
  )
)

# Parse the command line. The parser object is kept in a variable so the
# usage text can be printed when a required argument is missing.
parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
opt <- parse_args(parser)

# --ifile is mandatory: without it there is nothing to cluster. Show the
# usage, report the problem on stderr and exit with a non-zero status so
# that calling scripts can detect the failure (a bare quit() exits with
# status 0 and prints nothing).
if (is.null(opt$ifile)) {
  print_help(parser)
  message("Error: no input file given (use --ifile)")
  quit(save = "no", status = 1)
}

# Import the table; the first CSV column supplies the row names.
AS_C <- read.csv(opt$ifile, header = TRUE, row.names = 1)

# Transpose so that the CSV columns become the rows of AS
# (presumably one row per sample -- confirm against the input format).
AS <- t(AS_C)

# Keep only rows whose total exceeds --mreads. drop = FALSE keeps the result
# a matrix even when a single row survives; without it the result collapses
# to a plain vector and the rowSums() calls below fail.
AS <- AS[rowSums(AS) > opt$mreads, , drop = FALSE]

# Discard rows that sum to zero, since they cannot be turned into
# relative frequencies (division by zero).
AS_Z <- subset(AS, rowSums(AS) != 0)

# Convert to relative frequencies: every row is divided by its own total.
ASP <- AS_Z / rowSums(AS_Z)

# In R mode the table is transposed, so the objects being clustered are the
# columns of the filtered table instead of its rows.
if (opt$rmode) {
  ASP <- t(ASP)
}

# Hierarchical clustering needs at least two objects; fail here with a
# clear message instead of an obscure error further down.
if (nrow(ASP) < 2) {
  stop(
    "Need at least two objects to cluster, but only ", nrow(ASP),
    " remained after filtering with --mreads=", opt$mreads,
    call. = FALSE
  )
}

# Method names selectable through the integer codes of --dmethod and
# --cmethod; the position in each vector is the code shown in the help text.
dmethods <- c(
  "euclidean", "manhattan", "gower", "altGower", "canberra", "bray",
  "kulczynski", "morisita", "horn", "binomial", "cao"
)
# NOTE(review): recent R versions appear to treat "ward" as an alias of
# "ward.D" and emit a message; the name is kept unchanged here -- confirm
# against the hclust documentation before renaming.
cmethods <- c(
  "ward", "single", "complete", "average", "mcquitty", "median", "centroid"
)

# Reject codes outside the documented range up front. An out-of-range code
# would otherwise yield no method name and fail later with an unrelated
# error message.
if (is.na(opt$dmethod) || opt$dmethod < 1 || opt$dmethod > length(dmethods)) {
  stop("--dmethod must be an integer between 1 and ", length(dmethods),
    call. = FALSE
  )
}
if (is.na(opt$cmethod) || opt$cmethod < 1 || opt$cmethod > length(cmethods)) {
  stop("--cmethod must be an integer between 1 and ", length(cmethods),
    call. = FALSE
  )
}

dmethod <- dmethods[opt$dmethod]
cmethod <- cmethods[opt$cmethod]

# Pairwise dissimilarities between the rows of ASP. The dissimilarity index
# has to be passed as `method`; the previous `dist = dmethod` matched no
# argument of vegdist(), so the default index was always used and --dmethod
# had no effect. The default code 6 ("bray") is vegdist()'s own default, so
# default runs are unchanged.
ASP.dist <- vegdist(ASP, method = dmethod)

# Agglomerative clustering of the dissimilarity matrix.
ASP.hclust <- hclust(ASP.dist, method = cmethod)

# Plot the dendrogram into a JPEG file.

# Font scaling factor; --fsize arrives as a string, so convert it once.
fsize <- as.numeric(opt$fsize)

# Output name: <opath><input name without extension>_HCLUS.jpg.
# file_path_sans_ext() removes whatever extension is present instead of
# blindly cutting the last four characters, so inputs without a
# three-letter extension no longer lose part of their name.
# --opath is concatenated as-is, so it needs its own trailing path
# separator when it names a directory.
out_stem <- tools::file_path_sans_ext(basename(opt$ifile))
out_file <- paste0(opt$opath, out_stem, "_HCLUS.jpg")

jpeg(
  filename = out_file,
  width = as.numeric(opt$width),
  height = as.numeric(opt$height),
  quality = 100
)

# Margins (in inches: bottom, left, top, right) and a white background.
par(mai = c(2, 2, 1, 1), bg = "white")

# Draw the tree without title, subtitle or x-axis label; text sizes and the
# line width all scale with --fsize.
plot(
  ASP.hclust,
  main = "", sub = NA, xlab = NA,
  cex = fsize * 0.6,
  cex.lab = fsize * 0.6,
  cex.axis = fsize * 0.6,
  cex.main = fsize,
  lwd = fsize * 1.5
)

# Close the device so the image is written to disk.
dev.off()