#!/usr/bin/Rscript
# ***************************************************************
# Name:      collate_CSV.R
# Purpose:   This script takes two frequency tables (CSV) and collates them into one.
#            Two methods are supported: the union or the intersection of their column names
#                         
# Version:   0.1
# Authors:   Umer Zeeshan Ijaz (Umer.Ijaz@glasgow.ac.uk)
#                 http://userweb.eng.gla.ac.uk/umer.ijaz
# Created:   2013-06-14
# License:   Copyright (c) 2013 Computational Microbial Genomics Group, University of Glasgow, UK
# 
#            This program is free software: you can redistribute it and/or modify
#            it under the terms of the GNU General Public License as published by
#            the Free Software Foundation, either version 3 of the License, or
#            (at your option) any later version.
#
#            This program is distributed in the hope that it will be useful,
#            but WITHOUT ANY WARRANTY; without even the implied warranty of
#            MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#            GNU General Public License for more details.
#
#            You should have received a copy of the GNU General Public License
#            along with this program.  If not, see <http://www.gnu.org/licenses/>.
# **************************************************************/
suppressPackageStartupMessages(library("optparse"))

# Command-line interface.
#   --ifile1, --ifile2  input CSV files (both required)
#   --ofile             output CSV file name (required)
#   --opath             optional prefix prepended to --ofile
#   --method            1 = union of column names, 2 = intersection
option_list <- list(
  make_option("--ifile1", action = "store", default = NULL, help = "CSV file 1"),
  make_option("--ifile2", action = "store", default = NULL, help = "CSV file 2"),
  make_option("--ofile", action = "store", default = NULL, help = "CSV file"),
  make_option("--opath", action = "store", default = NULL, help = "Output path"),
  make_option("--method", type = "integer", default = 1,
              help = "1 = union, 2 = intersection [default %default]")
)

# Keep the parser object around so the help text can be printed on misuse.
parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
opt <- parse_args(parser)

# All three of these are needed to do any work. Report the problem and exit
# with a non-zero status so that calling scripts can detect the failure
# (a bare quit() would exit silently with status 0).
if (is.null(opt$ifile1) || is.null(opt$ifile2) || is.null(opt$ofile)) {
  message("Error: --ifile1, --ifile2 and --ofile are all required.")
  print_help(parser)
  quit(save = "no", status = 1L)
}


# Read one frequency table, using its first column as row names and keeping
# the column headers exactly as written in the file. Every row name is then
# tagged with `prefix` so rows from the two inputs cannot collide.
read_prefixed_table <- function(path, prefix) {
  tbl <- read.csv(path, check.names = FALSE, row.names = 1)
  rownames(tbl) <- vapply(
    rownames(tbl),
    function(id) paste0(prefix, id),
    character(1)
  )
  tbl
}

file1 <- read_prefixed_table(opt$ifile1, "F1_")
file2 <- read_prefixed_table(opt$ifile2, "F2_")

# Decide which columns the output will have: method 1 keeps every column
# seen in either input (union); any other value keeps only the columns
# present in both inputs (intersection).
combine_names <- if (opt$method == 1) union else intersect
collated_names <- combine_names(colnames(file1), colnames(file2))

# Build the collated table: one row per input row (all file1 rows first,
# then all file2 rows) and one column per collated name. Every cell starts
# at 0, so a column that an input does not have stays 0 for that input's rows.
total_samples <- nrow(file1) + nrow(file2)
total_columns <- length(collated_names)

collated_matrix <- data.frame(
  matrix(0, nrow = total_samples, ncol = total_columns)
)
rownames(collated_matrix) <- c(rownames(file1), rownames(file2))
colnames(collated_matrix) <- collated_names

# Copy each input into its own band of rows with a single block assignment
# instead of visiting every cell. Rows are addressed by position (file1
# occupies the first nrow(file1) rows, file2 the rest); columns are matched
# by name. The guard makes empty inputs and an empty column set (e.g. an
# intersection with nothing in common) a no-op rather than an error.
row_offset <- 0L
for (src in list(file1, file2)) {
  target_rows <- row_offset + seq_len(nrow(src))
  shared_cols <- intersect(collated_names, colnames(src))
  if (length(target_rows) > 0 && length(shared_cols) > 0) {
    collated_matrix[target_rows, shared_cols] <- src[, shared_cols, drop = FALSE]
  }
  row_offset <- row_offset + nrow(src)
}
# Write the collated table as CSV without quoting. The destination is
# --opath immediately followed by --ofile (no separator is inserted, so
# --opath must end with one if it names a directory); when --opath is not
# given the file name is used as is.
output_path <- paste0(opt$opath, opt$ofile)
write.csv(collated_matrix, file = output_path, quote = FALSE)