#!/usr/bin/Rscript
# ***************************************************************
# Name:      collate_CSV.R
# Purpose:   This script takes two frequency tables and collates them together.
#            Two methods are supported: union and intersection.
#
# Version:   0.1
# Authors:   Umer Zeeshan Ijaz (Umer.Ijaz@glasgow.ac.uk)
#            http://userweb.eng.gla.ac.uk/umer.ijaz
# Created:   2013-06-14
# License:   Copyright (c) 2013 Computational Microbial Genomics Group, University of Glasgow, UK
#
#            This program is free software: you can redistribute it and/or modify
#            it under the terms of the GNU General Public License as published by
#            the Free Software Foundation, either version 3 of the License, or
#            (at your option) any later version.
#
#            This program is distributed in the hope that it will be useful,
#            but WITHOUT ANY WARRANTY; without even the implied warranty of
#            MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#            GNU General Public License for more details.
#
#            You should have received a copy of the GNU General Public License
#            along with this program. If not, see <http://www.gnu.org/licenses/>.
# **************************************************************/

suppressPackageStartupMessages(library("optparse"))

# Command line options ----

# Specify desired options in a list.
option_list <- list(
  make_option("--ifile1", action = "store", default = NULL,
              help = "CSV file 1"),
  make_option("--ifile2", action = "store", default = NULL,
              help = "CSV file 2"),
  make_option("--ofile", action = "store", default = NULL,
              help = "CSV file"),
  make_option("--opath", action = "store", default = NULL,
              help = "Output path"),
  make_option("--method", type = "integer", default = 1,
              help = "1 = union, 2 = intersection [default %default]")
)

# Get command line options.
parser <- OptionParser(usage = "%prog [options] file",
                       option_list = option_list)
opt <- parse_args(parser)

# Both inputs and the output file name are mandatory. Report the problem and
# exit with a non-zero status instead of quitting silently.
if (is.null(opt$ifile1) || is.null(opt$ifile2) || is.null(opt$ofile)) {
  message("Error: --ifile1, --ifile2 and --ofile are all required.")
  print_help(parser)
  quit(save = "no", status = 1)
}

# A non-integer --method is parsed as NA; fail with a clear message rather
# than with "missing value where TRUE/FALSE needed" further down.
if (is.na(opt$method)) {
  stop("--method must be an integer: 1 = union, 2 = intersection",
       call. = FALSE)
}

# Helpers ----

# Read a CSV table (first column = row names, header kept verbatim) and
# prepend `prefix` to every row name so that rows coming from different
# files can never collide.
read_prefixed_table <- function(path, prefix) {
  tab <- read.csv(path, check.names = FALSE, row.names = 1)
  # paste0(prefix, character(0)) would return `prefix` itself, so only
  # rename when there is at least one row.
  if (nrow(tab) > 0) {
    rownames(tab) <- paste0(prefix, rownames(tab))
  }
  tab
}

# Copy every column of `source` that also exists in `collated` into the rows
# of `collated` named after the rows of `source`. Cells of `collated` that
# have no counterpart in `source` are left untouched.
fill_from_table <- function(collated, source) {
  shared <- intersect(colnames(collated), colnames(source))
  if (nrow(source) > 0 && length(shared) > 0) {
    collated[rownames(source), shared] <- source[, shared, drop = FALSE]
  }
  collated
}

# Stack the rows of `table1` and `table2` into one data frame.
#   method == 1    : columns are the union of both tables' column names;
#                    cells missing from a table are filled with 0.
#   any other value: columns are the intersection of the column names.
# The result has nrow(table1) + nrow(table2) rows, table1's rows first.
collate_tables <- function(table1, table2, method = 1) {
  if (method == 1) {
    collated_names <- union(colnames(table1), colnames(table2))
  } else {
    collated_names <- intersect(colnames(table1), colnames(table2))
  }
  sample_names <- c(rownames(table1), rownames(table2))

  # Start from an all-zero frame; this also works when there are no rows or
  # no shared columns (the original 1:n loops failed in that case).
  collated <- data.frame(
    matrix(0, nrow = length(sample_names), ncol = length(collated_names))
  )
  rownames(collated) <- sample_names
  colnames(collated) <- collated_names

  collated <- fill_from_table(collated, table1)
  collated <- fill_from_table(collated, table2)
  collated
}

# Main ----

file1 <- read_prefixed_table(opt$ifile1, "F1_")
file2 <- read_prefixed_table(opt$ifile2, "F2_")

collated_matrix <- collate_tables(file1, file2, method = opt$method)

# The output location is --opath immediately followed by --ofile (no
# separator is inserted, so --opath must end with "/" when it is a directory).
write.csv(collated_matrix, file = paste0(opt$opath, opt$ofile), quote = FALSE)