#!/usr/bin/perl
# ***************************************************************
# Name: convIDs.pl
# Purpose: This script takes a delimited file and converts the IDs in a particular column (numbering starts from 1)
# to those provided by an ID list map
# Version: 0.1
# Authors: Umer Zeeshan Ijaz (Umer.Ijaz@glasgow.ac.uk)
# http://userweb.eng.gla.ac.uk/umer.ijaz
# Christopher Quince (Christopher.Quince@glasgow.ac.uk)
# http://userweb.eng.gla.ac.uk/christopher.quince
# Created: 2013-02-05
# License: Copyright (c) 2013 Computational Microbial Genomics Group, University of Glasgow, UK
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# **************************************************************/
use strict;
use Getopt::Long;
# Parse the command-line options into %opts.
#   -i / --in_file   : delimited input file (required)
#   -l / --list_file : ID list map file (required)
#   -c / --column_no : integer column number (optional)
#   -t / --type      : delimiter type string, e.g. comma or tab (optional)
my %opts; #store the input arguments
GetOptions(\%opts,
    'in_file|i=s',
    'list_file|l=s',
    'column_no|c=i',
    'type|t=s'
);

# Both file arguments are mandatory: without them print the usage text and stop.
if ((not defined $opts{"in_file"}) || (not defined $opts{"list_file"})) {
    print <<EOF;
Usage:
perl convIDs.pl -i <in_file> -l <list_file> -c <column_no> -t comma/tab > <out_file>
EOF
    exit;
}
# Pull the parsed options out of %opts. The two file names are taken as given;
# the column number and delimiter type fall back to 3 and "comma" when the
# corresponding option was not supplied.
my ($in_file, $list_file) = @opts{"in_file", "list_file"};
my $column_no = defined $opts{"column_no"} ? $opts{"column_no"} : 3;
my $type = defined $opts{"type"} ? $opts{"type"} : "comma";
#Populate IDs_hash
# Each line of the list file is split on commas: the first field is the ID to
# look for and the second field is its replacement.
my %IDs_hash = ();
open(my $list_fh, '<', $list_file)
    or die "Cannot open list file '$list_file': $!\n";
while (my $line = <$list_fh>) {
    chomp($line);
    my ($old_id, $new_id) = split(/,/, $line);

    # Blank lines and entries with a missing/empty replacement carry no
    # usable mapping, so they are ignored.
    next unless defined $old_id && defined $new_id && $new_id ne '';

    # The first usable mapping for an ID wins; later duplicates are ignored.
    $IDs_hash{$old_id} = $new_id unless exists $IDs_hash{$old_id};
}
close($list_fh);
# Rewrite the input file to STDOUT, replacing the ID found in column
# $column_no (1-based) with its mapping from %IDs_hash when one exists.
# Every other field, and any ID without a mapping, is printed unchanged.

# Translate the delimiter type name into the actual separator. The type is a
# string, so it must be looked up by string equality (numeric == would treat
# "comma" and "tab" as the same value).
my %delimiter_for = (comma => ",", tab => "\t");
die "Unknown type '$type': expected 'comma' or 'tab'\n"
    unless exists $delimiter_for{$type};
my $delimiter = $delimiter_for{$type};

# Zero-based index of the column whose IDs are converted.
my $target_index = $column_no - 1;

open(my $in_fh, '<', $in_file)
    or die "Cannot open input file '$in_file': $!\n";
while (my $line = <$in_fh>) {
    chomp($line);

    # A limit of -1 keeps trailing empty fields, so the number of columns in
    # each output line matches the input line.
    my @tokens = split(/\Q$delimiter\E/, $line, -1);

    # Only touch the target column when the line actually has it.
    if ($target_index >= 0 && $target_index <= $#tokens) {
        my $mapped = $IDs_hash{$tokens[$target_index]};
        # A missing or empty mapping means "leave the ID as it is".
        $tokens[$target_index] = $mapped if defined $mapped && $mapped ne '';
    }

    print join($delimiter, @tokens), "\n";
}
close($in_fh);