#!/usr/bin/perl
# ***************************************************************
# Name:      collateGCMSResults.pl
# Purpose:   This script collates multiple text files produced from GC-MS machine, each
#            residing in a subfolder.
#
#            Format of a typical file is:
#
#            Data Path : C:\msdchem\1\data\Gu-VFA\2013-09\20130904\
#            Data File : 00701019.d
#            Signal(s) : FID1A.ch
#            Acq On    : 4 Sep 2013 19:24
#            Operator  : Gu
#            Sample    : ADR2 7 19/08
#            Misc      :
#            ALS Vial  : 7 Sample Multiplier: 1
#
#            Integration File: autoint1_20140904.e
#            Quant Time: Feb 27 17:33:41 2014
#            Quant Method : C:\msdchem\1\methods\GU-VFA1.M
#            Quant Title  :
#            QLast Update : Thu Feb 27 17:24:49 2014
#            Response via : Initial Calibration
#            Integrator: ChemStation
#
#            Volume Inj.  :
#            Signal Phase :
#            Signal Info  :
#
#            Compound R.T. Response Conc Units
#            ---------------------------------------------------------------------------
#
#            Target Compounds
#            1) Ethanol 0.000 0 N.D. mg/L
#            2) Acetate 6.893 166198 39.167 mg/L
#            3) Propionate 7.449 46075 6.931 mg/L
#            4) isoButyrate 7.623 27925 3.233 mg/L
#            5) n-Butyrate 8.008 12058 1.427 mg/L
#            6) isoValerate 8.251 69083 7.135 mg/L
#            7) n-Valerate 8.645 7929 0.861 mg/L
#            8) isoCaproate 9.006 47784 4.942 mg/L
#            9) n-Caproate 9.245 65775 7.206 mg/L
#            ---------------------------------------------------------------------------
#
#
# Description:
#            1) We make a single pass over the files: compounds become rows in order of
#               first appearance, and every matching file becomes one column of the
#               frequency table
#            2) We match lines that contain "mg/L" using $line =~ m{mg/L}
#            3) We use a substring of directory names as sample names using substr($dir,-5,-2)
#            4) N.D. gets replaced by 0 in the frequency table
#
# Version:   0.1
# Authors:   Umer Zeeshan Ijaz (Umer.Ijaz@glasgow.ac.uk)
#                 http://userweb.eng.gla.ac.uk/umer.ijaz/index.htm
# Created:   2014-03-05
# License:   Copyright (c) 2014 Computational Microbial Genomics Group, University of Glasgow, UK
#
#            This program is free software: you can redistribute it and/or modify
#            it under the terms of the GNU General Public License as published by
#            the Free Software Foundation, either version 3 of the License, or
#            (at your option) any later version.
#
#            This program is distributed in the hope that it will be useful,
#            but WITHOUT ANY WARRANTY; without even the implied warranty of
#            MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#            GNU General Public License for more details.
#
#            You should have received a copy of the GNU General Public License
#            along with this program.  If not, see <http://www.gnu.org/licenses/>.
# **************************************************************/

use warnings;
use strict;
use File::Slurp qw(read_dir);
use Getopt::Long;

my %opts;    # store the input arguments
GetOptions(\%opts,
    'folder|f=s',
    'pattern|p=s',
);

# Both options are mandatory; print the usage text and stop when either is missing.
if ((not defined $opts{"folder"}) || (not defined $opts{"pattern"})) {
    print <<EOF;
Usage:
    perl collateGCMSResults.pl -f <folder> -p <pattern>

For example,
    perl collateGCMSResults.pl -f /home/projectx -p epatemp
EOF
    exit;
}

my $root         = $opts{"folder"};
my $pattern_text = $opts{"pattern"};

# The pattern is applied to file names as a regular expression; compile it once.
my $pattern = qr/$pattern_text/;

my @sample_dirs;    # one entry per matching file: the directory that holds it
my @compounds;      # compound names (table rows) in order of first appearance
my %row_of;         # compound name -> row index into @compounds
my @conc;           # $conc[$row][$column]; cells never filled print as 0

# Single pass: each matching file gets its own column, and each data row of
# that file fills one cell.  Directory entries are sorted so the row/column
# order of the output does not depend on the order readdir returns them in.
my @subdirs = grep { -d "$root/$_" } read_dir($root);
for my $dir (sort @subdirs) {
    my @matching_files = grep { /$pattern/ } read_dir("$root/$dir");
    for my $file (sort @matching_files) {
        my $path = "$root/$dir/$file";

        push @sample_dirs, $dir;
        my $column = $#sample_dirs;

        open(my $fh, '<', $path) or die "Can't open $path: $!\n";
        while (my $line = <$fh>) {
            next unless $line =~ m{mg/L};

            # split ' ' skips leading whitespace, so a data row always yields
            #   index) compound R.T. response conc units
            # whether or not the row is indented.
            my @fields = split ' ', $line;
            next if @fields < 3;    # too short to hold a name and a value

            my $compound = $fields[1];
            my $value    = $fields[-2];

            if (!exists $row_of{$compound}) {
                push @compounds, $compound;
                $row_of{$compound} = $#compounds;
            }

            # "N.D." (not detected) is reported as 0.
            $conc[ $row_of{$compound} ][$column] = ($value =~ /N\.D\./) ? 0 : $value;
        }
        close($fh);
    }
}

# Now generate the frequency table: a header row of sample names, then one
# comma-separated row per compound.
print join(",", "Samples", map { substr($_, -5, -2) } @sample_dirs), "\n";
for my $row (0 .. $#compounds) {
    print join(",", $compounds[$row], map { $conc[$row][$_] // 0 } 0 .. $#sample_dirs), "\n";
}