I have bunch of input files around 200MB which I need to read in Perl, extract specific information, and write it into a new file for each of those files. How can I do it without using a lot of while loops.
Each input file is tab-separated like this. The fields are ACME A
, 0
, 2
In every file I want to obtain then third column
ACME A 0 2
ACME A 1 0
ACME A 2 0
ACME A 3 0
ACME A 4 0
ACME A 5 0
ACME A 6 0
Here is my code so far;
#! /usr/bin/perl -w
#compiler profilleri
use strict;
use warnings;
sub trim($) {
my $string = shift;
$string =~ s/^\s+//;
$string =~ s/\s+$//; #/ turn off wrong syntax highlight
return $string;
}
#dosya locationları
my $input_file = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_01.txt";
my $input_file1 = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_06.txt";
my $input_file2 = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_11.txt";
my $input_file3 = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_16.txt";
my $input_file4 = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_21.txt";
my $input_file5 = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_26.txt";
my $input_file6 = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_31.txt";
my $input_file7 = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_36.txt";
my $input_file8 = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_41.txt";
my $input_file9 = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_46.txt";
my $input_file10 = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_51.txt";
my $input_file11 = "C:/Perl64/output/sbc_cause_comp_intraday_2017-02-03_00_56.txt";
my $output_file = "C:/Perl64/output/denemecik.txt";
#komutlar######
my $ne;
my @cc_type;
my @cc_count;
my @cc_count1;
my @cc_count2;
my @cc_count3;
my @cc_count4;
my @cc_count5;
my @cc_count6;
my @cc_count7;
my @cc_count8;
my @cc_count9;
my @cc_count10;
my @cc_count11;
my @total;
my $i;
my @count = 0;
my @count1 = 0;
my @count2 = 0;
my @count3 = 0;
my @count4 = 0;
my @count5 = 0;
my @count6 = 0;
my @count7 = 0;
my @count8 = 0;
my @count9 = 0;
my @count10 = 0;
my @count11 = 0;
my $date = 'sbc_cause_comp_intraday_2017-02-03_00_01';
my $date1 = substr( $date, 24, 10 );
my $hour = substr( $date, 35, 1 );
#print ($hour);
open INPUT, "< $input_file" or die "$0: open of $input_file failed, error: $! \n";
open INPUT1, "< $input_file1" or die "$0: open of $input_file1 failed, error: $! \n";
open INPUT2, "< $input_file2" or die "$0: open of $input_file2 failed, error: $! \n";
open INPUT3, "< $input_file3" or die "$0: open of $input_file3 failed, error: $! \n";
open INPUT4, "< $input_file4" or die "$0: open of $input_file4 failed, error: $! \n";
open INPUT5, "< $input_file5" or die "$0: open of $input_file5 failed, error: $! \n";
open INPUT6, "< $input_file6" or die "$0: open of $input_file6 failed, error: $! \n";
open INPUT7, "< $input_file7" or die "$0: open of $input_file7 failed, error: $! \n";
open INPUT8, "< $input_file8" or die "$0: open of $input_file8 failed, error: $! \n";
open INPUT9, "< $input_file9" or die "$0: open of $input_file9 failed, error: $! \n";
open INPUT10, "< $input_file10" or die "$0: open of $input_file10 failed, error: $! \n";
open INPUT11, "< $input_file11" or die "$0: open of $input_file11 failed, error: $! \n";
open OUTPUT, "> $output_file" or die "$0: open of $output_file failed, error: $! \n";
print OUTPUT ( "**********************************************************************\n" );
while ( defined( $_ = <INPUT> ) ) {
my $line = $_;
my ( $ne, $cc_type, $cc_count ) = split( '\t', $line );
my $count = trim( $cc_count );
print( "$ne\n" );
while ( defined( $_ = <INPUT1> ) ) {
my $line1 = $_;
my ( undef, undef, $cc_count1 ) = split( '\t', $line1 );
my $count1 = trim( $cc_count1 );
#print("$count1\n");
while ( defined( $_ = <INPUT2> ) ) {
my $line2 = $_;
my ( undef, undef, $cc_count2 ) = split( '\t', $line2 );
my $count2 = trim( $cc_count2 );
#print("$cc_count2\n");
while ( defined( $_ = <INPUT3> ) ) {
my $line3 = $_;
my ( undef, undef, $cc_count3 ) = split( '\t', $line3 );
my $count3 = trim( $cc_count3 );
#print("$cc_count3\n");
while ( defined( $_ = <INPUT4> ) ) {
my $line4 = $_;
my ( undef, undef, $cc_count4 ) = split( '\t', $line4 );
my $count4 = trim( $cc_count4 );
# print("$cc_count4\n");
while ( defined( $_ = <INPUT5> ) ) {
my $line5 = $_;
my ( undef, undef, $cc_count5 ) = split( '\t', $line5 );
my $count5 = trim( $cc_count5 );
#print("$cc_count5\n");
while ( defined( $_ = <INPUT6> ) ) {
my $line6 = $_;
my ( undef, undef, $cc_count6 ) = split( '\t', $line6 );
my $count6 = trim( $cc_count6 );
#print("$cc_count6\n");
while ( defined( $_ = <INPUT7> ) ) {
my $line7 = $_;
my ( undef, undef, $cc_count7 ) = split( '\t', $line7 );
my $count7 = trim( $cc_count7 );
#print("$cc_count7\n");
while ( defined( $_ = <INPUT8> ) ) {
my $line8 = $_;
my ( undef, undef, $cc_count8 ) = split( '\t', $line8 );
my $count8 = trim( $cc_count8 );
#print("$cc_count8\n");
while ( defined( $_ = <INPUT9> ) ) {
my $line9 = $_;
my ( undef, undef, $cc_count9 ) = split( '\t', $line9 );
my $count9 = trim( $cc_count9 );
#print("$cc_count9\n");
while ( defined( $_ = <INPUT10> ) ) {
my $line10 = $_;
my ( undef, undef, $cc_count10 ) = split( '\t', $line10 );
my $count10 = trim( $cc_count10 );
#print("$cc_count10\n");
while ( defined( $_ = <INPUT11> ) ) {
my $line11 = $_;
my ( undef, undef, $cc_count11 ) = split( '\t', $line11 );
my $count11 = trim( $cc_count11 );
#print("$cc_count11\n");
for ( $i = 0; $i < scalar @count; $i++ ) {
$total[$i] = $count[$i]
+ $count1[$i]
+ $count2[$i]
+ $count3[$i]
+ $count4[$i]
+ $count5[$i]
+ $count6[$i]
+ $count7[$i]
+ $count8[$i]
+ $count9[$i]
+ $count10[$i]
+ $count11[$i];
# print("@total\n");
}
print OUTPUT (
"$date1 $hour $ne $cc_type $count $count1 $count2 $count3 $count4 $count5 $count6 $count7 $count8 $count9 $count10 $count11 $total\n"
);
# print("@total\n");
}
}
}
}
}
}
}
}
}
}
}
}
close OUTPUT;
close INPUT;
close INPUT1;
close INPUT2;
close INPUT3;
close INPUT4;
close INPUT5;
close INPUT6;
close INPUT7;
close INPUT8;
close INPUT9;
close INPUT10;
close INPUT11;
Can you please help me to write this code more logically? Right now, I am obtaining the values in an array but the output file shows only the first row as true, and the remaining rows just the iteration of the first one, so it is not true.
The second problem is I don't know how to add number values in one row properly. I want to add those values for each row and write this value as a final column in output file.
Here is the example of output file;
DATE / HOUR NE CC TYPE FILE 00_01 FILE / 00_06 FILE / 00_11 FILE
00_16 FILE 00_21 FILE 00_26 FILE 00_31 FILE 00_36 FILE 00_41 FILE
00_46 FILE / 00_51 FILE / 00_56 TOTAL
2/3/2017 00 ACME A 0 2 4 43 4 4 25 4 3 26 4 4 18 141
2/3/2017 00 ACME A 1 0 0 1 8 0 0 0 0 4 0 0 0 13
2/3/2017 00 ACME A 2 0 0 0 0 0 0 0 0 0 0 0 0 0
2/3/2017 00 ACME A 3 0 0 3 1 0 6 5 0 6 1 4 1 27
2/3/2017 00 ACME A 4 0 0 0 0 0 0 0 0 0 0 0 0 0
2/3/2017 00 ACME A 5 0 0 0 0 0 0 0 0 0 0 0 0 0