I can't get the Perl script below to write to the file output.html
.
I doesn't need to be a CGI script yet, but that is the ultimate intention.
Can anyone tell me why it isn't writing any text to output.html?
#!/usr/bin/perl
#-----------------------------------------------------------------------
# This script should work as a CGI script, if I get it correctly.
# Most CGI scripts for Perl begin with the same line and must be
# stored in your servers cgi-bin directory. (I think this is set by
# your web server.
#
# This scripts will scrape news sites for stories about topics input
# by the users.
#
# Lara Landis
# Sinister Porpoise Computing
# 1/4/2018
# Personal Perl Project
#-----------------------------------------------------------------------
@global_sites = ();
print( "Starting program.\n" );
if ( !( -e "sitedata.txt" ) ) {
enter_site_info( @global_sites );
}
if ( !( -e "scrpdata.txt" ) ) {
print( "scrpdata.txt does not exist. Creating file now.\n" );
print( "Enter the search words you wish to search for below. Press Ctrl-D to finish.\n" );
open( SCRAPEFILE, ">scrpdata.txt" );
while ( $line = <STDIN> ) {
chop( $line );
print SCRAPEFILE ( "$line\n" );
}
close( SCRAPEFILE );
}
print( "Finished getting site data..." );
scrape_sites( @global_sites );
#----------------------------------------------------------------------
# This routine gets information from the user after the file has been
# created. It also has some basic checking to make sure that the lines
# fed to it are legimate domains. This is not an exhaustive list of
# all domains in existence.
#----------------------------------------------------------------------
sub enter_site_info {
my ( @sisites ) = @_;
$x = 1;
open( DATAFILE, ">sitedata.txt" ) || die( "Could not open datafile.\n" );
print( "Enter websites below. Press Crtl-D to finish.\n" );
while ( $x <= @sisites ) {
$sisites[$x] = <STDIN>;
print( "$sisites[$x] added.\n" );
print DATAFILE ( "$sisites[$x]\n" );
$x++;
}
close( DATAFILE );
return @sisites;
}
#----------------------------------------------------------------------
# If the file exists, just get the information from it. Read info in
# from the sites. Remember to create a global array for the sites
# data.
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# Get the text to find in the sites that are being scraped. This requires
# nested loops. It starts by going through the loops for the text to be
# scraped, and then it goes through each of the websites listend in the
# sitedata.txt file.
#-----------------------------------------------------------------------
sub scrape_sites {
my ( @ss_info ) = @_;
@gsi_info = ();
@toscrape = ();
$y = 1;
#---------------------------
# Working code to be altered
#---------------------------
print( "Getting site info..." );
$x = 1;
open( DATAFILE, "sitedata.txt" ) || die( "Can't open sitedata.txt.txt\n" );
while ( $gsi_info[$x] = <DATAFILE> ) {
chop( $gsi_info[$x] );
print( "$gsi_info[$x]\n" );
$x++;
}
close( DATAFILE );
open( SCRAPEFILE, "scrpdata.txt" ) || die( "Can't open scrpdata.txt\n" );
print( "Getting scrape data.\n" );
$y = 1;
while ( $toscrape[$y] = <SCRAPEFILE> ) {
chop( $toscrape[$y] );
$y++;
}
close( SCRAPEFILE );
print( "Now opening the output file.\n" );
$z = 1;
open( OUTPUT, ">output.html" );
print( "Now scraping sites.\n" );
while ( $z <= @gsi_info ) { #This loop contains SITES
system( "rm -f index.html.*" );
system( "wget $gsi_info[$z]" );
$z1 = 1;
print( "Searching site $gsi_info[$z] for $toscrape[$z1]\n" );
open( TEMPFILE, "$gsi_info[$z]" );
$comptext = <TEMPFILE>;
while ( $comptext =~ /$toscrape[z1]/ig ) { # This loop fetches data from the search terms
print( "Now scraping $gsi_info[$z] for $toscrape[$z1]\n" );
print OUTPUT ( "$toscrape[$z1]\n" );
$z1++;
}
close( TEMPFILE );
$z++;
}
close( OUTPUT );
return ( @gsi_info );
}