Sometimes, inside a Perl script, I want to do to a long string --- or to a file containing many long lines --- what the following would do on the Unix command line:
fmt myfile
# or
echo Now is the time, and also all work and no play makes Jack a dull boy, over and over | fmt -20
That is, I want to emulate the most basic task that Unix fmt
performs: breaking the lines of a file so that they do not run off the screen, while keeping paragraphs separate.
The script below does it. Am I missing something? Is there a much easier way to do it in Perl?
#!/usr/bin/perl
#
# Minimal fmt-like filter: reads the named files (or STDIN), re-fills each
# paragraph so that no output line is wider than --maxlimit columns, and
# prints the paragraphs separated by exactly one empty line.
#
# Options:
#   --maxlimit N      maximum output line width in characters (default 50)
#   --[no]diagnose    accepted but currently unused
#
use strict;
use warnings;
use Getopt::Long;

my $diagnose = 0;     # not used
my $maxlimit = 50;    # maximum width of an output line

# Run the filter only when this file is executed directly, so that the
# helper subs below can be loaded without reading any input.
main() unless caller;

# Parse the command line, slurp all input and print the re-filled text.
# Dies with a usage message when the options cannot be parsed.
sub main {
    # https://stackoverflow.com/questions/11526517/should-you-check-the-return-code-from-getoptlonggetoptions
    GetOptions(
        'diagnose!'  => \$diagnose,    # not used
        'maxlimit=i' => \$maxlimit,
    ) or die "Usage: $0 [--[no]diagnose] [--maxlimit N] [file ...]\n";

    # Localize $/ so slurp mode is confined to this one read.
    my $text = do { local $/; <> };
    $text = '' unless defined $text;    # no input at all

    print reformat_text( $text, $maxlimit );
    return;
}

# reformat_text($text, $width)
#   Splits $text into paragraphs (any all-whitespace line separates two
#   paragraphs), fills each one to at most $width columns, and returns the
#   result as a single string.  Every paragraph ends in a newline and
#   consecutive paragraphs are separated by one empty line.  Paragraphs
#   that contain no words (e.g. produced by leading blank lines) are
#   dropped; input without any words yields the empty string.
sub reformat_text {
    my ( $text, $width ) = @_;

    my @filled;
    for my $paragraph ( split /\n\s*\n/, $text ) {
        next unless $paragraph =~ /\S/;
        push @filled, fill_paragraph( $paragraph, $width );
    }

    # Each filled paragraph already ends in "\n"; joining with another
    # "\n" puts the empty line *between* paragraphs only.
    return join "\n", @filled;
}

# fill_paragraph($paragraph, $width)
#   Greedily packs the whitespace-separated words of $paragraph into lines
#   of at most $width characters and returns them joined by newlines, with
#   a final newline.  The line is broken *before* the word that would
#   overflow, so no line carries trailing whitespace.  A word longer than
#   $width is placed alone on its own line and is never split.
sub fill_paragraph {
    my ( $paragraph, $width ) = @_;

    my @lines;
    my $current = '';

    # split ' ' discards leading whitespace and collapses runs of blanks
    # and newlines, so only real words are seen here.
    for my $word ( split ' ', $paragraph ) {
        if ( $current eq '' ) {
            $current = $word;
        }
        elsif ( length($current) + 1 + length($word) <= $width ) {
            $current .= " $word";    # the +1 above is this interword space
        }
        else {
            push @lines, $current;
            $current = $word;
        }
    }
    push @lines, $current if $current ne '';

    return join( "\n", @lines ) . "\n";
}
example:
> echo Now is the time, and also all work and no play makes Jack a dull boy, over and over | myfmt -m 20
Now is the time, and
also all work and no
play makes Jack a dull
boy, over and over