Despite the advice to use template or parser modules, I can assure you that none of those modules can handle this directly.
Here is the solution I came up with.
First, I find the outer blocks in the entire content or template with a simple regex:
/(<!--block:.*?-->(?:(?:(?!<!--(?:.*?)-->).)|(?R))*?<!--endblock-->)/gis
Then I parse each outer block to find its nested sub-blocks:
/(<!--block:(.*?)-->((?:(?:(?!<!--block:(?:.*?)-->).)|(?R))*?)<!--endblock-->|((?:(?!<!--.*?-->).)+))/igsx
With that, everything works well. I tested with two outer blocks, each of which has nested blocks.
I can reach any sub-block simply like this:
print $blocks->{first}->{content};
print $blocks->{first}->{match};
print $blocks->{first}->{second}->{third}->{fourth}->{content}
Each block hash ref has the following keys:
`content`: the block content without the block name and endblock tags.
`match`: the block content with the block name and endblock tags, good for replacing.
`#next`: the name of the sub-block, if one exists; useful for checking whether a block has children and for accessing them.
Below is the final, tested and working Perl code.
use strict;
use warnings;
use Data::Dumper;

# Slurp the whole template from the DATA section. The record separator is
# localized inside the do-block so it is undefined only for this one read
# instead of being changed globally for the rest of the program.
my $content = do { local $/; <DATA> };

# Build the nested hash of blocks and dump it for inspection.
my $blocks = parse_blocks($content);
print Dumper($blocks);

# Usage examples (uncomment to try):
#print join "\n", keys( %{$blocks} );                    # root block names
#print join "\n", keys( %{$blocks->{first}} );           # keys stored for block "first"
#print join "\n", keys( %{$blocks->{first}->{second}} ); # keys stored for block "second"
#print Dumper $blocks->{first};
#print Dumper $blocks->{first}->{content};
#print Dumper $blocks->{first}->{match};
# check if fourth block has sub block.
#print exists $blocks->{first}->{second}->{third}->{fourth}->{'#next'}, "\n";
# check if block has sub block, get it:
#if (exists $blocks->{first}->{second}->{third}->{fourth}->{'#next'}) {
#    print $blocks->{first}->{second}->{third}->{fourth}->{ $blocks->{first}->{second}->{third}->{fourth}->{'#next'} }->{content}, "\n";
#}
exit;
#================================================
# parse_blocks($content)
#
# Finds every outermost <!--block:NAME--> ... <!--endblock--> span in
# $content and hands each one to parse_nest_blocks, which records the block
# (and what it finds nested inside it) in a shared hash.
#
# Returns a reference to that hash; it is empty when no block is found.
sub parse_blocks {
    my ($content) = @_;
    my %tree;

    # With /g in list context and a single capture group, the match returns
    # the full text of each outer block. (?R) re-enters the whole pattern so
    # that a nested block is consumed together with its own endblock tag.
    my @outer_blocks = $content =~ /(<!--block:.*?-->(?:(?:(?!<!--(?:.*?)-->).)|(?R))*?<!--endblock-->)/gis;

    # parse each outer block's nested blocks
    for my $outer (@outer_blocks) {
        parse_nest_blocks(\%tree, $outer);
    }

    return \%tree;
}
#================================================
# parse_nest_blocks($aref, $core)
#
# Scans $core for <!--block:NAME--> ... <!--endblock--> blocks at its top
# nesting level and records each one in the hash referenced by $aref:
#
#   $aref->{NAME}{content}  block body without the opening/closing tags
#   $aref->{NAME}{match}    block text including the opening/closing tags
#   $aref->{NAME}{'#next'}  name of the first block nested directly inside
#                           NAME (key is absent when there is none)
#   $aref->{NAME}{CHILD}    one entry per nested block, built recursively
#
# Every sibling block at a level is recorded, not only the first one.
# Two blocks with the same name at the same level overwrite each other,
# because the name is the hash key.
#
# Returns the name of the first block found in $core, or undef when $core
# contains no block.
sub parse_nest_blocks {
    my ($aref, $core) = @_;
    my $first_name;

    while ( $core =~ /(<!--block:(.*?)-->((?:(?:(?!<!--block:(?:.*?)-->).)|(?R))*?)<!--endblock-->|((?:(?!<!--.*?-->).)+))/igsx )
    {
        # The second alternative only steps over plain text between tags;
        # it leaves $2 undefined.
        next unless defined $2;

        # Copy the captures before recursing: the nested call runs its own
        # matches.
        my ($match, $name, $body) = ($1, $2, $3);

        my $node = { content => $body, match => $match };
        $aref->{$name} = $node;

        # Descend into the body; the call reports the first child's name.
        my $child = parse_nest_blocks($node, $body);
        $node->{'#next'} = $child if defined $child;

        # Keep scanning so sibling blocks are recorded too, but remember
        # only the first name: that is what callers store under '#next'.
        $first_name = $name unless defined $first_name;
    }

    return $first_name;
}
#================================================
__DATA__
some html content here top base
<!--block:first-->
<table border="1" style="color:red;">
<tr class="lines">
<td align="left" valign="<--valign-->">
<b>bold</b><a href="http://www.mewsoft.com">mewsoft</a>
<!--hello--> <--again--><!--world-->
some html content here 1 top
<!--block:second-->
some html content here 2 top
<!--block:third-->
some html content here 3 top
<!--block:fourth-->
some html content here 4 top
<!--block:fifth-->
some html content here 5a
some html content here 5b
<!--endblock-->
<!--endblock-->
some html content here 3a
some html content here 3b
<!--endblock-->
some html content here 2 bottom
<!--endblock-->
some html content here 1 bottom
<!--endblock-->
some html content here1-5 bottom base
some html content here 6-8 top base
<!--block:six-->
some html content here 6 top
<!--block:seven-->
some html content here 7 top
<!--block:eight-->
some html content here 8a
some html content here 8b
<!--endblock-->
some html content here 7 bottom
<!--endblock-->
some html content here 6 bottom
<!--endblock-->
some html content here 6-8 bottom base
And the output of the entire hash dump is:
$VAR1 = {
'first' => {
'second' => {
'third' => {
'match' => '<!--block:third-->
some html content here 3 top
<!--block:fourth-->
some html content here 4 top
<!--block:fifth-->
some html content here 5a
some html content here 5b
<!--endblock-->
<!--endblock-->
some html content here 3a
some html content here 3b
<!--endblock-->',
'content' => '
some html content here 3 top
<!--block:fourth-->
some html content here 4 top
<!--block:fifth-->
some html content here 5a
some html content here 5b
<!--endblock-->
<!--endblock-->
some html content here 3a
some html content here 3b
',
'fourth' => {
'fifth' => {
'match' => '<!--block:fifth-->
some html content here 5a
some html content here 5b
<!--endblock-->',
'content' => '
some html content here 5a
some html content here 5b
'
},
'match' => '<!--block:fourth-->
some html content here 4 top
<!--block:fifth-->
some html content here 5a
some html content here 5b
<!--endblock-->
<!--endblock-->',
'content' => '
some html content here 4 top
<!--block:fifth-->
some html content here 5a
some html content here 5b
<!--endblock-->
',
'#next' => 'fifth'
},
'#next' => 'fourth'
},
'match' => '<!--block:second-->
some html content here 2 top
<!--block:third-->
some html content here 3 top
<!--block:fourth-->
some html content here 4 top
<!--block:fifth-->
some html content here 5a
some html content here 5b
<!--endblock-->
<!--endblock-->
some html content here 3a
some html content here 3b
<!--endblock-->
some html content here 2 bottom
<!--endblock-->',
'content' => '
some html content here 2 top
<!--block:third-->
some html content here 3 top
<!--block:fourth-->
some html content here 4 top
<!--block:fifth-->
some html content here 5a
some html content here 5b
<!--endblock-->
<!--endblock-->
some html content here 3a
some html content here 3b
<!--endblock-->
some html content here 2 bottom
',
'#next' => 'third'
},
'match' => '<!--block:first-->
<table border="1" style="color:red;">
<tr class="lines">
<td align="left" valign="<--valign-->">
<b>bold</b><a href="http://www.mewsoft.com">mewsoft</a>
<!--hello--> <--again--><!--world-->
some html content here 1 top
<!--block:second-->
some html content here 2 top
<!--block:third-->
some html content here 3 top
<!--block:fourth-->
some html content here 4 top
<!--block:fifth-->
some html content here 5a
some html content here 5b
<!--endblock-->
<!--endblock-->
some html content here 3a
some html content here 3b
<!--endblock-->
some html content here 2 bottom
<!--endblock-->
some html content here 1 bottom
<!--endblock-->',
'content' => '
<table border="1" style="color:red;">
<tr class="lines">
<td align="left" valign="<--valign-->">
<b>bold</b><a href="http://www.mewsoft.com">mewsoft</a>
<!--hello--> <--again--><!--world-->
some html content here 1 top
<!--block:second-->
some html content here 2 top
<!--block:third-->
some html content here 3 top
<!--block:fourth-->
some html content here 4 top
<!--block:fifth-->
some html content here 5a
some html content here 5b
<!--endblock-->
<!--endblock-->
some html content here 3a
some html content here 3b
<!--endblock-->
some html content here 2 bottom
<!--endblock-->
some html content here 1 bottom
',
'#next' => 'second'
},
'six' => {
'match' => '<!--block:six-->
some html content here 6 top
<!--block:seven-->
some html content here 7 top
<!--block:eight-->
some html content here 8a
some html content here 8b
<!--endblock-->
some html content here 7 bottom
<!--endblock-->
some html content here 6 bottom
<!--endblock-->',
'content' => '
some html content here 6 top
<!--block:seven-->
some html content here 7 top
<!--block:eight-->
some html content here 8a
some html content here 8b
<!--endblock-->
some html content here 7 bottom
<!--endblock-->
some html content here 6 bottom
',
'seven' => {
'match' => '<!--block:seven-->
some html content here 7 top
<!--block:eight-->
some html content here 8a
some html content here 8b
<!--endblock-->
some html content here 7 bottom
<!--endblock-->',
'content' => '
some html content here 7 top
<!--block:eight-->
some html content here 8a
some html content here 8b
<!--endblock-->
some html content here 7 bottom
',
'eight' => {
'match' => '<!--block:eight-->
some html content here 8a
some html content here 8b
<!--endblock-->',
'content' => '
some html content here 8a
some html content here 8b
'
},
'#next' => 'eight'
},
'#next' => 'seven'
}
};