I have xml data like this i want extract content in ce:afffilliation and sa:afffilliation after extract put into two variable and in sa:affilliation variable make text like ce:affillition and compare two text
<ce:affiliation id="aff1"><ce:label>a</ce:label><ce:textfn>Department of Urology, Radboud University Nijmegen Medical Center, Nijmegen, The Netherlands</ce:textfn><sa:affiliation><sa:organization>Department of Urology</sa:organization><sa:organization>Radboud University Nijmegen Medical Center</sa:organization><sa:city>Nijmegen</sa:city><sa:country>The Netherlands</sa:country></sa:affiliation></ce:affiliation><ce:affiliation id="aff2"><ce:label>b</ce:label><ce:textfn>Norris Comprehensive Cancer Center, University of Southern California Institute of Urology, Los Angeles, California</ce:textfn><ce:affiliation id="aff3"><ce:label>c</ce:label><ce:textfn>Department of Urology, Stanford University, Stanford, California</ce:textfn><sa:affiliation><sa:organization>Department of Urology</sa:organization><sa:organization>Stanford University</sa:organization><sa:city>Stanford</sa:city><sa:state>California</sa:state></sa:affiliation></ce:affiliation>
#!/usr/bin/perl
@files = <*.xml>;
open my $out, '>', 'output.xml' or die $!;
foreach $file (@files) {
open (FILE, "$file");
$a =1;
while(my $line= <FILE> ){
do{
if ($line =~ /<ce:affiliation id=\"aff$a\">(.+?)<ce:textfn>(.+?)<\/ce:textfn><sa:affiliation>(.+?)<\/sa:affiliation><\/ce:affiliation>/){
$count = $3;
$textfn = $2;
print ("$count\n");
print ("$textfn\n");
if ($count =~ /<\/sa:(.+?)>/){
$count =~ s/<\/sa:organization>/, /g;
$count =~ s/<\/sa:city>/, /g;
$count =~ s/<\/sa:country>/, /g;
$count =~ s/<\/sa:state>/, /g;
$count =~ s/<sa:organization>//g;
$count =~ s/<sa:city>//g;
$count =~ s/<sa:country>//g;
$count =~ s/<sa:state>//g;
chop($count);
chop($count);
if($count ne $textfn){
print $out("$file affilliation $a is mismatch\n");}}}
else{
if($line =~ /<ce:affiliation id=\"aff$a\">(.+?)<ce:textfn>(.+?)<\/ce:textfn><\/ce:affiliation>/){
print $out("$file sa:affilliation missing for $a\n");}}
$a=$a+1;}
while($line =~ /aff$a/);}}
This code failed if some ce:affillition not contains
<ce:label> and <sa:affillition>
<ce:affiliation id="aff1"><ce:label>a</ce:label><ce:textfn>Department of Urology, Radboud University Nijmegen Medical Center, Nijmegen, The Netherlands</ce:textfn><sa:affiliation><sa:organization>Department of Urology</sa:organization><sa:organization>Radboud University Nijmegen Medical Center</sa:organization><sa:city>Nijmegen</sa:city><sa:country>The Netherlands</sa:country></sa:affiliation></ce:affiliation><ce:affiliation id="aff2"><ce:textfn>Norris Comprehensive Cancer Center, University of Southern California Institute of Urology, Los Angeles, California</ce:textfn></ce:affiliation><ce:affiliation id="aff3"><ce:label>c</ce:label><ce:textfn>Department of Urology, Stanford University, Stanford, California</ce:textfn><sa:affiliation><sa:organization>Department of Urology</sa:organization><sa:organization>Stanford University</sa:organization><sa:city>Stanford</sa:city><sa:state>California</sa:state></sa:affiliation></ce:affiliation><ce:correspondence id="cor1"></article>