3

I am trying to grep multiple strings from a pom.xml file. For example, below are the contents of the XML file:

<?xml version="1.0" encoding="UTF-8"?>

<!-- Maven build descriptor for the Magellan23888Adapter Mule application. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.bluesoft.muleesb</groupId>
    <artifactId>Magellan23888Adapter</artifactId>
    <version>1.0.0-SNAPSHOT</version>
    <packaging>mule-application</packaging>

    <name>Magellan23888Adapter</name>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <app.runtime>4.3.0</app.runtime>
        <!-- Referenced by the mule-maven-plugin declaration in the build section. -->
        <mule.maven.plugin.version>3.3.5</mule.maven.plugin.version>
    </properties>

    <build>
        <plugins>
            <plugin>
                <groupId>org.mule.tools.maven</groupId>
                <artifactId>mule-maven-plugin</artifactId>
                <version>${mule.maven.plugin.version}</version>
                <extensions>true</extensions>
                <configuration>
                    <sharedLibraries>
                        <sharedLibrary>
                            <groupId>com.ibm.mq</groupId>
                            <artifactId>com.ibm.mq.allclient</artifactId>
                        </sharedLibrary>
                    </sharedLibraries>
                    <classifier>mule-application</classifier>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <distributionManagement>
        <!-- NOTE(review): these deploy URLs use plain http while the repositories declared
             further down use https; confirm whether https is available here as well. -->
        <repository>
            <id>releases</id>
            <name>bluesoft releases</name>
            <url>http://nexus.bluesoft.com/repository/releases</url>
        </repository>
        <snapshotRepository>
            <id>snapshots</id>
            <name>bluesoft snapshots</name>
            <url>http://nexus.bluesoft.com/repository/snapshots</url>
        </snapshotRepository>
    </distributionManagement>
    <dependencies>
        <dependency>
            <groupId>com.mulesoft.connectors</groupId>
            <artifactId>mule-x12-connector</artifactId>
            <version>2.6.2</version>
            <classifier>mule-plugin</classifier>
            <!-- The transitive log4j artifacts are excluded here; log4j is declared
                 explicitly at the end of the dependency list. -->
            <exclusions>
                <exclusion>
                    <groupId>org.apache.logging.log4j</groupId>
                    <artifactId>log4j-core</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.logging.log4j</groupId>
                    <artifactId>log4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.mule.connectors</groupId>
            <artifactId>mule-objectstore-connector</artifactId>
            <version>1.1.7</version>
            <classifier>mule-plugin</classifier>
        </dependency>
        <dependency>
            <groupId>com.ibm.mq</groupId>
            <artifactId>com.ibm.mq.allclient</artifactId>
            <version>9.2.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.mule.modules</groupId>
            <artifactId>mule-json-module</artifactId>
            <version>2.1.3</version>
            <classifier>mule-plugin</classifier>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.12.0</version>
        </dependency>
        <dependency>
            <groupId>org.mule.connectors</groupId>
            <artifactId>mule-http-connector</artifactId>
            <version>1.5.6</version>
            <classifier>mule-plugin</classifier>
        </dependency>
        <dependency>
            <groupId>org.mule.connectors</groupId>
            <artifactId>mule-file-connector</artifactId>
            <version>1.3.3</version>
            <classifier>mule-plugin</classifier>
        </dependency>
        <dependency>
            <groupId>com.mulesoft.connectors</groupId>
            <artifactId>mule-ibm-mq-connector</artifactId>
            <version>1.6.3</version>
            <classifier>mule-plugin</classifier>
        </dependency>
        <dependency>
            <groupId>org.mule.connectors</groupId>
            <artifactId>mule-sftp-connector</artifactId>
            <version>1.4.0</version>
            <classifier>mule-plugin</classifier>
        </dependency>
        <dependency>
            <groupId>com.mulesoft.modules</groupId>
            <artifactId>mule-secure-configuration-property-module</artifactId>
            <version>1.1.0</version>
            <classifier>mule-plugin</classifier>
        </dependency>
        <dependency>
            <groupId>org.mule.connectors</groupId>
            <artifactId>mule-email-connector</artifactId>
            <version>1.4.1</version>
            <classifier>mule-plugin</classifier>
        </dependency>
        <!-- log4j is pinned explicitly. 2.17.1 replaces 2.16.0, which is affected by
             CVE-2021-45105 (fixed in 2.17.0) and CVE-2021-44832 (fixed in 2.17.1).
             Keep both artifacts on the same version. -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.17.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>2.17.1</version>
        </dependency>
    </dependencies>

    <repositories>
        <repository>
            <id>bluesoft-nexus</id>
            <name>bluesoft Nexus</name>
            <url>https://nexus.bluesoft.com/content/groups/public</url>
            <layout>default</layout>
        </repository>
    </repositories>
    <pluginRepositories>
        <pluginRepository>
            <id>bluesoft-nexus-plugins</id>
            <name>bluesoft Nexus</name>
            <layout>default</layout>
            <url>https://nexus.bluesoft.com/content/groups/public</url>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </pluginRepository>
    </pluginRepositories>

</project>

I am running the command below against the file above to extract the matching entries:

# Print every line of pom.xml that contains either pattern (case-insensitively, -i),
# followed by the three lines after it (-A3).
# NOTE: the dots are unescaped, so each one matches any character, not only a literal ".".
grep -i -A3 'com.mulesoft.connectors\|org.mule.connectors' pom.xml

When the above criteria match, I want to print the values as shown further below.

I am trying to capture all the matches produced by the following command:

# Same search as before: each matching line plus the three lines that follow it.
grep -i -A3 'com.mulesoft.connectors\|org.mule.connectors' pom.xml 

From the output of this command I am trying to extract the artifactId and version.

expected output:

artifactId value,version
------------------------
mule-ibm-mq-connector,1.8.3
mule-file-connector,1.5.2
mule-salesforce-connector,10.8.1
mule-email-connector,1.1.8
mule-connector,1.4.1
RavinderSingh13
  • 130,504
  • 14
  • 57
  • 93
  • 4
    obligatory https://stackoverflow.com/a/1732454/1032785 – jordanm Aug 30 '22 at 18:01
  • 1
    This is something that would be readily written as an XSLT template. Look at [XMLStarlet](http://xmlstar.sourceforge.net/) as an example of a tool that will write those templates for you (after they're built, you can run them with xmlproc, which is preinstalled pretty much everywhere). Of course, you can also just tell xmlstarlet to just output the data you want instead of having it write the template explicitly. – Charles Duffy Aug 30 '22 at 18:06
  • Please show valid XML. – Cyrus Aug 30 '22 at 18:33
  • 1
    (err, when I said "xmlproc" above that should have been "xsltproc") – Charles Duffy Aug 30 '22 at 19:51
  • @jordanm I suggest that you *not* post that answer, because chances are that OP won't understand it. You and I may laugh at it because we understand what it's saying, but rookies looking for help won't get it. Instead, point to a something that actually explains the problem. – Andy Lester Aug 30 '22 at 19:55

3 Answers

3

An XML parser such as xmllint suits this task better, but if you cannot install a new tool for some reason, you may consider this grep | gnu-awk solution:

# Select every line containing one of the two groupIds plus the three lines after it.
# GNU grep prints a line consisting only of "--" between non-adjacent context groups.
grep -i -A3 'com\.mulesoft\.connectors\|org\.mule\.connectors' pom.xml |
awk -v RS='\n--\n' -v OFS=, '
  # RS is the whole separator line, so a double hyphen that appears inside the
  # data itself can no longer split a record. A multi-character RS needs GNU awk or mawk.
  BEGIN { print "artifactId value,version" }
  {
    # Strip every XML tag; the remaining whitespace-separated fields are
    # $1 = groupId, $2 = artifactId, $3 = version.
    gsub(/<\/?[^>]+>/, "")
    print $2, $3
  }'

artifactId value,version
mule-x12-connector,2.6.2
mule-objectstore-connector,1.1.7
mule-http-connector,1.5.6
mule-file-connector,1.3.3
mule-ibm-mq-connector,1.6.3
mule-sftp-connector,1.4.0
mule-email-connector,1.4.1
anubhava
  • 761,203
  • 64
  • 569
  • 643
  • I tried both the solutions but non is working @anubhava – Praveen Kumar Aug 30 '22 at 19:06
  • I want to capture all from my below command grep -i -A3 'com.mulesoft.connectors\|org.mule.connectors' pom.xml from this command i am trying to get artifactId and version – Praveen Kumar Aug 30 '22 at 19:18
  • i edited my question and expected output as well in my post – Praveen Kumar Aug 30 '22 at 19:28
  • your solution works but the problem is it is also printing the values from other groupId tags, apart from the two that i am looking for – Praveen Kumar Aug 30 '22 at 19:41
  • solution worked. I really appreciate taking time and helping me with the solution. Is there any way i can also append the file name on each line? i tried to pass the variable but its not working – Praveen Kumar Aug 30 '22 at 19:57
  • I tried to pass variable but it is not working – Praveen Kumar Aug 30 '22 at 20:07
  • What are you trying to pass? Are you expanding your problem or it is part of the question's expected output? – anubhava Aug 30 '22 at 20:11
  • It is not part of the question, I was just trying to print as below

    artifactId value,version,filename mule-x12-connector,2.6.2,pom.xml
    – Praveen Kumar Aug 30 '22 at 20:16
  • 1
    ok try: `fn='pom.xml'; grep -i -A3 'com\.mulesoft\.connectors\|org\.mule\.connectors' "$fn" | awk -v fn="$fn" -v RS='--' -v OFS=, 'BEGIN {print "artifactId value,version,filename"} {gsub(/<\/?[^>]+>/, ""); print $2, $3, fn}'` – anubhava Aug 30 '22 at 20:18
  • 1
    Thank you. Looks like I was trying the same but looks like there was a typo and parenthesis was missing at the end. – Praveen Kumar Aug 30 '22 at 20:24
  • how can i ignore comments with in xml file? for eg some of my xml files has commented xml tags something like this and some has the comments ended at the end of the block of xml tags – Praveen Kumar Sep 13 '22 at 21:33
  • 1
    Praveen you need to post a new question for new requirements. Comment section is for discussion on current problem and solution – anubhava Sep 14 '22 at 02:05
2

With your shown samples and attempts, please try the following single awk program. It does not use grep: the matching, the condition checks, and the printing are all handled inside awk itself. It was written and tested in GNU awk and relies on GNU awk's match() function, which fills an array with the values of the capturing groups of the regex used in the solution.

Here is the Online Demo of the regex used, for understanding purposes. The regex creates 3 capturing groups: the 1st one is used to check the condition, and the other 2 are printed when the 1st group satisfies the OP's condition.

# GNU awk only: uses the three-argument match(), which stores capture groups in an array.
# RS='^$' is the usual gawk idiom for reading the whole input file as a single record.
awk -v RS='^$' '
{
  remaining = $0
  # Each hit is a groupId / artifactId / version triple on three consecutive lines.
  while (match(remaining, /<groupId>([^<]*)<\/groupId>\n[[:space:]]*<artifactId>([^<]*)<\/artifactId>\n[[:space:]]*<version>([^<]*)<\/version>/, hit)) {
    wanted = (hit[1] == "com.mulesoft.connectors" || hit[1] == "org.mule.connectors")
    if (wanted) {
      # Emit the header once, immediately before the first reported row.
      if (!header_done++)
        print "artifactId value,version"
      print hit[2] "," hit[3]
    }
    # Resume scanning right after the end of the current hit.
    remaining = substr(remaining, RSTART + RLENGTH)
  }
}
' Input_file
RavinderSingh13
  • 130,504
  • 14
  • 57
  • 93
  • @Praveen Kumar, Good that you have accepted an answer. You could check other answers also(once you get time) and reply them back how they went, by doing this it may help future users, cheers. – RavinderSingh13 Aug 31 '22 at 08:17
-1
# Pipe the XML text held in the shell variable input_data into mawk on stdin.
# The whole awk program is the pattern "NF=NF": assigning NF makes awk rebuild the
# record with OFS between fields, and the record is printed when the value is non-zero.
# FS, OFS and RS are supplied as command-line variable assignments after the program.
# FS matches a closing artifactId tag plus everything up to the end of the next tag.
# NOTE(review): both groups in RS are optional, so the regex can match the empty string;
# how that is handled depends on the awk implementation - verify with the awk in use.
echo "${input_data}" | 

mawk NF=NF FS='<[/]artifactId>[^>]+>'     OFS=',' \
         RS='(<[/]version>\n[^\n]+\n+)?([^\n]+\n+[ \t]+<artifactId>)?'
mule-ibm-mq-connector,1.8.3
mule-file-connector,1.5.2
mule-salesforce-connector,10.8.1
mule-email-connector,1.1.8
mule-connector,1.4.1

the header rows should be trivial

RARE Kpop Manifesto
  • 2,453
  • 3
  • 11