0

I'm trying to match and replace a pattern in an XML string, where the pattern covers the various kinds of attributes that can be present on any given XML element.

so if the xml string was:

<TEST xlmns="https://www.test.com">
    <XXX>Foo</XXX>
    <YYY>Bar</YYY>
</TEST>

I want to remove the namespaces above using the pattern .*?(?:[a-z][a-z0-9_]*).*?((?:[a-z][a-z0-9_]*))(=)(\".*?\") in the code below:

namespace ConsoleApplication1
{
    // Console demo from the question: matches an attribute-like pattern against a
    // sample XML string, prints the captured pieces, then prints the string with
    // every match replaced by an empty string.
    class Program
    {
        // Entry point. Runs the match/replace demo on a hard-coded sample and then
        // waits for a line of console input before exiting.
        static void Main(string[] args)
        {
            var txt = "<TEST xlmns=\"https://www.test.com\"> <XXX>Foo</XXX> <YYY>Bar</YYY> </TEST>";

            // Three capturing groups: (identifier) (=) ("quoted value").
            // The leading ".*?", the non-capturing identifier and the second ".*?" sit
            // outside the groups but are still part of the overall match, so a replace
            // removes that leading text (here "<TEST ") together with the attribute.
            const string pattern = ".*?(?:[a-z][a-z0-9_]*).*?((?:[a-z][a-z0-9_]*))(=)(\".*?\")";    

            var r = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Singleline);
            var m = r.Match(txt);
            if (m.Success)
            {
                // Groups 1..3 hold the attribute name, the '=' sign and the quoted value.
                String var1 = m.Groups[1].ToString();
                String c1 = m.Groups[2].ToString();
                String string1 = m.Groups[3].ToString();
                Console.Write( var1.ToString() +  c1.ToString() + string1.ToString()  + "\n");
                // Replaces every full match (not just the captured groups) with "".
                Console.WriteLine(RegExReplace(txt,pattern,""));
            }
            // Keeps the console window open until a line is read.
            Console.ReadLine();
        }

        // Returns input with every match of pattern replaced by replacement.
        // A null or empty input is returned unchanged.
        // Note: only RegexOptions.IgnoreCase is used here, whereas the Regex built in
        // Main also sets RegexOptions.Singleline.
        static String RegExReplace(String input, String pattern, String replacement)
        {
            if (string.IsNullOrEmpty(input))
                return input;

            return Regex.Replace(input, pattern, replacement, RegexOptions.IgnoreCase);
        }
    }
}

But where it matches, in this case <TEST xlmns="https://www.test.com"> is turned into > when it should have been <TEST>

What have I done wrong in the replace method?

abatishchev
  • 98,240
  • 88
  • 296
  • 433
ke3pup
  • 1,835
  • 4
  • 36
  • 66
  • Your biggest problem is that you're trying to parse XML using a regular expression. [Don't do that.](http://stackoverflow.com/a/1732454/87399) Use the right tool for the job, which means an XML library, such as [XDocument](http://stackoverflow.com/a/1542101/87399). – Joe White Mar 23 '14 at 03:24

1 Answers1

0

If you just want to remove the namespace, change your regex to:

// [^"]* stops at the first closing quote; a greedy .* would run on to the last
// quote on the line and swallow any attributes that follow the namespace.
const string pattern = "xlmns=\"[^\"]*\"";

If you want to remove all attributes, use the given regex:

// Backslashes are doubled because "\w" in a regular C# string literal is an
// unrecognized escape sequence (compiler error CS1009).
// \s+ also removes the whitespace in front of the attribute, and [^"]* keeps each
// match inside a single quoted value. Groups 1..3 = name, '=', quoted value.
const string pattern = "\\s+(\\w+)(=)(\"[^\"]*\")";

Full code:

namespace ConsoleApplication1
{
    // Console demo: strips every name="value" attribute from an XML-like string
    // using a regular expression.
    class Program
    {
        // Entry point. Prints the first attribute found in the sample string
        // (name, '=', quoted value), then the sample with all attributes removed,
        // and finally waits for a line of console input before exiting.
        static void Main(string[] args)
        {
            var txt = "<TEST xlmns=\"https://www.test.com\"> <XXX>Foo</XXX> <YYY>Bar</YYY> </TEST>";

            // Backslashes are doubled because "\w" in a regular C# string literal is
            // an unrecognized escape sequence (compiler error CS1009).
            //   \s+         the whitespace in front of the attribute, so that
            //               "<TEST xlmns=...>" becomes "<TEST>" rather than "<TEST >"
            //   (\w+)       group 1: attribute name
            //   (=)         group 2: the equals sign
            //   ("[^"]*")   group 3: quoted value; [^"]* stops at the first closing
            //               quote so one match never spans several attributes
            const string pattern = "\\s+(\\w+)(=)(\"[^\"]*\")";

            // Same options as RegExReplace, so the preview and the replace agree.
            var m = Regex.Match(txt, pattern, RegexOptions.IgnoreCase);
            if (m.Success)
            {
                String var1 = m.Groups[1].Value;
                String c1 = m.Groups[2].Value;
                String string1 = m.Groups[3].Value;
                Console.Write(var1 + c1 + string1 + "\n");
                Console.WriteLine(RegExReplace(txt, pattern, ""));
            }
            // Keeps the console window open until a line is read.
            Console.ReadLine();
        }

        // Returns input with every match of pattern replaced by replacement,
        // matching case-insensitively. A null or empty input is returned unchanged.
        static String RegExReplace(String input, String pattern, String replacement)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            return Regex.Replace(input, pattern, replacement, RegexOptions.IgnoreCase);
        }
    }
}
Amit Joki
  • 58,320
  • 7
  • 77
  • 95
  • i tried the second pattern you've provided (`const string pattern = "\w+=\".*\"";`) but the code doesn't compile with `Unrecognized Escape Sequence Error` – ke3pup Mar 23 '14 at 11:32
  • mate just copy as it is – Amit Joki Mar 23 '14 at 11:33
  • i just did - still same error (Unrecognized Escape Sequence) which is shown for this line `const string pattern = "\w+=\".*\""; ` – ke3pup Mar 23 '14 at 11:43