-1

I'm trying to write a handy little extension method that will take a string and return it formatted as if it were a lowerCamelCase JSON identifier.

This is what I have so far.. please can I get help improving it?

I need the following behaviour:

  • UserName => userName
  • UserID => userId
  • user => user
  • fishing trip => fishingTrip
  • 123123123 => 123123123
  • aaa123 => aaa123
  • hello world this is me => helloWorldThisIsMe

Bonus - could we somehow strip out non-[A-Za-z0-9] characters as well? I guess I could just do another round of regex?

Thanks for any help!

    /// <summary>
    /// Converts an arbitrary string into a lowerCamelCase identifier suitable for a JSON key.
    /// </summary>
    /// <remarks>
    /// Processing steps:
    /// 1. every character outside [A-Za-z0-9] and whitespace is removed;
    /// 2. a word boundary is inserted wherever a lower-case letter is directly followed by an
    ///    upper-case letter (so "UserName" becomes the words "User" and "Name");
    /// 3. the text is split on ANY whitespace (spaces, tabs, newlines), each word is
    ///    normalised to "Xxxx", and the words are concatenated;
    /// 4. the first character of the result is lower-cased.
    /// Examples: "UserID" -> "userId", "fishing trip" -> "fishingTrip", "ID" -> "id".
    /// All casing uses the invariant culture so the result does not depend on the current
    /// thread culture (e.g. the Turkish dotless i).
    /// </remarks>
    /// <param name="s">The text to convert; may be null or empty.</param>
    /// <returns>
    /// <paramref name="s"/> unchanged when it is null or empty; an empty string when nothing
    /// but whitespace/stripped characters remain; otherwise the lowerCamelCase identifier.
    /// </returns>
    public static string ToJsonIdentifier(string s)
    {
        // Nothing to do for null / empty input; hand it back as-is.
        if (string.IsNullOrEmpty(s))
        {
            return s;
        }

        // Strip everything that is neither an ASCII letter/digit nor whitespace.
        s = Regex.Replace(s, @"[^A-Za-z0-9\s]+", "");

        // Only whitespace (or nothing) survived the clean-up.
        if (string.IsNullOrWhiteSpace(s))
        {
            return string.Empty;
        }

        // Mark lower->UPPER case changes as word boundaries.
        s = Regex.Replace(s, "([a-z])([A-Z])", "$1 $2");

        // A null separator array splits on every white-space character, not just ' ',
        // so tabs and newlines kept by the \s in the clean-up regex cannot leak into the output.
        string[] words = s.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        if (words.Length == 0)
        {
            return string.Empty;
        }

        // Normalise every word (even when there is only one) to "Xxxx" so that
        // all-caps input such as "ID" is treated the same way as the "ID" in "UserID".
        string pascal = string.Concat(
            words.Select(w => char.ToUpperInvariant(w[0]) + w.Substring(1).ToLowerInvariant()));

        // lowerCamelCase: the very first character is lower case.
        return char.ToLowerInvariant(pascal[0]) + pascal.Substring(1);
    }

Research: I've seen these questions, which seem related:

My ongoing attempt: https://dotnetfiddle.net/PR31Hl

user230910
  • 2,353
  • 2
  • 28
  • 50
  • Could you show your JSON, and does 's' hold your JSON string? – Prany Jun 11 '18 at 04:25
  • This is an extension method, so you could call it on any string.. I have added in the behaviour I'm expecting, please could you explain more about what you need? – user230910 Jun 11 '18 at 04:27

1 Answer

1

Your code seems to give the expected result already. What part do you want to improve?

I don't think this is exactly the answer you need, but I wanted to share how I would solve it: without LINQ or Regex, simply checking each character of the string in a single pass, which avoids the extra allocations and the overhead of those libraries. It should be more lightweight at runtime as well. Beware, though: it may not be as easy to read.

/// <summary>
/// Converts an arbitrary string into a lowerCamelCase identifier in a single pass,
/// without Regex or LINQ.
/// </summary>
/// <remarks>
/// Rules applied per character:
/// - whitespace is dropped and marks the start of a new word (leading whitespace is ignored);
/// - any other character that is not a letter or digit is dropped without starting a new word;
/// - the first emitted character is lower-cased;
/// - the first character of each subsequent whitespace-separated word is upper-cased;
/// - an upper-case letter following a non-upper-case character is kept (camel hump),
///   while further upper-case letters in the same run are lower-cased ("UserID" -> "userId");
/// - lower-case letters, digits and caseless letters are emitted unchanged.
/// Casing uses the invariant culture so the result is independent of the current culture.
/// </remarks>
/// <param name="s">The text to convert; may be null or empty.</param>
/// <returns>
/// <paramref name="s"/> unchanged when it is null or empty; otherwise the converted
/// identifier (an empty string when no letter or digit is present).
/// </returns>
public static string ToJsonIdentifier(string s)
{
    // Nothing to do for null / empty input; hand it back as-is.
    if (string.IsNullOrEmpty(s))
    {
        return s;
    }

    var result = new StringBuilder(s.Length);

    bool startOfWord = false;      // whitespace was seen since the last emitted character
    bool previousWasUpper = false; // the last source character consumed was an upper-case letter

    foreach (char c in s)
    {
        // Whitespace must be examined BEFORE the letter/digit filter, otherwise it is
        // discarded by that filter and word boundaries are never detected.
        if (char.IsWhiteSpace(c))
        {
            // Leading whitespace (nothing emitted yet) is not a word boundary.
            startOfWord = result.Length > 0;
            continue;
        }

        // Filter: keep letters and digits only.
        if (!char.IsLetterOrDigit(c))
        {
            continue;
        }

        if (result.Length == 0)
        {
            // Very first emitted character is always lower case. Remember whether the
            // source was upper case so that "ID" becomes "id" rather than "iD".
            result.Append(char.ToLowerInvariant(c));
            previousWasUpper = char.IsUpper(c);
        }
        else if (startOfWord)
        {
            // First character of a new word is upper-cased regardless of its source casing;
            // digits and caseless letters pass through ToUpperInvariant unchanged.
            char head = char.ToUpperInvariant(c);
            result.Append(head);
            startOfWord = false;
            // Treat the emitted capital as the start of an upper-case run so that
            // "hello WORLD" becomes "helloWorld".
            previousWasUpper = char.IsUpper(head);
        }
        else if (char.IsUpper(c))
        {
            // Keep the first capital of a run (camel hump), lower-case the rest of the run.
            result.Append(previousWasUpper ? char.ToLowerInvariant(c) : c);
            previousWasUpper = true;
        }
        else
        {
            // Lower-case letters, digits and caseless letters are copied verbatim
            // and end any upper-case run.
            result.Append(c);
            previousWasUpper = false;
        }
    }

    return result.ToString();
}
kurakura88
  • 2,185
  • 2
  • 12
  • 18
  • yeah, this is what i was hoping/looking for - non regex, single pass through the string, seems WAY more efficient than my method – user230910 Jun 12 '18 at 04:30