16

I was wondering if there is any way I could easily split a string at spaces, except when the space is inside quotation marks?

For example, changing

Foo bar random "letters lol" stuff

into

Foo, bar, random, "letters lol", stuff

Jonathan Hall
  • 75,165
  • 16
  • 143
  • 189
MOBlox
  • 191
  • 1
  • 4

3 Answers

18

Think about it. You have a string in comma separated values (CSV) file format, RFC4180, except that your separator, outside quote pairs, is a space (instead of a comma). For example,

package main

import (
    "encoding/csv"
    "fmt"
    "strings"
)

// main treats the sample line as a single CSV record whose delimiter is a
// space instead of a comma, then prints the original string followed by each
// parsed field in Go-quoted form. The CSV reader strips the double quotes
// around a quoted field, so "letters lol" comes back as one unquoted value.
func main() {
    const input = `Foo bar random "letters lol" stuff`
    fmt.Printf("String:\n%q\n", input)

    // Configure the reader to split on spaces rather than commas.
    reader := csv.NewReader(strings.NewReader(input))
    reader.Comma = ' '

    record, err := reader.Read()
    if err != nil {
        fmt.Println(err)
        return
    }

    fmt.Print("\nFields:\n")
    for i := range record {
        fmt.Printf("%q\n", record[i])
    }
}

Playground: https://play.golang.org/p/Ed4IV97L7H

Output:

String:
"Foo bar random \"letters lol\" stuff"

Fields:
"Foo"
"bar"
"random"
"letters lol"
"stuff"
peterSO
  • 158,998
  • 31
  • 281
  • 276
  • That isn't the output I really wanted. I wanted, for example, the `letters lol` to be in quotation marks and the others not. Also, I didn't do a very good job with the question... I was going to use it to split `blockdata 8539 58 584 {CustomName:"Foo"}` into `blockdata`, `8539`, `58`, `584`, `{CustomName:"Foo"}`... Should have mentioned that. – MOBlox Nov 27 '17 at 07:57
13
  1. Using strings.FieldsFunc try this:
package main

import (
    "fmt"
    "strings"
)

// main splits the sample line at spaces that are outside double quotes and
// prints the pieces joined by ", ". Quote characters stay in the output.
//
// NOTE(review): the separator predicate below is stateful - it flips a flag
// each time it sees a '"'. The strings.FieldsFunc documentation says it makes
// no guarantee about the order in which the predicate is called and assumes
// the predicate always returns the same value for a given rune, so this
// relies on behavior the API does not promise.
func main() {
    const input = `Foo bar random "letters lol" stuff`

    inQuotes := false
    isSeparator := func(c rune) bool {
        if c == '"' {
            // A quote toggles the state and is never itself a separator.
            inQuotes = !inQuotes
            return false
        }
        return c == ' ' && !inQuotes
    }

    parts := strings.FieldsFunc(input, isSeparator)
    fmt.Println(strings.Join(parts, ", ")) // Foo, bar, random, "letters lol", stuff
}

  2. Using a simple strings.Builder and ranging over the string, keeping or dropping the " characters as you prefer, try this:
package main

import (
    "fmt"
    "strings"
)

// main splits the sample line at spaces outside double quotes and prints the
// fields joined by ", ".
func main() {
    s := `Foo bar random "letters lol" stuff`

    out := strings.Join(splitQuoted(s), ", ")
    fmt.Println(out) // Foo, bar, random, "letters lol", stuff
    // not keep '"': // Foo, bar, random, letters lol, stuff
}

// splitQuoted splits s at spaces that lie outside double-quoted sections.
// The quote characters are kept in the returned fields. Leading, trailing and
// repeated spaces never produce empty fields: a field is only emitted when at
// least one rune has been collected for it. It returns nil when s contains no
// fields.
func splitQuoted(s string) []string {
    var (
        fields []string
        sb     strings.Builder
        quoted bool
    )

    // flush emits the field collected so far, if any, and starts a new one.
    flush := func() {
        if sb.Len() > 0 {
            fields = append(fields, sb.String())
            sb.Reset()
        }
    }

    for _, r := range s {
        switch {
        case r == '"':
            quoted = !quoted
            sb.WriteRune(r) // keep '"' otherwise comment this line
        case r == ' ' && !quoted:
            flush()
        default:
            sb.WriteRune(r)
        }
    }
    flush() // the last field has no trailing space to terminate it

    return fields
}


  3. Using scanner.Scanner, try this:
package main

import (
    "fmt"
    "strings"
    "text/scanner"
)

// main tokenizes the sample line with text/scanner and prints the token texts
// joined by ", ". For this input the quoted section comes back as a single
// token whose text still includes the surrounding double quotes.
func main() {
    var sc scanner.Scanner
    sc.Init(strings.NewReader(`Foo bar random "letters lol" stuff`))

    // Collect token texts until the scanner reports end of input.
    tokens := make([]string, 0, 5)
    for sc.Scan() != scanner.EOF {
        tokens = append(tokens, sc.TokenText())
    }

    fmt.Println(strings.Join(tokens, ", ")) // Foo, bar, random, "letters lol", stuff
}

  4. Using csv.NewReader, which strips the enclosing " characters itself, try this:
package main

import (
    "encoding/csv"
    "fmt"
    "log"
    "strings"
)

// main reads the sample line as one CSV record delimited by spaces and prints
// its fields joined by ", ". The reader strips the double quotes around the
// quoted field, so they do not appear in the output. A read error terminates
// the program via log.Fatal.
func main() {
    const input = `Foo bar random "letters lol" stuff`

    reader := csv.NewReader(strings.NewReader(input))
    reader.Comma = ' ' // split on spaces instead of commas

    fields, err := reader.Read()
    if err != nil {
        log.Fatal(err)
    }

    fmt.Println(strings.Join(fields, ", ")) // Foo, bar, random, letters lol, stuff
}

  5. Using regexp, try this:
package main

import (
    "fmt"
    "regexp"
    "strings"
)

// main extracts the fields of the sample line with a regular expression and
// prints them joined by ", ". A field is either a run of characters that are
// neither whitespace nor '"', or a complete double-quoted section, which is
// returned with its quotes.
func main() {
    const input = `Foo bar random "letters lol" stuff`

    // First alternative: bare word. Second alternative: "quoted section".
    fieldRe := regexp.MustCompile(`[^\s"]+|"([^"]*)"`)
    matches := fieldRe.FindAllString(input, -1)

    fmt.Println(strings.Join(matches, ", ")) // Foo, bar, random, "letters lol", stuff
}
wasmup
  • 14,541
  • 6
  • 42
  • 58
2

You could use regex

This (go playground) will cover all use cases for multiple words inside quotes and multiple quoted entries in your array:

package main

import (
    "fmt"
    "regexp"
)

func main() {
    s := `Foo bar random "letters lol" stuff "also will" work on "multiple quoted stuff"`       
    r := regexp.MustCompile(`[^\s"']+|"([^"]*)"|'([^']*)`) 
    arr := r.FindAllString(s, -1)       
    fmt.Println("your array: ", arr)    
}

Output will be:

your array:  [Foo bar random "letters lol" stuff "also will" work on "multiple quoted stuff"]

If you want to learn more about regex here is a great SO answer with super handy resources at the end - Learning Regular Expressions

Hope this helps

Blue Bot
  • 2,278
  • 5
  • 23
  • 33
  • Seems like you only need: `[^\s"]+|"([^"]*)"`. I guess the intent was to handle single quotes also, but that isn't in the test (and doesn't work due to missing `'` at the end). – Brent Bradburn Dec 03 '19 at 04:17