1

Given below code

package main

import (
  "fmt"
  "sort"
  "strings"
)

func main() {

  s := []string{"eat", "tea", "tan", "ate", "nat", "bat"}
  result := groupAnagrams(s)
  fmt.Println(result)
}

func groupAnagrams(s []string) (out [][]string) {
  tmp := map[string][]string{}
  for _, v := range s {
    x := strings.Split(v, "")
    sort.Strings(x)
    anagram := strings.Join(x, "")
    items, ok := tmp[anagram]
    if ok {
      items = append(items, v)
      tmp[anagram] = items
      continue
    }
    tmp[anagram] = []string{v}
  }

  var keys []string
  for key := range tmp {
    keys = append(keys, key)
  }

  sort.Strings(keys) 

  for _, key := range keys {
    sort.Strings(tmp[key])
    out = append(out, tmp[key])
  }
  return
}

And its tests here https://play.golang.org/p/k8F1-FAC_au

can you help figuring the complexity ?

In my understanding, and without checking thoroughly the documentation.

  • for _, v := range s { // o(n)
  • sort.Strings(keys) //o(log n)
  • x := strings.Split(v, "") / anagram := strings.Join(x, "") //o(n)

Are those correct ? Am i missing some ? How to compute the total ?

Do you account for total allocations when computing the complexity of a code ?

  • 1
    Computational complexity isn't a precision measurement, it's a worst-case order of magnitude. In this case it looks like the total would be O(n), because that's the worst case of any part of the algorithm (nothing in the algorithm would drive it up to O(n^2) for example). – Adrian Aug 12 '21 at 14:59
  • @Adrian thank you for that explanation and recall of the basics. – clement auger Aug 12 '21 at 15:02
  • To your added question "Do you account for total allocations when computing the complexity of a code" - that's a different measure. You could measure Big-O for time or space, but you can't really do both in one measurement. – Adrian Aug 12 '21 at 15:18
  • @Adrian many thanks again. to my understanding it is a measure of benchmarks. but maybe there is more formal way to evaluate that aspects of an algorithm. – clement auger Aug 12 '21 at 15:22
  • @Adrian Isn't sorting like O(n log n) in the average/worst case? – Riwen Aug 12 '21 at 15:55
  • @Riwen you're right, I missed the sort of the whole result. Yes that would make it O(n log(n)). – Adrian Aug 12 '21 at 15:58
  • @Riwen but, i wonder about that because the end results, in my test example has 3 times less keys than the original input. Plus, this sort is based on data that are hard to predict, as far as i understand it. How does that account ? – clement auger Aug 12 '21 at 16:23

2 Answers2

0

(not an answer, more like a formatted comment)

You get to choose what counts as "1 operation".

For example : in your for _, v := range s { ... } loop, I wouldn't count the processing of one single v value :

    x := strings.Split(v, "")
    sort.Strings(x)
    anagram := strings.Join(x, "")
    items, ok := tmp[anagram]
    if ok {
      items = append(items, v)
      tmp[anagram] = items
      continue
    }
    tmp[anagram] = []string{v}

as "1 operation". More like something that depends on len(v).

So the length of the items in your starting set will probably appear in your end formula.

LeGEC
  • 46,477
  • 5
  • 57
  • 104
  • thankyou for your concern and explanation. I would like to plus 1, but as you figured this is not really my "cup of tea", so i dont know how others feel about this. But yes, it makes sense to me, and following that i would say it is 2*o(n) (strings.Spliit+strings.Join)*2 if i might say it in those words and be understood). – clement auger Aug 12 '21 at 16:20
0

this is not an answer, but, little insight as to anyone else having to deal with such things. may that help you.

I slightly revised stuff here and their, then gave a shot to a Godel-inspired scheme as described at https://stackoverflow.com/a/396077/11892070

-- main.go --
package main

import (
    "fmt"
    "sort"
    "strings"
)

func main() {
    input := []string{"tan", "nat", "⌘", "日本語", "語日本"}

    freq := map[rune]int{}
    for _, word := range input {
        x, err := hashWord(word, freq)
        fmt.Println(word, "=>", x, "err=", err)
    }
}

func groupAnagramsUsingSort(s []string, tmp map[string][]string, out [][]string) [][]string {
    for k := range tmp {
        delete(tmp, k)
    }
    for i := 0; i < len(out); i++ {
        out[i] = out[i][:0]
    }
    out = out[:0]

    for _, v := range s {
        x := strings.Split(v, "")
        sort.Strings(x)
        anagram := strings.Join(x, "")
        items, ok := tmp[anagram]
        if ok {
            items = append(items, v)
            tmp[anagram] = items
            continue
        }
        tmp[anagram] = []string{v}
    }

    for key := range tmp {
        out = append(out, tmp[key])
    }
    return out
}

func groupAnagramsUsingHash(s []string, tmp map[int][]string, out [][]string) [][]string {
    for k := range tmp {
        delete(tmp, k)
    }
    for i := 0; i < len(out); i++ {
        out[i] = out[i][:0]
    }
    out = out[:0]

    freq := map[rune]int{}
    for _, v := range s {
        hash, _ := hashWord(v, freq)
        items, ok := tmp[hash]
        if ok {
            items = append(items, v)
            tmp[hash] = items
            continue
        }
        tmp[hash] = []string{v}
    }
    for key := range tmp {
        out = append(out, tmp[key])
    }
    return out
}

var primes = []int{2, 41, 37, 47, 3, 67, 71, 23, 5, 101, 61, 17, 19, 13, 31, 43, 97, 29, 11, 7, 73, 83, 79, 89, 59, 53}

var ErrNonASCII = fmt.Errorf("non ascii letter detected")

func getFrequencyMap(word string, freq map[rune]int) (map[rune]int, error) {
    for k := range freq {
        delete(freq, k)
    }
    for _, r := range word {
        if r-97 < 0 || int(r-97) > len(primes) {
            return nil, ErrNonASCII
        }
        x := freq[r]
        freq[r] = x + 1
    }
    return freq, nil
}

func hashWord(word string, freq map[rune]int) (int, error) {
    var err error
    freq, err = getFrequencyMap(word, freq)
    if err != nil {
        return -1, err
    }
    product := 1
    for letter, r := range freq {
        product = product * primes[letter-97]
        for e := 1; e < r; e++ {
            product = product * product
        }
    }
    return product, nil
}
-- main_test.go --
package main

import (
    "reflect"
    "sort"
    "testing"
)

type expectation struct {
    input []string
    want  [][]string
}

var expectations = []expectation{
    expectation{
        input: []string{"eat", "tea", "tan", "ate", "nat", "bat"},
        want: [][]string{
            []string{"ate", "eat", "tea"},
            []string{"bat"},
            []string{"nat", "tan"},
        },
    },
    expectation{
        input: []string{"eaft", "tea", "taen", "ate", "nate", "batf"},
        want: [][]string{
            []string{"batf"},
            []string{"eaft"},
            []string{"tea", "ate"},
            []string{"taen", "nate"},
        },
    },
    expectation{
        input: []string{""},
        want: [][]string{
            []string{""},
        },
    },
    expectation{
        input: []string{"a"},
        want: [][]string{
            []string{"a"},
        },
    },
}

func TestUsingSort(t *testing.T) {
    tmp := map[string][]string{}
    out := [][]string{}

    for _, expectation := range expectations {
        out = groupAnagramsUsingSort(expectation.input, tmp, out)
        if len(out) != len(expectation.want) {
            t.Fatalf("unexpected output,\nwanted=%#v\ngot   =%#v\n", expectation.want, out)
        }
        for i := 0; i < len(out); i++ {
            sort.Strings(out[i])
            sort.Strings(expectation.want[i])
        }
        sort.Slice(out, func(i int, j int) bool {
            return len(out[i]) < len(out[j])
        })
        sort.Slice(expectation.want, func(i int, j int) bool {
            return len(expectation.want[i]) < len(expectation.want[j])
        })
        sort.Slice(out, func(i int, j int) bool {
            return (len(out[i]) > 0 &&
                len(out[j]) > 0 &&
                out[i][0] < out[j][0])
        })
        sort.Slice(expectation.want, func(i int, j int) bool {
            return (len(expectation.want[i]) > 0 &&
                len(expectation.want[j]) > 0 &&
                expectation.want[i][0] < expectation.want[j][0])
        })
        for i := 0; i < len(out); i++ {
            if !reflect.DeepEqual(out[i], expectation.want[i]) {
                t.Fatalf("unexpected output,\nwanted=%#v\ngot   =%#v\n", expectation.want, out)
            }
        }
    }
}

func TestUsingHash(t *testing.T) {
    tmp := map[int][]string{}
    out := [][]string{}

    for _, expectation := range expectations {
        out = groupAnagramsUsingHash(expectation.input, tmp, out)
        if len(out) != len(expectation.want) {
            t.Fatalf("unexpected output,\nwanted=%#v\ngot   =%#v\n", expectation.want, out)
        }
        for i := 0; i < len(out); i++ {
            sort.Strings(out[i])
            sort.Strings(expectation.want[i])
        }
        sort.Slice(out, func(i int, j int) bool {
            return len(out[i]) < len(out[j])
        })
        sort.Slice(expectation.want, func(i int, j int) bool {
            return len(expectation.want[i]) < len(expectation.want[j])
        })
        sort.Slice(out, func(i int, j int) bool {
            return (len(out[i]) > 0 &&
                len(out[j]) > 0 &&
                out[i][0] < out[j][0])
        })
        sort.Slice(expectation.want, func(i int, j int) bool {
            return (len(expectation.want[i]) > 0 &&
                len(expectation.want[j]) > 0 &&
                expectation.want[i][0] < expectation.want[j][0])
        })
        for i := 0; i < len(out); i++ {
            if !reflect.DeepEqual(out[i], expectation.want[i]) {
                t.Fatalf("unexpected output,\nwanted=%#v\ngot   =%#v\n", expectation.want, out)
            }
        }
    }
}

func BenchmarkUsingSort(b *testing.B) {
    tmp := map[string][]string{}
    out := [][]string{}
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        for _, expectation := range expectations {
            out = groupAnagramsUsingSort(expectation.input, tmp, out)
            _ = out
        }
    }
}

func BenchmarkUsingHash(b *testing.B) {
    tmp := map[int][]string{}
    out := [][]string{}
    b.ResetTimer()
    b.ReportAllocs()
    for i := 0; i < b.N; i++ {
        for _, expectation := range expectations {
            out = groupAnagramsUsingHash(expectation.input, tmp, out)
            _ = out
        }
    }
}

Benchmark result

$ go test -bench=. -v .
=== RUN   TestUsingSort
--- PASS: TestUsingSort (0.00s)
=== RUN   TestUsingHash
--- PASS: TestUsingHash (0.00s)
goos: linux
goarch: amd64
BenchmarkUsingSort
BenchmarkUsingSort-4      344438          3315 ns/op         787 B/op         29 allocs/op
BenchmarkUsingHash
BenchmarkUsingHash-4      410810          2911 ns/op         496 B/op         17 allocs/op
PASS
ok      _/home/clementauger/tmp 2.408s