-1

I want to download files in parallel in go, but my code never exits:

package main

import (
    "fmt"
    "io"
    "net/http"
    "os"
    "path/filepath"
    "sync"
)

func download_file(file_path string, wg sync.WaitGroup) {
    defer wg.Done()
    resp, _ := http.Get(file_path)
    defer resp.Body.Close()
    filename := filepath.Base(file_path)
    file, _ := os.Create(filename)
    defer file.Close()

    size, _ := io.Copy(file, resp.Body)
    fmt.Println(filename, size, resp.Status)
}

func main() {
    var wg sync.WaitGroup

    file_list := []string{
        "http://i.imgur.com/dxGb2uZ.jpg",
        "http://i.imgur.com/RSU6NxX.jpg",
        "http://i.imgur.com/hUWgS2S.jpg",
        "http://i.imgur.com/U8kaix0.jpg",
        "http://i.imgur.com/w3cEYpY.jpg",
        "http://i.imgur.com/ooSCD9T.jpg"}
    fmt.Println(len(file_list))
    for _, url := range file_list {
        wg.Add(1)
        fmt.Println(wg)
        go download_file(url, wg)

    }
    wg.Wait()
}

What's the reason? I've looked here: Golang download multiple files in parallel using goroutines but I found no solution. What is the best way to debug such code?

Community
  • 1
  • 1
grotos
  • 493
  • 2
  • 6
  • 11
  • 7
    You're making a copy of your `WaitGroup`; you should be passing `download_file` a pointer to it. –  May 09 '15 at 11:28
  • Go has some syntactic sugar to call methods with pointer receivers on non-pointer values but you should really think about what you are passing around and a method call always makes a copy. – Volker May 09 '15 at 12:12
  • 6
    Also, you are ignoring errors. Don't do that. – peterSO May 09 '15 at 12:34

2 Answers2

1

As Tim Cooper said you need to pass the WaitGroup as a pointer. If you run the go vet tool on your code it will give you this warning:

$ go vet ex.go
ex.go:12: download_file passes Lock by value: sync.WaitGroup contains sync.Mutex
exit status 1

I recommend using an editor that can do this for you when you save a file. For example go-plus for Atom.

As for the code I think you should restructure it like this:

package main

import (
    "fmt"
    "io"
    "net/http"
    "os"
    "path/filepath"
    "sync"
)

func downloadFile(filePath string) error {
    resp, err := http.Get(filePath)
    if err != nil {
        return err
    }
    defer resp.Body.Close()

    name := filepath.Base(filePath)

    file, err := os.Create(name)
    if err != nil {
        return err
    }
    defer file.Close()

    size, err := io.Copy(file, resp.Body)
    if err != nil {
        return err
    }
    fmt.Println(name, size, resp.Status)
    return nil
}

func main() {
    var wg sync.WaitGroup

    fileList := []string{
        "http://i.imgur.com/dxGb2uZ.jpg",
        "http://i.imgur.com/RSU6NxX.jpg",
        "http://i.imgur.com/hUWgS2S.jpg",
        "http://i.imgur.com/U8kaix0.jpg",
        "http://i.imgur.com/w3cEYpY.jpg",
        "http://i.imgur.com/ooSCD9T.jpg"}
    fmt.Println("downloading", len(fileList), "files")
    for _, url := range fileList {
        wg.Add(1)
        go func(url string) {
            err := downloadFile(url)
            if err != nil {
                fmt.Println("[error]", url, err)
            }
            wg.Done()
        }(url)
    }
    wg.Wait()
}

I don't like passing WaitGroups around and prefer to keep functions simple, blocking and sequential and then stitch together the concurrency at a higher level. This gives you the option of doing it all sequentially without having to change downloadFile.

I also added error handling and fixed names so they are camelCase.

Caleb
  • 9,272
  • 38
  • 30
1

Adding to Calab's response, there's absolutely nothing wrong with your approach, all you had to do is to pass a pointer to the sync.WaitGroup.

func download_file(file_path string, wg *sync.WaitGroup) {
    defer wg.Done()
    ......
}
.....
        go download_file(url, &wg)
.....

playground

OneOfOne
  • 95,033
  • 20
  • 184
  • 185