5

Is it possible to download and save files in parallel using goroutines?

Below is my code which downloads files from my dropbox:

package main

import (
    "encoding/json"
    "fmt"
    "io"
    "io/ioutil"
    "net/http"
    "net/url"
    "os"
    "path/filepath"
)

// Dropbox application credentials. main sends app_key as the OAuth2
// "client_id" and app_secret as the "client_secret".
const (
    app_key    string = "<app_key>"
    app_secret string = "<app_secret>"
)

// code receives the authorization code that main reads from standard input.
var code string

type (
    // TokenResponse holds the one field this program reads from the JSON
    // reply of the OAuth2 token request: "access_token".
    TokenResponse struct {
        AccessToken string `json:"access_token"`
    }

    // File is a single entry of a folder listing, identified by its path.
    File struct {
        Path string
    }

    // FileListResponse holds the entries decoded from the "contents" array
    // of a folder metadata reply.
    FileListResponse struct {
        FileList []File `json:"contents"`
    }
)

// download_file fetches file.Path from the Dropbox content API, authenticating
// with token.AccessToken, and writes the response body to a file in the
// current working directory named after the base name of file.Path.
//
// A failed request or a failure to create the local file panics. A non-200
// response or an error while copying the body is reported on standard error
// and the function returns; in the non-200 case no local file is created.
func download_file(file File, token TokenResponse) {

    download_url := fmt.Sprintf("https://api-content.dropbox.com/1/files/dropbox/%s?access_token=%s", file.Path, token.AccessToken)

    resp, err := http.Get(download_url)
    if err != nil {
        // resp is nil on error, so touching resp.Body would only produce an
        // opaque nil-pointer panic; surface the real cause instead.
        panic(err)
    }
    defer resp.Body.Close()

    // Do not save an API error payload under the name of the requested file.
    if resp.StatusCode != http.StatusOK {
        fmt.Fprintf(os.Stderr, "download %s: unexpected status %s\n", file.Path, resp.Status)
        return
    }

    filename := filepath.Base(file.Path)
    out, err := os.Create(filename)
    if err != nil {
        panic(err)
    }
    defer out.Close()

    // A copy error means the local file is incomplete; say so rather than
    // leaving a silently truncated download behind.
    if _, err := io.Copy(out, resp.Body); err != nil {
        fmt.Fprintf(os.Stderr, "download %s: %v\n", file.Path, err)
    }
}

// main runs the Dropbox OAuth2 code flow on the terminal: it prints the
// authorization URL, reads the authorization code from standard input,
// exchanges it for an access token, lists the "Camera Uploads" folder and
// downloads at most the first three entries, one after another.
//
// Any failure along the way (input, network, decoding, missing token) stops
// the program with a panic that names the failing step.
func main() {

    authorize_url := fmt.Sprintf("https://www.dropbox.com/1/oauth2/authorize?response_type=code&client_id=%s", app_key)

    // Get code
    fmt.Printf("1. Go to: %s\n", authorize_url)
    fmt.Println("2. Click 'Allow' (you might have to log in first)")
    fmt.Println("3. Copy the authorization code.")
    fmt.Printf("Enter the authorization code here: ")
    if _, err := fmt.Scanf("%s", &code); err != nil {
        panic(fmt.Errorf("reading authorization code: %v", err))
    }
    // End get code

    // Get access token
    data := url.Values{}
    data.Add("code", code)
    data.Add("grant_type", "authorization_code")
    data.Add("client_id", app_key)
    data.Add("client_secret", app_secret)

    resp, err := http.PostForm("https://api.dropbox.com/1/oauth2/token", data)
    if err != nil {
        // resp is nil on error; check before deferring the Close.
        panic(fmt.Errorf("requesting access token: %v", err))
    }
    defer resp.Body.Close()

    contents, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        panic(fmt.Errorf("reading token response: %v", err))
    }

    var tr TokenResponse

    if err := json.Unmarshal(contents, &tr); err != nil {
        panic(fmt.Errorf("decoding token response: %v", err))
    }
    // Without a token every later request would fail; stop here and show
    // what the server actually sent.
    if tr.AccessToken == "" {
        panic(fmt.Errorf("token response contained no access token: %s", contents))
    }
    // End get access token

    // Get file list
    file_list_url := fmt.Sprintf("https://api.dropbox.com/1/metadata/dropbox/Camera Uploads?access_token=%s", tr.AccessToken)

    resp2, err := http.Get(file_list_url)
    if err != nil {
        panic(fmt.Errorf("requesting file list: %v", err))
    }
    defer resp2.Body.Close()

    contents2, err := ioutil.ReadAll(resp2.Body)
    if err != nil {
        panic(fmt.Errorf("reading file list response: %v", err))
    }

    var flr FileListResponse
    if err := json.Unmarshal(contents2, &flr); err != nil {
        panic(fmt.Errorf("decoding file list response: %v", err))
    }
    // End get file list

    for i, file := range flr.FileList {

        download_file(file, tr)

        // Only the first three entries (indexes 0 through 2) are downloaded.
        if i >= 2 {
            break
        }
    }
}

It doesn't work when I prefix the call to download_file with the go keyword:

go download_file(file, tr)
Neil
  • 8,925
  • 10
  • 44
  • 49

2 Answers

13

That's because your main goroutine is exiting. You need to add a WaitGroup to wait until all the goroutines exit. For example,

var wg sync.WaitGroup
for i, file := range flr.FileList {
    wg.Add(1)

    // Pass a pointer: a WaitGroup must not be copied, and Done() called on
    // a copy would never release the wg.Wait() below.
    go download_file(file, tr, &wg)

    if i >= 2 {
        break
    }
}
wg.Wait()

...
func download_file(file File, token TokenResponse, wg *sync.WaitGroup) {
    // Deferred so the counter is decremented even if the function returns
    // early or panics.
    defer wg.Done()
    ...
}
Rob Napier
  • 286,113
  • 34
  • 456
  • 610
  • +1. Funny, I mentioned that same Wait Group sync technique earlier today: http://stackoverflow.com/a/23632204/6309 – VonC May 13 '14 at 15:34
  • What happens if something goes wrong within `download_file` (prior to `wg.Done()`? Will the `wg.Done()` call still happen? – srt32 Jan 11 '15 at 21:52
  • 2
    There's no magic here. You need to call `wg.Done()` when you're done. If you fail to call it, it won't be called. If you want to ensure it's called even if you return from the function early, use `defer` just like you would for other kinds of "catch" behavior. – Rob Napier Jan 11 '15 at 21:58
  • 7
    You should pass wg by reference. You are sending a copy and calling Done() on the copy and not the original WaitGroup. https://stackoverflow.com/questions/36407206/best-way-of-using-sync-waitgroup-with-external-function – Tristan Jul 29 '17 at 04:36
0

This project might help you and others that are looking into achieving concurrency in Go:

The idea is to create a cluster in Go to execute parallel jobs: https://github.com/waqar-alamgir/mini-go-cluster

The source can be tweaked to download files concurrently.

Waqar Alamgir
  • 9,828
  • 4
  • 30
  • 36