First, I'd like to say that I've already viewed "Golang download multiple files in parallel using goroutines" and "Example for sync.WaitGroup correct?", and I've used them as a guide in my code. However, I'm not certain it's working for me. I'm trying to download files from multiple buckets on AWS. This is what I have (some lines are left blank for security reasons).

package main

import (
    "fmt"
    "os"
    "os/user"
    "path/filepath"
    "sync"
    "time"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/s3"
    "github.com/aws/aws-sdk-go/service/s3/s3manager"
)

var (
    //Bucket         = ""       // Download from this bucket
    Prefix         = ""       // Using this key prefix
    LocalDirectory = "s3logs" // Into this directory
)

// create a single session to be used
var sess = session.New()

// used to control concurrency
var wg sync.WaitGroup

func main() {
    start := time.Now()
    //map of buckets to region
    regBuckets := map[string]string{

    }

    // download the files for each bucket
    for region, bucket := range regBuckets {
        fmt.Println(region)
        wg.Add(1)
        go getLogs(region, bucket, LocalDirectory, &wg)
    }
    wg.Wait()
    elapsed := time.Since(start)
    fmt.Printf("\nTime took %s\n", elapsed)

}

// getLogs lists the objects in a bucket and downloads each one into the local directory
func getLogs(region string, bucket string, directory string, wg *sync.WaitGroup) {
    defer wg.Done()
    client := s3.New(sess, &aws.Config{Region: aws.String(region)})
    params := &s3.ListObjectsInput{Bucket: &bucket, Prefix: &Prefix}
    manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
        d.PartSize = 6 * 1024 * 1024 // 6MB per part
        d.Concurrency = 5
    })
    d := downloader{bucket: bucket, dir: directory, Downloader: manager}
    if err := client.ListObjectsPages(params, d.eachPage); err != nil {
        fmt.Printf("failed to list objects in %s: %v\n", bucket, err)
    }
}

// downloader object and methods
type downloader struct {
    *s3manager.Downloader
    bucket, dir string
}

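// eachPage is called once for each page of ListObjects results; it downloads every object on the
// page (sequentially, within this bucket's goroutine) and returns true to request the next page.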
func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
    for _, obj := range page.Contents {
        d.downloadToFile(*obj.Key)
    }
    return true
}

func (d *downloader) downloadToFile(key string) {
    // Create the directories in the path
    // desktop path
    usr, err := user.Current()
    if err != nil {
        panic(err)
    }
    desktop := filepath.Join(usr.HomeDir, "Desktop", d.dir)
    file := filepath.Join(desktop, key)
    if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
        panic(err)
    }

    // Setup the local file
    fd, err := os.Create(file)
    if err != nil {
        panic(err)
    }
    defer fd.Close()

    // Download the file using the AWS SDK
    fmt.Printf("Downloading s3://%s/%s to %s...\n", d.bucket, key, file)
    params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
    if _, err := d.Download(fd, params); err != nil {
        panic(err)
    }
}

In the regBuckets map I put the bucket names and their regions. In the for loop below it I print the bucket name. If I have two buckets, I want to download the items from both buckets at the same time. I was testing this with a print statement: I expected to see the name of the first bucket and, soon after, the name of the second bucket. However, it seems that instead of downloading the files from multiple buckets in parallel, it downloads them in order, e.g. bucket 1, and only when bucket 1 is done does the for loop continue to bucket 2, and so on. I need help making sure I'm downloading in parallel, because I have roughly 10 buckets and speed is important. I also wonder if it's because I'm using a single session. Any ideas?
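
To illustrate what I mean by "at the same time", here is a stripped-down sketch of the pattern I'm using (placeholder bucket names, and a sleep standing in for the real download work). Each goroutine prints a timestamp as soon as it starts, so overlapping timestamps would show the loop isn't waiting for one bucket to finish before starting the next:

package main

import (
    "fmt"
    "sync"
    "time"
)

func main() {
    // placeholder data: region -> bucket (real names omitted)
    regBuckets := map[string]string{
        "us-east-1": "bucket-one",
        "us-west-2": "bucket-two",
    }

    var wg sync.WaitGroup
    for region, bucket := range regBuckets {
        wg.Add(1)
        // pass region and bucket as arguments so each goroutine gets its own copies
        go func(region, bucket string) {
            defer wg.Done()
            fmt.Printf("%s started %s (%s)\n", time.Now().Format("15:04:05.000"), bucket, region)
            time.Sleep(2 * time.Second) // stand-in for the real download work
            fmt.Printf("%s finished %s\n", time.Now().Format("15:04:05.000"), bucket)
        }(region, bucket)
    }
    wg.Wait()
}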

  • I just ran into this as well. I suspect that ListObjectsPages is not goroutine safe but it could be the single session as well. I thought that having a session was supposed to make the SDK goroutine safe though. – mentat Apr 28 '16 at 16:22

0 Answers