First I would like to say i've already viewed Golang download multiple files in parallel using goroutines and Example for sync.WaitGroup correct? and i've used them as a guide in my code. However I'm not certain that it's working for me. I'm trying to download files from multiple buckets on aws. This is what I have (some lines will be blank for security reasons).
package main
import (
"fmt"
"os"
"os/user"
"path/filepath"
"sync"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
)
var (
//Bucket = "" // Download from this bucket
Prefix = "" // Using this key prefix
LocalDirectory = "s3logs" // Into this directory
)
// create a single session to be used
var sess = session.New()
// used to control concurrency
var wg sync.WaitGroup
func main() {
start := time.Now()
//map of buckets to region
regBuckets := map[string]string{
}
// download the files for each bucket
for region, bucket := range regBuckets {
fmt.Println(region)
wg.Add(1)
go getLogs(region, bucket, LocalDirectory, &wg)
}
wg.Wait()
elapsed := time.Since(start)
fmt.Printf("\nTime took %s\n", elapsed)
}
// function to get data from buckets
func getLogs(region string, bucket string, directory string, wg *sync.WaitGroup) {
client := s3.New(sess, &aws.Config{Region: aws.String(region)})
params := &s3.ListObjectsInput{Bucket: &bucket, Prefix: &Prefix}
manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
d.PartSize = 6 * 1024 * 1024 // 6MB per part
d.Concurrency = 5
})
d := downloader{bucket: bucket, dir: directory, Downloader: manager}
client.ListObjectsPages(params, d.eachPage)
wg.Done()
}
// downloader object and methods
type downloader struct {
*s3manager.Downloader
bucket, dir string
}
func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
for _, obj := range page.Contents {
d.downloadToFile(*obj.Key)
}
return true
}
func (d *downloader) downloadToFile(key string) {
// Create the directories in the path
// desktop path
user, errs := user.Current()
if errs != nil {
panic(errs)
}
homedir := user.HomeDir
desktop := homedir + "/Desktop/" + d.dir
file := filepath.Join(desktop, key)
if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
panic(err)
}
// Setup the local file
fd, err := os.Create(file)
if err != nil {
panic(err)
}
defer fd.Close()
// Download the file using the AWS SDK
fmt.Printf("Downloading s3://%s/%s to %s...\n", d.bucket, key, file)
params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
d.Download(fd, params)
_, e := d.Download(fd, params)
if e != nil {
panic(e)
}
}
In the regBuckets
hashmap I place a list of bucket names : regions
In the for loop below I print the bucket name. So If i have two buckets I want to download the items from both buckets at the same time. I was testing this with a print statement. I excepted to see the name of the first bucket and soon after the name of the second bucket. However it seems like instead of downloading the files from multiple buckets is parallel it's downloading them in order, e.g bucket 1 when bucket 1 is done the for loop continues and then bucket 2...etc So i need help making sure I'm downloading in parallel because I have roughly 10 buckets and speed is important. I also wonder if it's because i'm using a single session. Any idea?