
I want to stream a large multipart/form-data file upload directly to AWS S3 with as small a memory and disk footprint as possible. How can I achieve this? Resources online only explain how to upload a file and store it locally on the server.
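A minimal sketch of the goal, assuming aws-sdk-go's s3manager: each part of the multipart request is piped straight from the wire into the uploader, so nothing is written to local disk (the region, bucket, route, and port below are illustrative, not prescriptive):

package main

import (
    "io"
    "net/http"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/s3/s3manager"
)

func main() {
    sess := session.Must(session.NewSession(&aws.Config{Region: aws.String("us-west-2")}))
    uploader := s3manager.NewUploader(sess)

    http.HandleFunc("/upload", func(w http.ResponseWriter, r *http.Request) {
        mr, err := r.MultipartReader() // iterate parts as they arrive on the wire
        if err != nil {
            http.Error(w, err.Error(), http.StatusBadRequest)
            return
        }
        for {
            part, err := mr.NextPart()
            if err == io.EOF {
                break
            }
            if err != nil {
                http.Error(w, err.Error(), http.StatusBadRequest)
                return
            }
            if part.FileName() == "" {
                continue // skip non-file form fields
            }
            // part is a plain io.Reader; the uploader chunks it into
            // S3 multipart parts in memory, never touching local disk.
            if _, err := uploader.Upload(&s3manager.UploadInput{
                Bucket: aws.String("myBucket"),
                Key:    aws.String(part.FileName()),
                Body:   part,
            }); err != nil {
                http.Error(w, err.Error(), http.StatusInternalServerError)
                return
            }
        }
        w.WriteHeader(http.StatusCreated)
    })
    http.ListenAndServe(":8080", nil)
}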

machete

5 Answers


You can use the upload manager to stream the file and upload it; see the comments in its source code. You can also configure its parameters to set the part size, concurrency, and max upload parts. Below is sample code for reference.

package main

import (
    "fmt"
    "os"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/credentials"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/s3/s3manager"
)

var filename = "file_name.zip"
var myBucket = "myBucket"
var myKey = "file_name.zip"
var accessKey = ""
var accessSecret = ""

func main() {
    var awsConfig *aws.Config
    if accessKey == "" || accessSecret == "" {
        //load default credentials
        awsConfig = &aws.Config{
            Region: aws.String("us-west-2"),
        }
    } else {
        awsConfig = &aws.Config{
            Region:      aws.String("us-west-2"),
            Credentials: credentials.NewStaticCredentials(accessKey, accessSecret, ""),
        }
    }

    // The session the S3 Uploader will use
    sess := session.Must(session.NewSession(awsConfig))

    // Create an uploader with the session and default options
    //uploader := s3manager.NewUploader(sess)

    // Create an uploader with the session and custom options
    uploader := s3manager.NewUploader(sess, func(u *s3manager.Uploader) {
        u.PartSize = 5 * 1024 * 1024 // The minimum/default allowed part size is 5MB
        u.Concurrency = 2            // default is 5
    })

    //open the file
    f, err := os.Open(filename)
    if err != nil {
        fmt.Printf("failed to open file %q, %v", filename, err)
        return
    }
    defer f.Close()

    // Upload the file to S3.
    result, err := uploader.Upload(&s3manager.UploadInput{
        Bucket: aws.String(myBucket),
        Key:    aws.String(myKey),
        Body:   f,
    })

    //in case it fails to upload
    if err != nil {
        fmt.Printf("failed to upload file, %v", err)
        return
    }
    fmt.Printf("file uploaded to, %s\n", result.Location)
}
maaz
  • Thank you for the answer. If my file is below 5 MB, will it still be streamed to S3? And as I understand it, the file will appear on S3 only after being fully uploaded? – Vitaly Zdanevich Apr 03 '19 at 13:49
  • Yes, it will be streamed in one part. Correct, the file will appear once fully uploaded. – maaz Apr 03 '19 at 18:06

You can do this using minio-go:

n, err := s3Client.PutObject("bucket-name", "objectName", object, size, "application/octet-stream")

PutObject() automatically does a multipart upload internally; see the linked example.
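For reference, with the current minio-go v7 API the same idea looks roughly like the sketch below; the endpoint, bucket, object name, and credential environment variables are placeholder assumptions. Passing -1 as the size tells the client the length is unknown, which makes it stream the reader as a multipart upload:

package main

import (
    "context"
    "fmt"
    "os"

    "github.com/minio/minio-go/v7"
    "github.com/minio/minio-go/v7/pkg/credentials"
)

func main() {
    // minio-go is a generic S3-compatible client; point it at S3 itself.
    client, err := minio.New("s3.amazonaws.com", &minio.Options{
        Creds:  credentials.NewStaticV4(os.Getenv("AWS_ACCESS_KEY_ID"), os.Getenv("AWS_SECRET_ACCESS_KEY"), ""),
        Secure: true,
    })
    if err != nil {
        fmt.Println(err)
        return
    }

    f, err := os.Open("file_name.zip")
    if err != nil {
        fmt.Println(err)
        return
    }
    defer f.Close()

    // Size -1 = unknown length, so PutObject streams the reader as a
    // multipart upload instead of buffering the whole object.
    info, err := client.PutObject(context.Background(), "bucket-name", "objectName", f, -1,
        minio.PutObjectOptions{ContentType: "application/octet-stream"})
    if err != nil {
        fmt.Println(err)
        return
    }
    fmt.Printf("uploaded %d bytes\n", info.Size)
}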

koolhead17
  • I think it's not the proper answer, because here we have no control over the parts, while the AWS API lets us upload each part separately and send initiate/complete/abort upload commands. – meshkati Jul 31 '19 at 12:02
  • The example linked here doesn't actually use `PutObject` to stream. – fIwJlxSzApHEZIl Aug 06 '20 at 19:52

Another option is to mount the S3 bucket with goofys and then stream your writes to the mountpoint. goofys does not buffer the content locally, so it works fine with large files.
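If you go this route, the write side is just ordinary file I/O against the mountpoint. A minimal sketch, assuming the bucket has been mounted with something like `goofys myBucket /mnt/s3` (the paths are illustrative):

package main

import (
    "io"
    "os"
)

func main() {
    out, err := os.Create("/mnt/s3/file_name.zip") // path under the goofys mountpoint
    if err != nil {
        panic(err)
    }
    defer out.Close()

    // Copy from any io.Reader (here stdin) straight through the mount;
    // goofys forwards the writes to S3 without buffering them locally.
    if _, err := io.Copy(out, os.Stdin); err != nil {
        panic(err)
    }
}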

khc

I was trying to do this with the aws-sdk-go-v2 package, so I had to change @maaz's code a bit. I'm leaving it here for others:


import (
    "context"
    "fmt"
    "os"

    "github.com/aws/aws-sdk-go-v2/aws"
    "github.com/aws/aws-sdk-go-v2/config"
    "github.com/aws/aws-sdk-go-v2/credentials"
    "github.com/aws/aws-sdk-go-v2/feature/s3/manager"
    "github.com/aws/aws-sdk-go-v2/service/s3"
)

type TokenMeta struct {
    AccessToken  string
    SecretToken  string
    SessionToken string
    BucketName   string
}

// S3Client carries the token meta and is the receiver for StreamUpload.
type S3Client struct {
    TokenMeta TokenMeta
}
func (s3Client S3Client) StreamUpload(fileToUpload string, fileKey string) error {
    accessKey := s3Client.TokenMeta.AccessToken
    secretKey := s3Client.TokenMeta.SecretToken

    awsConfig, err := config.LoadDefaultConfig(context.TODO(),
        config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, s3Client.TokenMeta.SessionToken)),
    )
    if err != nil {
        return fmt.Errorf("error creating aws config: %v", err)
    }

    client := s3.NewFromConfig(awsConfig)
    uploader := manager.NewUploader(client, func(u *manager.Uploader) {
        u.PartSize = 5 * 1024 * 1024
        u.BufferProvider = manager.NewBufferedReadSeekerWriteToPool(10 * 1024 * 1024)
    })

    f, err := os.Open(fileToUpload)
    if err != nil {
        return fmt.Errorf("failed to open fileToUpload %q, %v", fileToUpload, err)
    }
    defer func() {
        if err := f.Close(); err != nil {
            fmt.Printf("error closing fileToUpload: %v\n", err)
        }
    }()

    inputObj := &s3.PutObjectInput{
        Bucket: aws.String(s3Client.TokenMeta.BucketName),
        Key:    aws.String(fileKey),
        Body:   f,
    }
    uploadResult, err := uploader.Upload(context.TODO(), inputObj)
    if err != nil {
        return fmt.Errorf("failed to uploadResult fileToUpload, %v", err)
    }

    fmt.Printf("%s uploaded to, %s\n", fileToUpload, uploadResult.Location)
    return nil
}
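For completeness, a sketch of how this method might be called; the field values and bucket name below are placeholder assumptions, while the struct and method names match the code above:

func main() {
    client := S3Client{TokenMeta: TokenMeta{
        AccessToken:  os.Getenv("AWS_ACCESS_KEY_ID"),
        SecretToken:  os.Getenv("AWS_SECRET_ACCESS_KEY"),
        SessionToken: os.Getenv("AWS_SESSION_TOKEN"),
        BucketName:   "my-bucket",
    }}
    if err := client.StreamUpload("file_name.zip", "file_name.zip"); err != nil {
        fmt.Println(err)
    }
}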
leoOrion

I didn't try it, but if I were you I'd try the multipart upload option.

You can read the multipart upload documentation.

Here is a Go example for multipart upload and multipart upload abort.
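To make the low-level flow concrete (and to address the ReadSeeker concern in the comment below): a sketch with aws-sdk-go v1 that buffers one 5 MB part at a time in memory, which satisfies the io.ReadSeeker the SDK requires. The bucket, key, and region are placeholders:

package main

import (
    "bytes"
    "fmt"
    "io"
    "os"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/s3"
)

// multipartUpload streams r to S3 in 5 MB parts via the low-level
// multipart API, aborting the whole upload if any part fails.
func multipartUpload(svc *s3.S3, bucket, key string, r io.Reader) error {
    create, err := svc.CreateMultipartUpload(&s3.CreateMultipartUploadInput{
        Bucket: aws.String(bucket),
        Key:    aws.String(key),
    })
    if err != nil {
        return err
    }
    abort := func() {
        svc.AbortMultipartUpload(&s3.AbortMultipartUploadInput{
            Bucket: aws.String(bucket), Key: aws.String(key), UploadId: create.UploadId,
        })
    }

    var parts []*s3.CompletedPart
    buf := make([]byte, 5*1024*1024) // every part except the last must be >= 5 MB
    for partNum := int64(1); ; partNum++ {
        n, readErr := io.ReadFull(r, buf)
        if n > 0 {
            // bytes.NewReader is an io.ReadSeeker, so only one part at a
            // time is ever held in memory.
            out, err := svc.UploadPart(&s3.UploadPartInput{
                Bucket:     aws.String(bucket),
                Key:        aws.String(key),
                UploadId:   create.UploadId,
                PartNumber: aws.Int64(partNum),
                Body:       bytes.NewReader(buf[:n]),
            })
            if err != nil {
                abort()
                return err
            }
            parts = append(parts, &s3.CompletedPart{ETag: out.ETag, PartNumber: aws.Int64(partNum)})
        }
        if readErr == io.EOF || readErr == io.ErrUnexpectedEOF {
            break
        }
        if readErr != nil {
            abort()
            return readErr
        }
    }

    _, err = svc.CompleteMultipartUpload(&s3.CompleteMultipartUploadInput{
        Bucket:          aws.String(bucket),
        Key:             aws.String(key),
        UploadId:        create.UploadId,
        MultipartUpload: &s3.CompletedMultipartUpload{Parts: parts},
    })
    return err
}

func main() {
    sess := session.Must(session.NewSession(&aws.Config{Region: aws.String("us-west-2")}))
    if err := multipartUpload(s3.New(sess), "myBucket", "file_name.zip", os.Stdin); err != nil {
        fmt.Println("upload failed:", err)
    }
}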

MIkCode
  • Hm, looks like I can only use a ReadSeeker for the body, which I think implies that direct streaming isn't possible – machete Dec 09 '15 at 19:24