41

I currently upload single objects to S3 like so:

var options = {
    Bucket: bucket,
    Key: s3Path,
    Body: body,
    ACL: s3FilePermissions
};

S3.putObject(options, function (err, data) {
    //console.log(data);
});

But when I have a large resources folder, for example, I use the AWS CLI tool.
I was wondering: is there a native way to do the same thing with the AWS SDK (upload entire folders to S3)?

LifeQuery

8 Answers

36

Old-school recursive way I whipped up in a hurry. Only uses core Node modules and the standard AWS SDK.

var AWS = require('aws-sdk');
var path = require("path");
var fs = require('fs');

// note: despite the name, s3Path here is the local directory to upload;
// object keys are taken relative to it
const uploadDir = function(s3Path, bucketName) {

    let s3 = new AWS.S3();

    function walkSync(currentDirPath, callback) {
        fs.readdirSync(currentDirPath).forEach(function (name) {
            var filePath = path.join(currentDirPath, name);
            var stat = fs.statSync(filePath);
            if (stat.isFile()) {
                callback(filePath, stat);
            } else if (stat.isDirectory()) {
                walkSync(filePath, callback);
            }
        });
    }

    walkSync(s3Path, function(filePath, stat) {
        let bucketPath = filePath.substring(s3Path.length+1);
        let params = {Bucket: bucketName, Key: bucketPath, Body: fs.readFileSync(filePath) };
        s3.putObject(params, function(err, data) {
            if (err) {
                console.log(err)
            } else {
                console.log('Successfully uploaded '+ bucketPath +' to ' + bucketName);
            }
        });

    });
};

uploadDir("path to your folder", "your bucket name");

Special thanks to Ali from this post for helping get the filenames.
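
Note that putObject reads each whole file into memory (via readFileSync) and a single PUT is limited to 5 GB. If some files may be large, the walkSync call inside uploadDir can be swapped for a streaming variant (a sketch, reusing the same s3, s3Path and bucketName from the snippet above) that goes through s3.upload(), which manages multipart uploads:

walkSync(s3Path, function (filePath, stat) {
    let bucketPath = filePath.substring(s3Path.length + 1);
    // upload() accepts a stream for Body and handles multipart uploads itself
    s3.upload({
        Bucket: bucketName,
        Key: bucketPath,
        Body: fs.createReadStream(filePath)
    }, function (err) {
        if (err) {
            console.log(err);
        } else {
            console.log('Successfully uploaded ' + bucketPath + ' to ' + bucketName);
        }
    });
});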

Jim Chertkov
17

async/await + TypeScript

If you need a solution that uses modern JavaScript syntax and is compatible with TypeScript, I came up with the following code. The recursive getFiles is borrowed from this answer (after all these years, recursion still gives me a headache, lol).

import { promises as fs, createReadStream } from 'fs';
import * as path from 'path';
import { S3 } from 'aws-sdk';

async function uploadDir(s3Path: string, bucketName: string) {
  const s3 = new S3();

  // Recursive getFiles from
  // https://stackoverflow.com/a/45130990/831465
  async function getFiles(dir: string): Promise<string | string[]> {
    const dirents = await fs.readdir(dir, { withFileTypes: true });
    const files = await Promise.all(
      dirents.map((dirent) => {
        const res = path.resolve(dir, dirent.name);
        return dirent.isDirectory() ? getFiles(res) : res;
      })
    );
    return Array.prototype.concat(...files);
  }

  const files = (await getFiles(s3Path)) as string[];
  const uploads = files.map((filePath) =>
    s3
      .putObject({
        Key: path.relative(s3Path, filePath),
        Bucket: bucketName,
        Body: createReadStream(filePath),
      })
      .promise()
  );
  return Promise.all(uploads);
}

await uploadDir(path.resolve('./my-path'), 'bucketname');
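
The top-level await on the last line only works in an ES module (or inside another async function); in a plain CommonJS script you can wrap the call, for example:

uploadDir(path.resolve('./my-path'), 'bucketname')
  .then(() => console.log('upload complete'))
  .catch(console.error);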
ofhouse
  • Good idea. Before upvoting, I noticed that all files are uploaded to the root folder, flat, and each file's key in S3 contains the path (as a string). Is this on purpose? – OhadR Nov 11 '21 at 19:05
  • For Windows, add something like: ```const key = path.relative(s3Path, filePath); const posixKey = key.split(path.sep).join(path.posix.sep);``` – OhadR Nov 11 '21 at 19:43
  • 1
    @OhadR Amazon S3 is a flat storage system that does not actually use folders. See this reply for more info: https://stackoverflow.com/a/51218935/9802294 – spartanz51 May 18 '22 at 19:09
9

Here is a cleaned-up, debugged, working version of @Jim's solution:

function uploadArtifactsToS3() {
  const artifactFolder = `logs/${config.log}/test-results`;
  const testResultsPath = './test-results';

  const walkSync = (currentDirPath, callback) => {
    fs.readdirSync(currentDirPath).forEach((name) => {
      const filePath = path.join(currentDirPath, name);
      const stat = fs.statSync(filePath);
      if (stat.isFile()) {
        callback(filePath, stat);
      } else if (stat.isDirectory()) {
        walkSync(filePath, callback);
      }
    });
  };

  walkSync(testResultsPath, async (filePath) => {
    let bucketPath = filePath.substring(testResultsPath.length - 1);
    let params = {
      Bucket: process.env.SOURCE_BUCKET,
      Key: `${artifactFolder}/${bucketPath}`,
      Body: fs.readFileSync(filePath)
    };
    try {
      await s3.putObject(params).promise();
      console.log(`Successfully uploaded ${bucketPath} to s3 bucket`);
    } catch (error) {
      console.error(`error in uploading ${bucketPath} to s3 bucket`);
      throw new Error(`error in uploading ${bucketPath} to s3 bucket`);
    }
  });
}
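
If the tree contains a very large number of files, starting every upload at once can exhaust file descriptors (see the EMFILE comment below). One way to bound the load, sketched here under the assumption that uploadArtifactsToS3 is declared async and reusing walkSync, s3, artifactFolder and testResultsPath from above, is to collect the paths first and upload them in fixed-size batches:

  const filePaths = [];
  walkSync(testResultsPath, (filePath) => filePaths.push(filePath));

  const batchSize = 50; // hypothetical limit; tune for your environment
  for (let i = 0; i < filePaths.length; i += batchSize) {
    // wait for each batch to finish before starting the next one
    await Promise.all(
      filePaths.slice(i, i + batchSize).map((filePath) => {
        const bucketPath = filePath.substring(testResultsPath.length - 1);
        return s3.putObject({
          Bucket: process.env.SOURCE_BUCKET,
          Key: `${artifactFolder}/${bucketPath}`,
          Body: fs.readFileSync(filePath)
        }).promise();
      })
    );
  }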
PersianIronwood
  • Careful - this breaks when there are a huge number of folders - Error processing event: Error: EMFILE: too many open files, scandir '/tmp/diaas-uiGPwkBx/standard-launcher' – Martin Bartlett Jan 31 '20 at 13:02
5

I was just contemplating this problem the other day, and was thinking something like this:

...    
var async = require('async'),
    fs = require('fs'),
    path = require("path");

var directoryName = './test',
    directoryPath = path.resolve(directoryName);

var files = fs.readdirSync(directoryPath);
async.map(files, function (f, cb) {
    var filePath = path.join(directoryPath, f);

    var options = {
        Bucket: bucket,
        Key: s3Path,
        Body: fs.readFileSync(filePath),
        ACL: s3FilePermissions
    };

    S3.putObject(options, cb);

}, function (err, results) {
    if (err) console.error(err);
    console.log(results);
});
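
Note this sketch assumes a flat directory and reuses bucket, s3Path and s3FilePermissions from the question: readFileSync on a sub-directory throws EISDIR (see the comments), and as written every file is uploaded under the same Key. A slightly guarded variant might look like:

async.map(files, function (f, cb) {
    var filePath = path.join(directoryPath, f);
    // skip sub-directories so readFileSync doesn't throw EISDIR
    if (!fs.statSync(filePath).isFile()) return cb(null, null);

    S3.putObject({
        Bucket: bucket,
        Key: s3Path + '/' + f, // one key per file instead of a single shared key
        Body: fs.readFileSync(filePath),
        ACL: s3FilePermissions
    }, cb);
}, function (err, results) {
    if (err) console.error(err);
    console.log(results);
});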
Ryan Kelley
  • `fs.readFileSync(filePath)` — this line returns `Error: EISDIR: illegal operation on a directory, read` for me, like here: http://stackoverflow.com/questions/25883775/node-js-readfilesync-function – Barlas Apaydin May 25 '16 at 12:49
  • 1
    @BarlasApaydin That's because readFileSync is getting a directory, not a file, to read. – Omar Dulaimi May 26 '21 at 16:12
5

Here's a version that wraps the upload in a Promise. This lets you perform an action once all uploads are complete, via Promise.all().then(...).

const path = require('path');
const fs = require('fs');
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

const directoryToUpload = 'directory-name-here';
const bucketName = 'name-of-s3-bucket-here';

// get file paths
const filePaths = [];
const getFilePaths = (dir) => {
  fs.readdirSync(dir).forEach(function (name) {
    const filePath = path.join(dir, name);
    const stat = fs.statSync(filePath);
    if (stat.isFile()) {
      filePaths.push(filePath);
    } else if (stat.isDirectory()) {
      getFilePaths(filePath);
    }
  });
};
getFilePaths(directoryToUpload);

// upload to S3
const uploadToS3 = (dir, filePath) => {
  return new Promise((resolve, reject) => {
    const key = filePath.split(`${dir}/`)[1];
    const params = {
      Bucket: bucketName,
      Key: key,
      Body: fs.readFileSync(filePath),
    };
    s3.putObject(params, (err) => {
      if (err) {
        reject(err);
      } else {
        console.log(`uploaded ${params.Key} to ${params.Bucket}`);
        resolve(filePath);
      }
    });
  });
};

const uploadPromises = filePaths.map((filePath) =>
  uploadToS3(directoryToUpload, filePath)
);
Promise.all(uploadPromises)
  .then((result) => {
    console.log('uploads complete');
    console.log(result);
  })
  .catch((err) => console.error(err));
  • 1
    Ha, I came back to this post years later looking to see if anyone had updated the answers with a clean async version. @ofhouse's answer looks legit, but this is easier to follow. Thanks for taking the time to post this! – Jim Chertkov Jan 28 '21 at 21:48
3

You might try the node-s3-client.

UPDATE: Available on npm here

From the sync a directory to s3 docs:

UPDATE: Added client initialization code.

var client = s3.createClient({
    maxAsyncS3: 20,     // this is the default
    s3RetryCount: 3,    // this is the default
    s3RetryDelay: 1000, // this is the default
    multipartUploadThreshold: 20971520, // this is the default (20 MB)
    multipartUploadSize: 15728640, // this is the default (15 MB)
    s3Options: {
      accessKeyId: "YOUR ACCESS KEY",
      secretAccessKey: "YOUR SECRET ACCESS KEY"
    }
  });

var params = {
  localDir: "some/local/dir",
  deleteRemoved: true, // default false, whether to remove s3 objects
                       // that have no corresponding local file.

  s3Params: {
    Bucket: "s3 bucket name",
    Prefix: "some/remote/dir/",
    // other options supported by putObject, except Body and ContentLength.
    // See: http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#putObject-property
  },
};
var uploader = client.uploadDir(params);
uploader.on('error', function(err) {
  console.error("unable to sync:", err.stack);
});
uploader.on('progress', function() {
  console.log("progress", uploader.progressAmount, uploader.progressTotal);
});
uploader.on('end', function() {
  console.log("done uploading");
});
unboundev
  • 3
    This doesn't work. It just hangs on progress. No error, no progress. – Hitesh Joshi May 26 '15 at 07:42
  • Are you sure the client is initialized properly? I've edited the answer to include client initialization. – unboundev May 26 '15 at 20:30
  • 1
    Yes. Otherwise it would show an error. So: no error, the client is initialized properly, but the sync won't happen. – Hitesh Joshi May 28 '15 at 06:43
  • 5
    This library doesn't seem to currently support uploads with files in the directory that are larger than 1MB. Until that's supported, this isn't really a great solution for a generic directory sync to S3. Other than that deal-breaking issue, it was a pretty nice library to use. – dsw88 Jul 19 '17 at 19:19
  • 2
    When using this library, note that it has not been updated for many years! There are a lot of unfixed bugs, and I wasted 5 hours on it. Someone has republished a fixed version: https://www.npmjs.com/package/@auth0/s3 – Black-Hole Jun 19 '19 at 11:59
  • And now the above package has also not been updated in years. – ParkerD Oct 01 '21 at 18:07
1

This works for me (you'll need to add the walk-sync package):

const fs = require("fs");
const Path = require("path");
const AWS = require("aws-sdk");
const walkSync = require("walk-sync"); // npm install walk-sync

const s3 = new AWS.S3();

async function asyncForEach(array, callback) {
  for (let index = 0; index < array.length; index++) {
    await callback(array[index], index, array);
  }
}

const syncS3Directory = async (s3Path, endpoint) => {
  await asyncForEach(walkSync(s3Path, {directories: false}), async (file) => {
    const filePath = Path.join(s3Path, file);
    const fileContent = fs.readFileSync(filePath);
    const params = {
      Bucket: endpoint,
      Key: file,
      Body: fileContent,
      ContentType: "text/html", // adjust per file type if needed
    };
    const s3Upload = await s3.upload(params).promise();
    s3Upload ? undefined : console.error("Error synchronizing the bucket");
  });

  console.log("S3 bucket synchronized!");
};
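
A hypothetical call (the bucket name is passed as the endpoint argument):

syncS3Directory("./public", "my-bucket-name").catch(console.error);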
0
const AWS = require("aws-sdk");
const fs = require("fs");
const path = require("path");
const async = require("async");
const readdir = require("recursive-readdir");

// AWS CRED
const ID = "<accessKeyId>";
const SECRET = "<secretAccessKey>";

const rootFolder = path.resolve(__dirname, "../");
const uploadFolder = "./sources";

// The name of the bucket that you have created
const BUCKET_NAME = "<Bucket_Name>";

const s3 = new AWS.S3({
  accessKeyId: ID,
  secretAccessKey: SECRET
});

function getFiles(dirPath) {
  return fs.existsSync(dirPath) ? readdir(dirPath) : [];
}

async function uploadToS3(uploadPath) {
  const filesToUpload = await getFiles(path.resolve(rootFolder, uploadPath));

  console.log(filesToUpload);
  return new Promise((resolve, reject) => {
    async.eachOfLimit(
      filesToUpload,
      10,
      async.asyncify(async file => {
        const Key = file.replace(`${rootFolder}/`, "");
        console.log(`uploading: [${Key}]`);
        return new Promise((res, rej) => {
          s3.upload(
            {
              Key,
              Bucket: BUCKET_NAME,
              Body: fs.readFileSync(file)
            },
            err => {
              if (err) {
                return rej(new Error(err));
              }
              res({ result: true });
            }
          );
        });
      }),
      err => {
        if (err) {
          return reject(new Error(err));
        }
        resolve({ result: true });
      }
    );
  });
}

uploadToS3(uploadFolder)
  .then(() => {
    console.log("upload complete!");
    process.exit(0);
  })
  .catch(err => {
    console.error(err.message);
    process.exit(1);
  });
Chanakya Vadla