1

I am currently attempting to create a cli app to make use of the box.com api

Idea being I could run a command like box cp ~/foo box:/foo and the app would then walk all of the subdirectories of ~/foo and create folders and upload / update files as needed. I got it all working, but the problem is it works too well.

I have set it up to make use of workers pulling from a jobqueue so I can make 6x api calls at a time. This works great for making lots and lots of small requests like recursively walking the directory structure and mapping folders to their equivalent "BoxID". The problem is when it hits a patch of > 50mb files the bottleneck of waiting on the response of the post request goes away and I max out my bandwidth (which means no one else at my company can use the internet).

I would like to keep my existing multi-thread setup so I can crank through small requests but somehow limit the bandwidth of my api calls involving upload / download.

I tried a few suggestions from ChatGPT:

// import "github.com/juju/ratelimit"

func (bm *BoxModule) uploadChunkedFilePart(uploadSessionID string, path string, start int64, stop int64, total int64) {
    file, err := os.Open(path)
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    // Read the specific chunk from the file
    file.Seek(start, 0)
    bufferSize := int(stop - start + 1)
    if bufferSize < 0 {
        log.Fatalf("Invalid buffer size for file %s: start = %d, stop = %d", path, start, stop)
    }
    buffer := make([]byte, bufferSize)
    _, err = file.Read(buffer)
    if err != nil {
        log.Fatal(err)
    }

    // Calculate the sha1 for the part
    hasher := sha1.New()
    hasher.Write(buffer)
    sha := base64.StdEncoding.EncodeToString(hasher.Sum(nil))

    // Set the maximum number of retry attempts
    maxRetryAttempts := 3

    keyIndex := bm.ChunkedUploads[uploadSessionID].KeyIndex
    for attempt := 1; attempt <= maxRetryAttempts; attempt++ {
        // Create a rate-limited reader from the buffer with a limit of 1 MB/s
        bucket := ratelimit.NewBucketWithRate(1024*1024, int64(1024*1024)) // 1 MB/s
        limitedReader := ratelimit.Reader(bytes.NewReader(buffer), bucket)

        // Upload the part
        req, _ := http.NewRequest("PUT", "https://upload.box.com/api/2.0/files/upload_sessions/"+uploadSessionID, limitedReader)
        req.Header.Set("Authorization", "Bearer "+bm.getKey(keyIndex))
        req.Header.Set("Digest", "sha="+sha)
        req.Header.Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", start, stop, total))
        req.Header.Set("Content-Type", "application/octet-stream")

        resp, err := bm.client.Do(req)
        if err != nil {
            log.Fatal(err)
        }
        defer resp.Body.Close()

        bodyBytes, _ := ioutil.ReadAll(resp.Body)
        bodyString := string(bodyBytes)

        // ...remaining code...
    }
}

But after implementing it and testing I still seem to be pushing at 1.65gb/s instead of my desired 6mb/s, and it has been requested that my backup job not consume the company's entire bandwidth for the duration of the 3.5 TB upload.

What am I doing wrong in my implementation?

CaffeineAddiction
  • 803
  • 1
  • 14
  • 29

0 Answers