
I have a CSV file with ~10k URLs that I need to HTTP GET. What is the simplest way to limit the concurrency of goroutines to no more than 16 at a time?

func getUrl(url string) {
    request := gorequest.New()
    resp, body, errs := request.Get(url).End()
    _ = resp
    _ = body
    _ = errs
}

func main() {
    csvfile, err := os.Open("urls.csv")
    if err != nil {
        panic(err)
    }
    defer csvfile.Close()

    reader := csv.NewReader(csvfile)
    reader.FieldsPerRecord = -1
    rawCSVdata, err := reader.ReadAll()
    if err != nil {
        panic(err)
    }

    completed := 0
    for _, each := range rawCSVdata {
        go getUrl(each[1])
        completed++
    }
}
ewhitt

1 Answer


A producer-consumer pattern: start a fixed pool of 16 worker goroutines that consume URLs from a shared channel, feed the channel from the CSV, then close it and wait for the workers to finish:

package main

import (
    "encoding/csv"
    "os"
    "sync"

    "github.com/parnurzeal/gorequest"
)

const workersCount = 16

// getUrlWorker consumes URLs from the channel until it is closed.
func getUrlWorker(urlChan chan string) {
    for url := range urlChan {
        request := gorequest.New()
        resp, body, errs := request.Get(url).End()
        _ = resp
        _ = body
        _ = errs
    }
}

func main() {
    csvfile, err := os.Open("urls.csv")
    if err != nil {
        panic(err)
    }
    defer csvfile.Close()

    reader := csv.NewReader(csvfile)
    reader.FieldsPerRecord = -1
    rawCSVdata, err := reader.ReadAll()
    if err != nil {
        panic(err)
    }

    var wg sync.WaitGroup
    urlChan := make(chan string)

    // Start a fixed pool of workers; concurrency never exceeds workersCount.
    wg.Add(workersCount)
    for i := 0; i < workersCount; i++ {
        go func() {
            defer wg.Done()
            getUrlWorker(urlChan)
        }()
    }

    // Produce: the unbuffered channel blocks until a worker is free,
    // so URLs are handed out at most 16 at a time.
    for _, each := range rawCSVdata {
        urlChan <- each[1]
    }
    close(urlChan)

    // Wait for the workers to drain the channel and exit.
    wg.Wait()
}
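For comparison, a buffered channel can also act as a counting semaphore, which avoids the explicit worker pool: launch one goroutine per URL, but make each one acquire a slot before starting. Below is a minimal sketch of that alternative (not from the original answer); it uses the standard net/http client instead of gorequest so it stays self-contained, and the urls slice is a stand-in for the CSV data loaded as above:

package main

import (
    "fmt"
    "net/http"
    "sync"
)

const maxConcurrent = 16 // same limit as the worker-pool version

func main() {
    urls := []string{"https://example.com"} // stand-in; load from the CSV as above

    var wg sync.WaitGroup
    sem := make(chan struct{}, maxConcurrent) // buffered channel as counting semaphore

    for _, url := range urls {
        wg.Add(1)
        sem <- struct{}{} // acquire a slot; blocks once 16 requests are in flight
        go func(u string) {
            defer wg.Done()
            defer func() { <-sem }() // release the slot when done
            resp, err := http.Get(u)
            if err != nil {
                fmt.Println("get failed:", err)
                return
            }
            resp.Body.Close() // body unused here, as in the question
        }(url)
    }

    wg.Wait()
}

The trade-off: this spawns one goroutine per URL (10k total) while capping how many run at once, whereas the worker pool above creates only 16 goroutines in total, which is slightly cheaper for a large input.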
HectorJ