I have been trying to access Stack Overflow at a rate of 30 requests per second, but it is not working: I get blocked after a few seconds, even though the Stack Exchange documentation says the maximum rate limit is 30 requests per second.
The library I use to make the requests is gocolly. Here is my code:
package main
import (
"fmt"
"log"
"strconv"
"time"
"github.com/gocolly/colly"
"github.com/gocolly/colly/debug"
)
// finish writes the single line "Finish" to standard output.
func finish() {
	fmt.Print("Finish\n")
}
// main crawls the Stack Overflow "votes" question listing and follows the
// first link of every question summary found on each listing page.
//
// All listing pages are queued up front and the collector is awaited once at
// the end, so the only throttle on the crawl is the LimitRule below (at most
// 10 requests in flight, each followed by a 1 second pause).
//
// NOTE(review): the "30 requests / second" figure quoted for Stack Exchange
// appears to come from the API throttle documentation, which presumably
// applies to api.stackexchange.com rather than to scraping HTML pages on
// stackoverflow.com — confirm before raising the rate.
func main() {
	const (
		listingURL = "https://stackoverflow.com/questions?tab=votes&page="
		// NOTE(review): listing pagination looks 1-based; the previous loop
		// also requested page=0 — confirm that page was not wanted.
		firstPage = 1
		lastPage  = 1000
		// NOTE(review): this string claims to be Googlebot. Sites may verify
		// that claim and block impostors — consider an honest User-Agent.
		userAgent = "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
	)

	c := colly.NewCollector(
		colly.AllowedDomains("stackoverflow.com"),
		// Collector.Visit runs at depth 1 and Request.Visit at parent depth+1,
		// so the question links discovered on a listing page are depth 2.
		// MaxDepth(1) rejected every one of them with ErrMaxDepth.
		colly.MaxDepth(2),
		colly.Async(true),
		colly.Debugger(&debug.LogDebugger{}),
	)

	// Limit only fails for an invalid rule; without it the crawl would be
	// unthrottled, so treat that as fatal.
	if err := c.Limit(&colly.LimitRule{
		DomainGlob:  "*stackoverflow.*",
		Parallelism: 10,
		Delay:       1 * time.Second,
	}); err != nil {
		log.Fatalln("invalid limit rule:", err)
	}

	c.OnRequest(func(r *colly.Request) {
		r.Headers.Set("User-Agent", userAgent)
	})

	c.OnError(func(r *colly.Response, err error) {
		// Include the URL and status code so throttling or blocking
		// responses can be told apart from network failures.
		if r != nil && r.Request != nil {
			log.Printf("Something went wrong: %s (status %d): %v", r.Request.URL, r.StatusCode, err)
			return
		}
		log.Println("Something went wrong:", err)
	})

	c.OnResponse(func(r *colly.Response) {
		fmt.Println("Visited", r.Request.URL)
	})

	c.OnHTML("#questions", func(e *colly.HTMLElement) {
		e.ForEach(".s-post-summary.js-post-summary", func(_ int, el *colly.HTMLElement) {
			link := el.ChildAttr("a[href]", "href")
			if link == "" {
				// No anchor in this summary; visiting "" + base would
				// needlessly crawl the site root.
				return
			}
			// Request.Visit resolves relative links against the current
			// page, so the host does not have to be prepended by hand.
			if err := e.Request.Visit(link); err != nil {
				log.Printf("skipping %q: %v", link, err)
			}
		})
	})

	for page := firstPage; page <= lastPage; page++ {
		link := listingURL + strconv.Itoa(page)
		if err := c.Visit(link); err != nil {
			log.Printf("cannot queue %s: %v", link, err)
		}
	}

	// In async mode Visit only schedules the request. Wait once, after
	// everything is queued; waiting inside the loop serialised the crawl to
	// one listing page at a time.
	c.Wait()

	finish()
}
I hope someone can help me.