2

I'm working on AWS Lambda with Go. I'm deploying Go code that uses chromedp as a Docker image, and I get a "websocket url timeout reached" error. My Lambda is configured with 3008 MB of memory, 512 MB of storage, and a 15-minute timeout. Can you find what is wrong and how to fix it? Here are main.go and the Dockerfile.

File main.go (chromedp part)

// getPage prepares a chromedp browser context for crawling and registers a
// watcher for a new target. This is only the chromedp part of the function:
// in the lines shown here URL, lineNum, stationNm, htmlContent and ch are not
// used any further.
func getPage(URL string, lineNum string, stationNm string) {
    // settings for crawling
    // NOTE(review): this context is created before the custom allocator exists.
    // It only serves as the parent passed to NewExecAllocator below, and ctx is
    // reassigned afterwards — confirm whether it is needed at all.
    ctx, cancle := chromedp.NewContext(
        context.Background(),
        chromedp.WithLogf(log.Printf),
    )
    defer cancle()

    // Chrome command-line options handed to the exec allocator.
    opts := []chromedp.ExecAllocatorOption{
        chromedp.DisableGPU,
        chromedp.NoSandbox,
        chromedp.Headless,
        chromedp.Flag("no-zygote", true),
        chromedp.Flag("single-process", true),
        // Point Chrome's writable paths at /tmp.
        chromedp.Flag("homedir", "/tmp"),
        chromedp.Flag("data-path", "/tmp/data-path"),
        chromedp.Flag("disk-cache-dir", "/tmp/cache-dir"),
        // NOTE(review): a fixed debugging port and bind address are forced here;
        // confirm this is required rather than letting chromedp choose.
        chromedp.Flag("remote-debugging-port", "9222"),
        chromedp.Flag("remote-debugging-address", "0.0.0.0"),
        chromedp.Flag("disable-dev-shm-usage", true),
    }

    // Allocator built from the options above, parented on the context created earlier.
    allocCtx, cancel := chromedp.NewExecAllocator(ctx, opts...)
    defer cancel()

    // Browser context on the custom allocator; this replaces the earlier ctx.
    ctx, cancel = chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
    defer cancel()

    var htmlContent string

    // ch is the channel returned by WaitNewTarget for a new target whose URL
    // contains "/timetable/web/".
    ch := chromedp.WaitNewTarget(ctx, func(i *target.Info) bool {
        return strings.Contains(i.URL, "/timetable/web/")
    })

}

File Dockerfile

# Build stage: compile the Go binary inside the Lambda "provided" AL2 base image.
FROM public.ecr.aws/lambda/provided:al2 AS build

# Build settings: module mode, cgo disabled, target linux/amd64.
ENV GO111MODULE=on \
    CGO_ENABLED=0 \
    GOOS=linux \
    GOARCH=amd64

# Get rid of the extension warning
RUN mkdir -p /opt/extensions
# Install the Go toolchain with yum.
RUN yum -y install golang
# Fetch modules directly from their origin instead of through a module proxy.
RUN go env -w GOPROXY=direct

# Clone git, copying go.mod, go.sum, main.go
WORKDIR /var/task/
RUN yum install git -y
RUN git clone https://github.com/seedspirit/NaverCrawler-CICD-go.git
RUN cp NaverCrawler-CICD-go/main.go /var/task/
RUN cp NaverCrawler-CICD-go/go.mod /var/task/
RUN cp NaverCrawler-CICD-go/go.sum /var/task/

# cache dependencies
RUN go mod download
# Build the binary as /var/task/main.
RUN go build -o main .

# Runtime stage: same base image, with only the compiled binary copied in.
FROM public.ecr.aws/lambda/provided:al2
COPY --from=build /var/task/main /var/task/main

# Download and install the Google Chrome stable RPM, then font/X11/GConf/D-Bus packages.
# NOTE(review): this is the full desktop Chrome build — confirm it can actually
# start inside the Lambda execution environment.
RUN curl https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm -o chrome.rpm && \
    yum install -y ./chrome.rpm && \
    yum install -y fontconfig libX11 GConf2 dbus-x11

# The compiled Go binary is the container entry point.
ENTRYPOINT ["/var/task/main"]
Vy Do
  • 46,709
  • 59
  • 215
  • 313
kim
  • 35
  • 3

2 Answers

1

It's recommended to use chromedp/headless-shell because it's small and more suitable for AWS Lambda.

I just tested a simple demo with chromedp/headless-shell, and it works.

Dockerfile:

# Build stage: compile the Go binary in the official Go Alpine image.
FROM golang:1.20.4-alpine3.17 AS builder

WORKDIR /app

# Copy only the module files first, so the dependency download layer can be
# cached independently of source changes.
COPY go.mod go.sum ./
RUN go mod download

# Copy the rest of the build context and compile it to /app/main.
COPY . .

RUN go build -o main

# Runtime stage: chromedp's headless-shell image, pinned to a specific version.
FROM chromedp/headless-shell:113.0.5672.93

WORKDIR /app

# Bring in only the compiled binary from the build stage.
COPY --from=builder /app/main .

# Run the binary from the working directory.
ENTRYPOINT [ "./main" ]

main.go:

package main

import (
    "context"
    "encoding/json"
    "fmt"
    "log"
    "os"

    "github.com/aws/aws-lambda-go/lambda"
    "github.com/chromedp/chromedp"
)

// Handler is the function invoked for each event. It starts Chrome through
// chromedp, navigates to https://example.com/ and prints the text of the
// element matched by "body > div > p:nth-child(2)".
//
// The incoming context and event payload are ignored. A chromedp failure is
// returned to the caller rather than terminating the process, so the deferred
// cancel functions still run and the caller (the Lambda runtime, or main when
// run locally) decides how to report the error.
func Handler(_ context.Context, _ json.RawMessage) error {
    // Chrome flags used to run inside a container.
    opts := []chromedp.ExecAllocatorOption{
        chromedp.NoSandbox,
        chromedp.Flag("disable-setuid-sandbox", true),
        chromedp.Flag("disable-dev-shm-usage", true),
        chromedp.Flag("single-process", true),
        chromedp.Flag("no-zygote", true),
    }
    ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
    defer cancel()

    ctx, cancel = chromedp.NewContext(ctx, chromedp.WithDebugf(log.Printf))
    defer cancel()

    var content string
    if err := chromedp.Run(ctx, chromedp.Tasks{
        chromedp.Navigate("https://example.com/"),
        chromedp.Text("body > div > p:nth-child(2)", &content),
    }); err != nil {
        // Do not log.Fatal here: it exits the process immediately, skipping
        // the deferred cancels above.
        return fmt.Errorf("running chromedp tasks: %w", err)
    }
    fmt.Println(content)
    return nil
}

func main() {
    if _, exists := os.LookupEnv("AWS_LAMBDA_RUNTIME_API"); exists {
        lambda.Start(Handler)
    } else {
        err := Handler(context.Background(), nil)
        if err != nil {
            log.Fatal(err)
        }
    }
}

This example is based on https://github.com/Andiedie/chromedp-aws-lambda-example. Please note that the chromedp.ExecAllocatorOption values listed above are copied directly from that repository. They work, but I'm not sure whether this is the best set of options; you may have to adjust them according to your needs.

Zeke Lu
  • 6,349
  • 1
  • 17
  • 23
0

For people who come here: I solved it this way.

Dockerfile

# Build stage: compile the Go binary in the official Go Alpine image.
FROM golang:1.20.4-alpine3.17 AS builder

# Build settings: module mode, cgo disabled, target linux/amd64.
ENV GO111MODULE=on \
    CGO_ENABLED=0 \
    GOOS=linux \
    GOARCH=amd64

WORKDIR /app

# Install the CA certificate bundle (copied into the runtime image below) and
# drop the apk cache.
RUN apk update && apk add ca-certificates && rm -rf /var/cache/apk/*

# Copy the module files and main.go, then download dependencies.
# NOTE(review): because main.go is copied in this step too, a source change
# also invalidates the `go mod download` layer — confirm that is intended.
COPY go.mod go.sum main.go ./
RUN go mod download

# Copy the rest of the build context and compile it to /app/main.
COPY . .

RUN go build -o main

# Runtime stage: chromedp's headless-shell image, pinned to a specific version.
FROM chromedp/headless-shell:113.0.5672.93

WORKDIR /app

# Reuse the CA bundle from the build stage, presumably so the binary can
# verify TLS certificates at runtime.
COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt

# Bring in only the compiled binary from the build stage.
COPY --from=builder /app/main .

# Run the binary from the working directory.
ENTRYPOINT [ "./main" ]

function

// getPage loads URL in Chrome via chromedp, waits until ".end_footer_area" is
// visible and clicks the node matched by "button". It then attaches to the new
// target whose URL contains "/timetable/web/", waits for ".table_schedule" to
// be ready, and passes the outer HTML of ".schedule_wrap" to crawler together
// with lineNum and stationNm.
//
// An error from the first Run is handed to checkErr; an error from the second
// Run panics.
func getPage(URL string, lineNum string, stationNm string) {
    // settings for crawling
    opts := append(chromedp.DefaultExecAllocatorOptions[:],
        chromedp.NoSandbox,
        chromedp.Flag("disable-setuid-sandbox", true),
        chromedp.Flag("disable-dev-shm-usage", true),
        chromedp.Flag("single-process", true),
        chromedp.Flag("no-zygote", true),
    )

    // Keep the allocator's cancel func instead of discarding it, so the
    // allocator context is cancelled and its resources are released when
    // this function returns.
    allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), opts...)
    defer cancelAlloc()

    ctx, cancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
    defer cancel()

    var htmlContent string

    // Register the watcher before clicking, so the new target is not missed.
    ch := chromedp.WaitNewTarget(ctx, func(i *target.Info) bool {
        return strings.Contains(i.URL, "/timetable/web/")
    })

    err := chromedp.Run(ctx,
        chromedp.Navigate(URL),
        chromedp.WaitVisible(".end_footer_area"),
        chromedp.Click("button"),
    )
    checkErr(err)

    // Attach a separate context to the newly opened target; it gets its own
    // cancel func rather than reusing the browser context's.
    newContext, cancelTab := chromedp.NewContext(ctx, chromedp.WithTargetID(<-ch))
    defer cancelTab()
    if err := chromedp.Run(newContext,
        chromedp.WaitReady(".table_schedule", chromedp.ByQuery),
        chromedp.OuterHTML(".schedule_wrap", &htmlContent, chromedp.ByQuery),
    ); err != nil {
        panic(err)
    }

    crawler(htmlContent, lineNum, stationNm)
}
kim
  • 35
  • 3