I have the following Dockerfile and application code:
# Dockerfile
# Container image for an AWS Lambda function (handler: app.lambda_handler)
# that drives Firefox through Selenium and geckodriver.
FROM public.ecr.aws/lambda/python:3.9

RUN yum update -y

# System libraries for Firefox plus build tooling for Python packages.
# libxml2, libxml2-devel and g++ were added on the assumption that pandas
# needs them.
# The "&&" before "yum -y clean all" is required: without it the words
# "yum -y clean all" are parsed as extra arguments to "yum install" and the
# cache cleanup never runs.
# NOTE(review): libgtk-3-0, libpci and g++ look like Debian-style package
# names; confirm yum resolves them on this base image (gcc-c++ is the usual
# yum name for the C++ compiler).
RUN yum install -y \
    Xvfb \
    wget \
    gtk3 \
    dbus-glib \
    libpci \
    unzip \
    gcc \
    openssl-devel \
    zlib-devel \
    libffi-devel \
    libgtk-3-0 \
    alsa-lib-devel \
    libxml2 \
    libxml2-devel \
    g++ \
    && yum -y clean all
RUN yum -y groupinstall development

WORKDIR /opt

# NOTE(review): "firefox-latest-ssl" is unpinned, so the browser version (and
# potentially the archive compression expected by "tar -j") can change between
# builds while geckodriver stays at v0.31.0 - consider pinning a release.
RUN wget -O- "https://download.mozilla.org/?product=firefox-latest-ssl&os=linux64&lang=en-US" | tar -jx -C /usr/local/
# Borrowed from here: https://github.com/aws-samples/container-web-scraper-example/blob/master/code/Dockerfile
RUN ln -s /usr/local/firefox/firefox /usr/bin/firefox

# Download, unpack and clean up geckodriver in a single layer so the removed
# tarball does not remain in an earlier image layer. The binary ends up at
# /opt/geckodriver, which is the path app.py passes to Selenium.
RUN wget https://github.com/mozilla/geckodriver/releases/download/v0.31.0/geckodriver-v0.31.0-linux64.tar.gz \
    && tar -xf geckodriver-v0.31.0-linux64.tar.gz \
    && rm geckodriver-v0.31.0-linux64.tar.gz \
    && chmod +x geckodriver

# "RUN export DISPLAY=:99" and "RUN Xvfb ... &" were removed here: a variable
# exported inside a RUN step and a process backgrounded inside a RUN step only
# exist for that build step and are gone when the container starts. app.py
# requests headless mode, which does not need an X display.

WORKDIR /var/task
# Install selenium
COPY lambda_reqs.txt .
RUN pip3 install -r lambda_reqs.txt
# Copy lambda's main script
COPY app.py .
CMD ["app.lambda_handler"]
app.py
# app.py
import os
import boto3
from io import StringIO
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
import pandas as pd
# Location of the geckodriver binary inside the container image.
executable_path = '/opt/geckodriver'

# Browser options: run Firefox without a visible window.
options = Options()
options.headless = True
# NOTE(review): the three switches below look like Chromium command-line
# flags; confirm that Firefox actually honors them.
options.add_argument("--no-sandbox")
options.add_argument("--single-process")
options.add_argument("--disable-dev-shm-usage")

# The browser session is created when the module is imported, so a failure to
# start Firefox surfaces while the module is loading, before lambda_handler
# is ever called.
# NOTE(review): Selenium 4 deprecates the executable_path / service_log_path
# keyword arguments in favor of a Service object - confirm against the
# version pinned in lambda_reqs.txt.
driver = webdriver.Firefox(
    options=options,
    executable_path=executable_path,
    service_log_path=os.path.devnull,
)
def lambda_handler(event, context):
    """
    Entry point invoked by AWS Lambda.

    Navigates the module-level Firefox driver to the Google home page and
    reads the resulting page source. Nothing is returned.

    :param event: invocation payload (not used here)
    :param context: Lambda runtime context (not used here)
    :return: None
    """
    # More sample code than actual
    target_url = "https://www.google.com/"
    driver.get(target_url)
    page_html = driver.page_source
When I try to run the container from the AWS Lambda console, I get the following error:
[ERROR] TimeoutException: Message: Failed to read marionette port
Traceback (most recent call last):
File "/var/lang/lib/python3.9/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1030, in _gcd_import
File "<frozen importlib._bootstrap>", line 1007, in _find_and_load
File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 850, in exec_module
File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
File "/var/task/app.py", line 19, in <module>
driver = webdriver.Firefox(options=options, executable_path='/opt/geckodriver', service_log_path=os.path.devnull)
File "/var/lang/lib/python3.9/site-packages/selenium/webdriver/firefox/webdriver.py", line 177, in __init__
RemoteWebDriver.__init__(
File "/var/lang/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py", line 275, in __init__
self.start_session(capabilities, browser_profile)
File "/var/lang/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py", line 365, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/var/lang/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py", line 430, in execute
self.error_handler.check_response(response)
File "/var/lang/lib/python3.9/site-packages/selenium/webdriver/remote/errorhandler.py", line 247, in check_response
raise exception_class(message, screen, stacktrace)
I've been googling around trying to find ways to work around or fix this issue, but I'm a bit stumped, so I figured I would just post and ask directly whether there is a way to address the marionette port issue here.
Side note: I've tried doing some of this work using Chrome and chromedriver instead, but I run into the issues referenced here: "ElementNotInteractableException: element not interactable: element has zero size appears since upgrade to chromedriver 83", and I can't seem to install Chrome 8.1 via the Dockerfile.
Any assistance would be appreciated. Thank you in advance.