Selenium webdriver arm64

This commit is contained in:
Luciano Gervasoni
2025-04-07 12:28:11 +02:00
parent 7acdf6bc77
commit 3867538826
3 changed files with 63 additions and 21 deletions

View File

@@ -1,12 +1,57 @@
FROM python:3.12 FROM python:3.12
RUN apt update && apt install -y --no-install-recommends chromium chromium-driver curl # Architecture
RUN apt autoclean && rm -rf /var/lib/apt/lists/* #ARG ARCH_G=linux64
#ARG ARCH_F=linux-x86_64
ARG ARCH_G=linux-aarch64
ARG ARCH_F=linux-aarch64
ARG firefox_ver=137.0
ARG geckodriver_ver=0.36.0
# Install Firefox and geckodriver from upstream release archives in a single
# RUN instruction. The archives are selected by the build args
# firefox_ver / ARCH_F (Firefox) and geckodriver_ver / ARCH_G (geckodriver).
# Firefox ends up in /opt/firefox and geckodriver in /usr/local/bin.
RUN apt-get update \
&& apt-get upgrade -y \
&& apt-get install -y --no-install-recommends --no-install-suggests \
ca-certificates \
&& update-ca-certificates \
\
# Install tools needed only to download and unpack the archives
# (curl for the downloads, xz-utils for the .tar.xz); purged again below
&& toolDeps=" \
curl xz-utils \
" \
&& apt-get install -y --no-install-recommends --no-install-suggests $toolDeps \
\
# Install dependencies for Firefox: libgl1 and libpci3 plus every package
# listed as a direct Depends of the Debian firefox-esr package
# (firefox-esr itself is not installed)
&& apt-get install -y --no-install-recommends --no-install-suggests libgl1 libpci3 \
`apt-cache depends firefox-esr | awk '/Depends:/{print$2}'` \
\
# Download the Firefox release archive (en-GB build), unpack it in /tmp
# and move the resulting firefox directory to /opt/firefox
&& curl -fL -o /tmp/firefox.tar.xz \
https://ftp.mozilla.org/pub/firefox/releases/${firefox_ver}/${ARCH_F}/en-GB/firefox-${firefox_ver}.tar.xz \
&& tar -xJf /tmp/firefox.tar.xz -C /tmp/ \
&& mv /tmp/firefox /opt/firefox \
\
# Download the geckodriver release archive, unpack it in /tmp, make the
# binary executable and move it to /usr/local/bin
&& curl -fL -o /tmp/geckodriver.tar.gz \
https://github.com/mozilla/geckodriver/releases/download/v${geckodriver_ver}/geckodriver-v${geckodriver_ver}-${ARCH_G}.tar.gz \
&& tar -xzf /tmp/geckodriver.tar.gz -C /tmp/ \
&& chmod +x /tmp/geckodriver \
&& mv /tmp/geckodriver /usr/local/bin/ \
\
# Cleanup: purge the download tools (with auto-remove), then delete the apt
# package lists and everything left in /tmp, including the archives
&& apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false $toolDeps \
&& rm -rf /var/lib/apt/lists/* /tmp/*
RUN pip install --no-cache-dir selenium fastapi "uvicorn[standard]"
WORKDIR /opt/app WORKDIR /opt/app
RUN pip install --no-cache-dir selenium fastapi "uvicorn[standard]"
COPY . /opt/app/ COPY . /opt/app/
# This image cannot run in non-headless mode anyway, so headless mode is forcibly enabled, regardless of whether the WebDriver client requests it in its capabilities.
ENV MOZ_HEADLESS=1
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"] CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
# docker build -f Dockerfile -t selenium_app . # docker build -f Dockerfile -t selenium_app .

View File

@@ -1,12 +1,25 @@
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from utils import get_chrome_options from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
import time import time
import os import os
from logger import get_logger from logger import get_logger
logger = get_logger() logger = get_logger()
def get_webdriver():
    """Create and return a Selenium Firefox driver.

    The driver is started through the geckodriver executable at
    ``/usr/local/bin/geckodriver`` and uses the Firefox binary at
    ``/opt/firefox/firefox``. The ``--headless`` argument is always passed.

    Returns:
        The new ``webdriver.Firefox`` instance. This function does not quit
        the driver.
    """
    gecko_service = Service(executable_path='/usr/local/bin/geckodriver')

    firefox_options = Options()
    firefox_options.binary_location = '/opt/firefox/firefox'
    firefox_options.add_argument('--headless')  # Optional

    return webdriver.Firefox(options=firefox_options, service=gecko_service)
class MissingKidsFetcher(): class MissingKidsFetcher():
def __init__(self) -> None: def __init__(self) -> None:
pass pass
@@ -18,10 +31,8 @@ class MissingKidsFetcher():
# URLs # URLs
set_urls = set() set_urls = set()
# Initialize
driver = webdriver.Chrome(options=get_chrome_options())
try: try:
driver = get_webdriver()
# Go to URL # Go to URL
driver.get(url) driver.get(url)
# Iterate # Iterate

View File

@@ -1,14 +0,0 @@
from selenium.webdriver.chrome.options import Options
def get_chrome_options():
    """Build the Chrome ``Options`` object used for Selenium sessions.

    The returned options carry the ``--headless``, ``--no-sandbox`` and
    ``--disable-dev-shm-usage`` arguments, plus a ``prefs`` experimental
    option that sets ``profile.default_content_settings`` to
    ``{"images": 2}``.

    Returns:
        The configured ``Options`` instance.
    """
    options = Options()
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        options.add_argument(flag)

    # NOTE(review): the value 2 presumably blocks image loading - confirm
    # against the Chrome preference documentation.
    prefs = {"profile.default_content_settings": {"images": 2}}
    options.experimental_options["prefs"] = prefs
    return options