From 3867538826368815567e3f2bb67ac966be028edc Mon Sep 17 00:00:00 2001 From: Luciano Gervasoni Date: Mon, 7 Apr 2025 12:28:11 +0200 Subject: [PATCH] Selenium webdriver arm64 --- app_selenium/Dockerfile | 51 +++++++++++++++++++++++++++++++++--- app_selenium/missing_kids.py | 19 +++++++++++--- app_selenium/utils.py | 14 ---------- 3 files changed, 63 insertions(+), 21 deletions(-) delete mode 100644 app_selenium/utils.py diff --git a/app_selenium/Dockerfile b/app_selenium/Dockerfile index 55af744..741ebe6 100644 --- a/app_selenium/Dockerfile +++ b/app_selenium/Dockerfile @@ -1,12 +1,57 @@ + FROM python:3.12 -RUN apt update && apt install -y --no-install-recommends chromium chromium-driver curl -RUN apt autoclean && rm -rf /var/lib/apt/lists/* +# Architecture +#ARG ARCH_G=linux64 +#ARG ARCH_F=linux-x86_64 +ARG ARCH_G=linux-aarch64 +ARG ARCH_F=linux-aarch64 + +ARG firefox_ver=137.0 +ARG geckodriver_ver=0.36.0 + + +RUN apt-get update \ + && apt-get upgrade -y \ + && apt-get install -y --no-install-recommends --no-install-suggests \ + ca-certificates \ + && update-ca-certificates \ + \ + # Install tools for building + && toolDeps=" \ + curl xz-utils \ + " \ + && apt-get install -y --no-install-recommends --no-install-suggests $toolDeps \ + \ + # Install dependencies for Firefox + && apt-get install -y --no-install-recommends --no-install-suggests libgl1 libpci3 \ + `apt-cache depends firefox-esr | awk '/Depends:/{print$2}'` \ + \ + # Download and install Firefox + && curl -fL -o /tmp/firefox.tar.xz \ + https://ftp.mozilla.org/pub/firefox/releases/${firefox_ver}/${ARCH_F}/en-GB/firefox-${firefox_ver}.tar.xz \ + && tar -xJf /tmp/firefox.tar.xz -C /tmp/ \ + && mv /tmp/firefox /opt/firefox \ + \ + # Download and install geckodriver + && curl -fL -o /tmp/geckodriver.tar.gz \ + https://github.com/mozilla/geckodriver/releases/download/v${geckodriver_ver}/geckodriver-v${geckodriver_ver}-${ARCH_G}.tar.gz \ + && tar -xzf /tmp/geckodriver.tar.gz -C /tmp/ \ + && chmod +x 
/tmp/geckodriver \ + && mv /tmp/geckodriver /usr/local/bin/ \ + \ + # Cleanup unnecessary stuff + && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false $toolDeps \ + && rm -rf /var/lib/apt/lists/* /tmp/* + +RUN pip install --no-cache-dir selenium fastapi "uvicorn[standard]" WORKDIR /opt/app -RUN pip install --no-cache-dir selenium fastapi "uvicorn[standard]" COPY . /opt/app/ +# As this image cannot run in non-headless mode anyway, it's better to forcibly enable it, regardless of whether the WebDriver client requests it in capabilities or not. +ENV MOZ_HEADLESS=1 + CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"] # docker build -f Dockerfile -t selenium_app . diff --git a/app_selenium/missing_kids.py b/app_selenium/missing_kids.py index c94084d..6774b53 100644 --- a/app_selenium/missing_kids.py +++ b/app_selenium/missing_kids.py @@ -1,12 +1,25 @@ from selenium import webdriver from selenium.webdriver.common.by import By -from utils import get_chrome_options +from selenium.webdriver.firefox.options import Options +from selenium.webdriver.firefox.service import Service import time import os from logger import get_logger logger = get_logger() + +def get_webdriver(): + options = Options() + options.add_argument('--headless') # Optional + options.binary_location = '/opt/firefox/firefox' + + service = Service('/usr/local/bin/geckodriver') + + driver = webdriver.Firefox(options=options, service=service) + return driver + + class MissingKidsFetcher(): def __init__(self) -> None: pass @@ -18,10 +31,8 @@ class MissingKidsFetcher(): # URLs set_urls = set() - # Initialize - driver = webdriver.Chrome(options=get_chrome_options()) - try: + driver = get_webdriver() # Go to URL driver.get(url) # Iterate diff --git a/app_selenium/utils.py b/app_selenium/utils.py deleted file mode 100644 index 062c720..0000000 --- a/app_selenium/utils.py +++ /dev/null @@ -1,14 +0,0 @@ -from selenium.webdriver.chrome.options import Options - -def get_chrome_options():
- """Sets chrome options for Selenium. - Chrome options for headless browser is enabled. - """ - chrome_options = Options() - chrome_options.add_argument("--headless") - chrome_options.add_argument("--no-sandbox") - chrome_options.add_argument("--disable-dev-shm-usage") - chrome_prefs = {} - chrome_options.experimental_options["prefs"] = chrome_prefs - chrome_prefs["profile.default_content_settings"] = {"images": 2} - return chrome_options