Selenium webdriver arm64
This commit is contained in:
@@ -1,12 +1,57 @@
|
|||||||
|
|
||||||
FROM python:3.12
|
FROM python:3.12
|
||||||
|
|
||||||
RUN apt update && apt install -y --no-install-recommends chromium chromium-driver curl
|
# Architecture
|
||||||
RUN apt autoclean && rm -rf /var/lib/apt/lists/*
|
#ARG ARCH_G=linux64
|
||||||
|
#ARG ARCH_F=linux-x86_64
|
||||||
|
ARG ARCH_G=linux-aarch64
|
||||||
|
ARG ARCH_F=linux-aarch64
|
||||||
|
|
||||||
|
ARG firefox_ver=137.0
|
||||||
|
ARG geckodriver_ver=0.36.0
|
||||||
|
|
||||||
|
|
||||||
|
RUN apt-get update \
|
||||||
|
&& apt-get upgrade -y \
|
||||||
|
&& apt-get install -y --no-install-recommends --no-install-suggests \
|
||||||
|
ca-certificates \
|
||||||
|
&& update-ca-certificates \
|
||||||
|
\
|
||||||
|
# Install tools for building
|
||||||
|
&& toolDeps=" \
|
||||||
|
curl xz-utils \
|
||||||
|
" \
|
||||||
|
&& apt-get install -y --no-install-recommends --no-install-suggests $toolDeps \
|
||||||
|
\
|
||||||
|
# Install dependencies for Firefox
|
||||||
|
&& apt-get install -y --no-install-recommends --no-install-suggests libgl1 libpci3 \
|
||||||
|
`apt-cache depends firefox-esr | awk '/Depends:/{print$2}'` \
|
||||||
|
\
|
||||||
|
# Download and install Firefox
|
||||||
|
&& curl -fL -o /tmp/firefox.tar.xz \
|
||||||
|
https://ftp.mozilla.org/pub/firefox/releases/${firefox_ver}/${ARCH_F}/en-GB/firefox-${firefox_ver}.tar.xz \
|
||||||
|
&& tar -xJf /tmp/firefox.tar.xz -C /tmp/ \
|
||||||
|
&& mv /tmp/firefox /opt/firefox \
|
||||||
|
\
|
||||||
|
# Download and install geckodriver
|
||||||
|
&& curl -fL -o /tmp/geckodriver.tar.gz \
|
||||||
|
https://github.com/mozilla/geckodriver/releases/download/v${geckodriver_ver}/geckodriver-v${geckodriver_ver}-${ARCH_G}.tar.gz \
|
||||||
|
&& tar -xzf /tmp/geckodriver.tar.gz -C /tmp/ \
|
||||||
|
&& chmod +x /tmp/geckodriver \
|
||||||
|
&& mv /tmp/geckodriver /usr/local/bin/ \
|
||||||
|
\
|
||||||
|
# Cleanup unnecessary stuff
|
||||||
|
&& apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false $toolDeps \
|
||||||
|
&& rm -rf /var/lib/apt/lists/* /tmp/*
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir selenium fastapi "uvicorn[standard]"
|
||||||
|
|
||||||
WORKDIR /opt/app
|
WORKDIR /opt/app
|
||||||
RUN pip install --no-cache-dir selenium fastapi "uvicorn[standard]"
|
|
||||||
COPY . /opt/app/
|
COPY . /opt/app/
|
||||||
|
|
||||||
|
# This image cannot run Firefox in non-headless mode anyway, so force headless mode on regardless of whether the WebDriver client requests it in its capabilities.
|
||||||
|
ENV MOZ_HEADLESS=1
|
||||||
|
|
||||||
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
|
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
|
||||||
|
|
||||||
# docker build -f Dockerfile -t selenium_app .
|
# docker build -f Dockerfile -t selenium_app .
|
||||||
|
|||||||
@@ -1,12 +1,25 @@
|
|||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
from utils import get_chrome_options
|
from selenium.webdriver.firefox.options import Options
|
||||||
|
from selenium.webdriver.firefox.service import Service
|
||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from logger import get_logger
|
from logger import get_logger
|
||||||
logger = get_logger()
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def get_webdriver():
    """Create a Firefox WebDriver bound to fixed binary and driver paths.

    The browser binary is taken from ``/opt/firefox/firefox`` and the
    geckodriver executable from ``/usr/local/bin/geckodriver``; the
    ``--headless`` argument is passed to Firefox.

    Returns:
        The ``webdriver.Firefox`` instance built from those options and
        that service. The caller owns it.
    """
    firefox_options = Options()
    # Point Selenium at the Firefox binary at this fixed path.
    firefox_options.binary_location = '/opt/firefox/firefox'
    firefox_options.add_argument('--headless')  # Optional

    # geckodriver is looked up at an explicit path rather than through PATH.
    gecko_service = Service('/usr/local/bin/geckodriver')

    return webdriver.Firefox(options=firefox_options, service=gecko_service)
|
||||||
|
|
||||||
|
|
||||||
class MissingKidsFetcher():
|
class MissingKidsFetcher():
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
pass
|
pass
|
||||||
@@ -18,10 +31,8 @@ class MissingKidsFetcher():
|
|||||||
# URLs
|
# URLs
|
||||||
set_urls = set()
|
set_urls = set()
|
||||||
|
|
||||||
# Initialize
|
|
||||||
driver = webdriver.Chrome(options=get_chrome_options())
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
driver = get_webdriver()
|
||||||
# Go to URL
|
# Go to URL
|
||||||
driver.get(url)
|
driver.get(url)
|
||||||
# Iterate
|
# Iterate
|
||||||
|
|||||||
@@ -1,14 +0,0 @@
|
|||||||
from selenium.webdriver.chrome.options import Options
|
|
||||||
|
|
||||||
def get_chrome_options():
    """Build the Chrome options object used by Selenium.

    The returned options carry the ``--headless``, ``--no-sandbox`` and
    ``--disable-dev-shm-usage`` arguments, plus a ``prefs`` experimental
    option whose ``profile.default_content_settings`` entry is
    ``{"images": 2}``.

    Returns:
        The configured ``Options`` instance.
    """
    options = Options()
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        options.add_argument(flag)

    # NOTE(review): images=2 presumably disables image loading - confirm
    # against the Chrome content-settings documentation.
    options.experimental_options["prefs"] = {
        "profile.default_content_settings": {"images": 2},
    }
    return options
|
|
||||||
Reference in New Issue
Block a user