Selenium: kill the driver process tree to release memory; supervisor conf: rotate log files

This commit is contained in:
Luciano Gervasoni
2025-07-28 11:16:15 +02:00
parent 54e41139bb
commit 1502f09e22
4 changed files with 49 additions and 8 deletions

View File

@@ -1,4 +1,4 @@
from utils import get_webdriver
from utils import get_webdriver, kill_process_tree
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
@@ -25,7 +25,7 @@ class MissingKidsFetcher():
try:
# Initialize
logger.debug("Initializing driver")
driver = get_webdriver()
driver, service = get_webdriver()
# Load URL
logger.debug("Get URL: {}".format(url))
driver.get(url)
@@ -75,6 +75,7 @@ class MissingKidsFetcher():
# atexit.register(driver.quit) # Will always be called on exit
except Exception as e:
logger.warning("Exception while closing/quitting driver: {}".format(str(e)), exc_info=True)
kill_process_tree(service.process.pid)
logger.info("Results: {} for URL: {}".format(str(results), url))
return results
@@ -88,7 +89,7 @@ class MissingKidsFetcher():
try:
logger.debug("Initializing driver")
driver = get_webdriver()
driver, service = get_webdriver()
logger.debug("Get URL: {}".format(url))
# Go to URL
driver.get(url)
@@ -153,5 +154,6 @@ class MissingKidsFetcher():
# atexit.register(driver.quit) # Will always be called on exit
except Exception as e:
logger.warning("Exception while closing/quitting driver: {}".format(str(e)), exc_info=True)
kill_process_tree(service.process.pid)
return set_urls

View File

@@ -1,4 +1,4 @@
from utils import get_webdriver
from utils import get_webdriver, kill_process_tree
from selenium.webdriver.common.by import By
from urllib.parse import quote
import time
@@ -34,7 +34,7 @@ class SearchFetcher():
url = quote(url_unquoted, safe=":/?=&#")
# Initialize
driver = get_webdriver()
driver, service = get_webdriver()
# Load URL
driver.get(url)
time.sleep(2)
@@ -51,6 +51,9 @@ class SearchFetcher():
# Filter by URL host
urls = [u for u in urls if url_host in u]
driver.quit()
kill_process_tree(service.process.pid)
return urls
def _search_breitbart(self, search):
@@ -60,7 +63,7 @@ class SearchFetcher():
url = quote(url_unquoted, safe=":/?=&#")
# Initialize
driver = get_webdriver()
driver, service = get_webdriver()
# Load URL
driver.get(url)
time.sleep(4)
@@ -77,6 +80,9 @@ class SearchFetcher():
# Filter by URL host
urls = [u for u in urls if url_host in u]
driver.quit()
kill_process_tree(service.process.pid)
return urls
def _search_zerohedge(self, search):
@@ -86,7 +92,7 @@ class SearchFetcher():
url = quote(url_unquoted, safe=":/?=&#")
# Initialize
driver = get_webdriver()
driver, service = get_webdriver()
# Load URL
driver.get(url)
time.sleep(2)
@@ -103,4 +109,7 @@ class SearchFetcher():
# Filter by URL host
urls = [u for u in urls if url_host in u]
driver.quit()
kill_process_tree(service.process.pid)
return urls

View File

@@ -1,6 +1,7 @@
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
import psutil
def get_webdriver():
options = Options()
@@ -10,4 +11,13 @@ def get_webdriver():
service = Service('/usr/local/bin/geckodriver')
driver = webdriver.Firefox(options=options, service=service)
return driver
return driver, service
def kill_process_tree(pid):
    """Forcefully kill the process with the given PID and all its descendants.

    Each process is killed independently, so one that has already exited
    does not prevent the remaining ones (including the parent) from being
    killed. After sending the kill signal the processes are waited on
    briefly so they are reaped rather than left behind as zombies.

    Args:
        pid: PID of the root process of the tree to kill.

    Returns:
        None. A PID that no longer exists is silently ignored.
    """
    try:
        parent = psutil.Process(pid)
        # Snapshot the descendants before killing anything: once the parent
        # is dead its children can no longer be enumerated through it.
        procs = parent.children(recursive=True)
    except psutil.NoSuchProcess:
        # Root process already gone; nothing to clean up.
        return

    # Children first, parent last (same order as before).
    procs.append(parent)
    for proc in procs:
        try:
            proc.kill()
        except psutil.NoSuchProcess:
            # This one exited on its own (e.g. after a sibling was killed);
            # keep going so the rest of the tree is still cleaned up.
            continue

    # Reap the killed processes; bounded so a stuck process cannot block
    # the caller indefinitely.
    psutil.wait_procs(procs, timeout=3)

View File

@@ -6,29 +6,49 @@ command=gunicorn core.wsgi:application --bind 0.0.0.0:8000
directory=/opt/app
autostart=true
autorestart=true
; Unified log file
stdout_logfile=/opt/logs/server.log
stderr_logfile=/opt/logs/server.log
redirect_stderr=true
; Rotate when file reaches max size
stdout_logfile_maxbytes=20MB
stdout_logfile_backups=1
[program:beat]
command=celery -A core beat -l info --logfile=/opt/logs/beat.log
directory=/opt/app
autostart=true
autorestart=true
; Unified log file
stdout_logfile=/opt/logs/beat.log
stderr_logfile=/opt/logs/beat.log
redirect_stderr=true
; Rotate when file reaches max size
stdout_logfile_maxbytes=20MB
stdout_logfile_backups=1
[program:worker_default]
command=celery -A core worker -l info --logfile=/opt/logs/worker_default.log --concurrency=1 -Q default -n default
directory=/opt/app
autostart=true
autorestart=true
; Unified log file
stdout_logfile=/opt/logs/worker_default.log
stderr_logfile=/opt/logs/worker_default.log
redirect_stderr=true
; Rotate when file reaches max size
stdout_logfile_maxbytes=20MB
stdout_logfile_backups=1
[program:worker_low]
command=celery -A core worker -l info --logfile=/opt/logs/worker_low.log --concurrency=1 -Q low -n low
directory=/opt/app
autostart=true
autorestart=true
; Unified log file
stdout_logfile=/opt/logs/worker_low.log
stderr_logfile=/opt/logs/worker_low.log
redirect_stderr=true
; Rotate when file reaches max size
stdout_logfile_maxbytes=20MB
stdout_logfile_backups=1