Missing kids processing fix

This commit is contained in:
Luciano Gervasoni
2025-07-07 13:22:18 +02:00
parent 4c0dd70bc3
commit 15035c108d

View File

@@ -4,7 +4,7 @@ from django.core.cache import cache
from django.db import IntegrityError
from django.utils import timezone
from datetime import timedelta
from .fetch_utils_url_processor import process_url, get_with_protocol
from .fetch_utils_url_processor import process_url, get_with_protocol, url_host_slowdown
import re
import requests
import os
@@ -104,6 +104,8 @@ class DB_Handler():
##########################################################################
# URL pattern: missingkids.org/poster OR missingkids.org/new-poster
if ("missingkids.org" in obj_url.url) and ("poster" in obj_url.url):
# Slow down requests to this host (a sleep may be required) to avoid a "too many requests" error; this requests the original URL, not the paywall-bypassing endpoint
url_host_slowdown(obj_url.url, url_host_slowdown_seconds=float(os.getenv("FETCHER_URL_HOST_SLEEP", 5)))
# Request
r = requests.get(obj_url.url, allow_redirects=True)
@@ -113,9 +115,9 @@ class DB_Handler():
# Set duplicate, and insert new canonical form
self._set_duplicate_and_insert_canonical(obj_url, url_canonical)
elif (r.status_code == 200):
self._set_status(self, obj_url, Urls.STATUS_ENUM.VALID)
self._set_status(obj_url, Urls.STATUS_ENUM.VALID)
elif (r.status_code == 404):
self._set_status(self, obj_url, Urls.STATUS_ENUM.INVALID)
self._set_status(obj_url, Urls.STATUS_ENUM.INVALID)
else:
logger.info("Unknown request status: {} for missing kids request: {}".format(r.status_code, obj_url.url))