Missing kids processing fix
This commit is contained in:
@@ -4,7 +4,7 @@ from django.core.cache import cache
|
||||
from django.db import IntegrityError
|
||||
from django.utils import timezone
|
||||
from datetime import timedelta
|
||||
from .fetch_utils_url_processor import process_url, get_with_protocol
|
||||
from .fetch_utils_url_processor import process_url, get_with_protocol, url_host_slowdown
|
||||
import re
|
||||
import requests
|
||||
import os
|
||||
@@ -104,6 +104,8 @@ class DB_Handler():
|
||||
##########################################################################
|
||||
# URL pattern: missingkids.org/poster OR missingkids.org/new-poster
|
||||
if ("missingkids.org" in obj_url.url) and ("poster" in obj_url.url):
|
||||
# Slow down requests to this host to avoid "too many requests" errors (this fetches the original URL, not the paywall-bypassing endpoint)
|
||||
url_host_slowdown(obj_url.url, url_host_slowdown_seconds=float(os.getenv("FETCHER_URL_HOST_SLEEP", 5)))
|
||||
# Request
|
||||
r = requests.get(obj_url.url, allow_redirects=True)
|
||||
|
||||
@@ -113,9 +115,9 @@ class DB_Handler():
|
||||
# Set duplicate, and insert new canonical form
|
||||
self._set_duplicate_and_insert_canonical(obj_url, url_canonical)
|
||||
elif (r.status_code == 200):
|
||||
self._set_status(self, obj_url, Urls.STATUS_ENUM.VALID)
|
||||
self._set_status(obj_url, Urls.STATUS_ENUM.VALID)
|
||||
elif (r.status_code == 404):
|
||||
self._set_status(self, obj_url, Urls.STATUS_ENUM.INVALID)
|
||||
self._set_status(obj_url, Urls.STATUS_ENUM.INVALID)
|
||||
else:
|
||||
logger.info("Unknown request status: {} for missing kids request: {}".format(r.status_code, obj_url.url))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user