Missing kids processing fix
This commit is contained in:
@@ -4,7 +4,7 @@ from django.core.cache import cache
|
|||||||
from django.db import IntegrityError
|
from django.db import IntegrityError
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from .fetch_utils_url_processor import process_url, get_with_protocol
|
from .fetch_utils_url_processor import process_url, get_with_protocol, url_host_slowdown
|
||||||
import re
|
import re
|
||||||
import requests
|
import requests
|
||||||
import os
|
import os
|
||||||
@@ -104,6 +104,8 @@ class DB_Handler():
|
|||||||
##########################################################################
|
##########################################################################
|
||||||
# URL pattern: missingkids.org/poster OR missingkids.org/new-poster
|
# URL pattern: missingkids.org/poster OR missingkids.org/new-poster
|
||||||
if ("missingkids.org" in obj_url.url) and ("poster" in obj_url.url):
|
if ("missingkids.org" in obj_url.url) and ("poster" in obj_url.url):
|
||||||
|
# A sleep may be required here to avoid a "too many requests" error (this request goes to the original URL, not the paywall-bypassing endpoint)
|
||||||
|
url_host_slowdown(obj_url.url, url_host_slowdown_seconds=float(os.getenv("FETCHER_URL_HOST_SLEEP", 5)))
|
||||||
# Request
|
# Request
|
||||||
r = requests.get(obj_url.url, allow_redirects=True)
|
r = requests.get(obj_url.url, allow_redirects=True)
|
||||||
|
|
||||||
@@ -113,9 +115,9 @@ class DB_Handler():
|
|||||||
# Set duplicate, and insert new canonical form
|
# Set duplicate, and insert new canonical form
|
||||||
self._set_duplicate_and_insert_canonical(obj_url, url_canonical)
|
self._set_duplicate_and_insert_canonical(obj_url, url_canonical)
|
||||||
elif (r.status_code == 200):
|
elif (r.status_code == 200):
|
||||||
self._set_status(self, obj_url, Urls.STATUS_ENUM.VALID)
|
self._set_status(obj_url, Urls.STATUS_ENUM.VALID)
|
||||||
elif (r.status_code == 404):
|
elif (r.status_code == 404):
|
||||||
self._set_status(self, obj_url, Urls.STATUS_ENUM.INVALID)
|
self._set_status(obj_url, Urls.STATUS_ENUM.INVALID)
|
||||||
else:
|
else:
|
||||||
logger.info("Unknown request status: {} for missing kids request: {}".format(r.status_code, obj_url.url))
|
logger.info("Unknown request status: {} for missing kids request: {}".format(r.status_code, obj_url.url))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user