Missing kids selenium fixes
This commit is contained in:
@@ -270,22 +270,37 @@ class DB_Handler():
|
||||
except Exception as e:
|
||||
logger.warning("Exception processing error URLs: {}\n{}".format(e, traceback.format_exc()))
|
||||
|
||||
def process_missing_kids_urls(self, batch_size=None):
|
||||
def process_missing_kids_urls(self, batch_size=None, process_status_only=None):
|
||||
try:
|
||||
logger.debug("Processing MissingKids URLs - batch_size={}".format(batch_size))
|
||||
logger.info("Processing MissingKids URLs - batch_size={} process_status_only={}".format(batch_size, process_status_only))
|
||||
|
||||
if (process_status_only is not None):
|
||||
filter = (Q(status=Urls.STATUS_ENUM.VALID) | Q(status=Urls.STATUS_ENUM.INVALID) | Q(status=Urls.STATUS_ENUM.UNKNOWN) | Q(status=Urls.STATUS_ENUM.ERROR))
|
||||
else:
|
||||
if (process_status_only == "valid"):
|
||||
filter = Q(status=Urls.STATUS_ENUM.VALID)
|
||||
elif (process_status_only == "invalid"):
|
||||
filter = Q(status=Urls.STATUS_ENUM.INVALID)
|
||||
elif (process_status_only == "error"):
|
||||
filter = Q(status=Urls.STATUS_ENUM.ERROR)
|
||||
elif (process_status_only == "unknown"):
|
||||
filter = Q(status=Urls.STATUS_ENUM.UNKNOWN)
|
||||
elif (process_status_only == "raw"):
|
||||
filter = Q(status=Urls.STATUS_ENUM.RAW)
|
||||
elif (process_status_only == "duplicate"):
|
||||
filter = Q(status=Urls.STATUS_ENUM.DUPLICATE)
|
||||
else:
|
||||
logger.info("Unknown status to filter: {}".format(process_status_only))
|
||||
|
||||
# Get batch of URLs: url contains missingkids.org/poster OR missingkids.org/new-poster, AND status matches the status filter
|
||||
missingkids_urls = Urls.objects.order_by("-ts_fetch").filter(
|
||||
(Q(url__contains="missingkids.org/poster") | Q(url__contains="missingkids.org/new-poster"))
|
||||
&
|
||||
(Q(status=Urls.STATUS_ENUM.VALID) | Q(status=Urls.STATUS_ENUM.INVALID) | Q(status=Urls.STATUS_ENUM.UNKNOWN) | Q(status=Urls.STATUS_ENUM.ERROR))
|
||||
filter & (Q(url__contains="missingkids.org/poster") | Q(url__contains="missingkids.org/new-poster"))
|
||||
)
|
||||
|
||||
# Get batch size
|
||||
if (batch_size is not None):
|
||||
missingkids_urls = missingkids_urls[:batch_size]
|
||||
|
||||
# TODO: Cache the URLs processed during the last X hours and filter them out of this batch
|
||||
|
||||
# Per URL
|
||||
for obj_url in missingkids_urls:
|
||||
try:
|
||||
@@ -296,6 +311,7 @@ class DB_Handler():
|
||||
r = requests.post(missingkids_fetch_endpoint, json=data, timeout=120)
|
||||
# Jsonify
|
||||
results = r.json()
|
||||
logger.debug("Selenium results for URL {}: {}".format(obj_url.url, str(results)))
|
||||
|
||||
if (results.get("status") == "valid"):
|
||||
self._set_status(obj_url, Urls.STATUS_ENUM.VALID)
|
||||
|
||||
@@ -60,17 +60,10 @@ def process_error_urls(batch_size=50):
|
||||
logger.info("Task completed: {}".format(task))
|
||||
|
||||
@job('default')
|
||||
def process_missing_kids_urls(batch_size=50):
|
||||
task = "Process Missing Kids URLs"
|
||||
def process_missing_kids_urls(batch_size=None, process_status_only=None):
|
||||
task = "Process Missing Kids URLs - batch_size={} process_status_only={}".format(batch_size, process_status_only)
|
||||
logger.info("Task triggered: {}".format(task))
|
||||
DB_Handler().process_missing_kids_urls(batch_size=batch_size)
|
||||
logger.info("Task completed: {}".format(task))
|
||||
|
||||
@job('default')
|
||||
def process_missing_kids_urls_all(batch_size=None):
|
||||
task = "Process Missing Kids URLs ALL"
|
||||
logger.info("Task triggered: {}".format(task))
|
||||
DB_Handler().process_missing_kids_urls(batch_size=batch_size)
|
||||
DB_Handler().process_missing_kids_urls(batch_size=batch_size, process_status_only=process_status_only)
|
||||
logger.info("Task completed: {}".format(task))
|
||||
|
||||
@job('default')
|
||||
|
||||
Reference in New Issue
Block a user