Process missing kids URLs based on the API endpoint (fix 2)
This commit is contained in:
@@ -5,6 +5,7 @@ from django.db import IntegrityError
|
||||
from django.utils import timezone
|
||||
from datetime import timedelta
|
||||
from .fetch_utils_url_processor import process_url, verify_missing_kid_url
|
||||
from .utils import get_with_protocol
|
||||
import re
|
||||
import requests
|
||||
import os
|
||||
@@ -17,14 +18,6 @@ class DB_Handler():
|
||||
pass
|
||||
|
||||
def insert_raw_urls(self, urls, obj_source, obj_search):
|
||||
def get_with_protocol(url):
|
||||
# http:// -> https://
|
||||
url = url.replace("http://", "https://")
|
||||
# "" -> https://
|
||||
if not (url.startswith("https://")):
|
||||
url = "https://" + url
|
||||
return url
|
||||
|
||||
try:
|
||||
logger.debug("Inserting raw URLs")
|
||||
# Empty?
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from .db_utils import DB_Handler
|
||||
from ..models import Search, Source
|
||||
from .fetch_utils_url_processor import get_with_protocol, url_host_slowdown
|
||||
from .fetch_utils_url_processor import url_host_slowdown
|
||||
from .utils import get_with_protocol
|
||||
import newspaper
|
||||
import traceback
|
||||
from .logger import get_logger
|
||||
|
||||
8
app_urls/fetcher/src/utils.py
Normal file
8
app_urls/fetcher/src/utils.py
Normal file
@@ -0,0 +1,8 @@
|
||||
|
||||
def get_with_protocol(url):
    """Return *url* forced onto the ``https://`` scheme.

    * A leading ``http://`` is upgraded to ``https://``.
    * A leading ``https://`` is kept (the scheme is normalised to lower case).
    * Anything else is treated as scheme-less and gets ``https://`` prepended.

    Only the leading scheme is touched: an ``http://`` that appears later in
    the string (for example inside a query parameter or an archive-style
    path) is left exactly as given, so the target of the URL is not changed.

    Args:
        url: URL string, with or without a scheme.

    Returns:
        The URL string starting with ``https://``.
    """
    http_prefix = "http://"
    https_prefix = "https://"

    # URI schemes are case-insensitive, so compare on a lower-cased prefix
    # while slicing the remainder from the untouched original string.
    head = url[:len(https_prefix)].lower()

    if head.startswith(https_prefix):
        return https_prefix + url[len(https_prefix):]
    if head.startswith(http_prefix):
        # http:// -> https:// (leading scheme only)
        return https_prefix + url[len(http_prefix):]
    # "" -> https://
    return https_prefix + url
|
||||
Reference in New Issue
Block a user