Process missing kids url based on API endpoint, fix2
This commit is contained in:
@@ -5,6 +5,7 @@ from django.db import IntegrityError
|
|||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from .fetch_utils_url_processor import process_url, verify_missing_kid_url
|
from .fetch_utils_url_processor import process_url, verify_missing_kid_url
|
||||||
|
from .utils import get_with_protocol
|
||||||
import re
|
import re
|
||||||
import requests
|
import requests
|
||||||
import os
|
import os
|
||||||
@@ -17,14 +18,6 @@ class DB_Handler():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def insert_raw_urls(self, urls, obj_source, obj_search):
|
def insert_raw_urls(self, urls, obj_source, obj_search):
|
||||||
def get_with_protocol(url):
|
|
||||||
# http:// -> https://
|
|
||||||
url = url.replace("http://", "https://")
|
|
||||||
# "" -> https://
|
|
||||||
if not (url.startswith("https://")):
|
|
||||||
url = "https://" + url
|
|
||||||
return url
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logger.debug("Inserting raw URLs")
|
logger.debug("Inserting raw URLs")
|
||||||
# Empty?
|
# Empty?
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
from .db_utils import DB_Handler
|
from .db_utils import DB_Handler
|
||||||
from ..models import Search, Source
|
from ..models import Search, Source
|
||||||
from .fetch_utils_url_processor import get_with_protocol, url_host_slowdown
|
from .fetch_utils_url_processor import url_host_slowdown
|
||||||
|
from .utils import get_with_protocol
|
||||||
import newspaper
|
import newspaper
|
||||||
import traceback
|
import traceback
|
||||||
from .logger import get_logger
|
from .logger import get_logger
|
||||||
|
|||||||
8
app_urls/fetcher/src/utils.py
Normal file
8
app_urls/fetcher/src/utils.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
def get_with_protocol(url):
    """Return *url* with an explicit ``https://`` scheme.

    Only the leading scheme is rewritten:

    * ``http://host/...``  -> ``https://host/...``
    * ``https://host/...`` -> unchanged (scheme is lower-cased)
    * ``//host/...``       -> ``https://host/...`` (protocol-relative URL)
    * ``host/...``         -> ``https://host/...``

    The scheme is matched case-insensitively and surrounding whitespace is
    dropped. Any ``http://`` that appears later in the URL (for example inside
    a query-string parameter) is left untouched, because rewriting it would
    change the address the URL refers to.

    Args:
        url: The URL string, with or without a scheme.

    Returns:
        The URL string starting with ``https://``.
    """
    https = "https://"
    url = url.strip()
    lowered = url.lower()
    # Check the already-secure prefix first so it is never doubled up.
    for prefix in (https, "http://", "//"):
        if lowered.startswith(prefix):
            # Swap only the leading prefix; the rest of the URL is kept as-is.
            return https + url[len(prefix):]
    # No recognised scheme at the start: treat the input as a bare host/path.
    return https + url
|
||||||
Reference in New Issue
Block a user