Process missing kids url based on API endpoint, fix2

This commit is contained in:
Luciano Gervasoni
2025-09-08 16:20:39 +02:00
parent 079b2473f8
commit ef51a96db6
3 changed files with 11 additions and 9 deletions

View File

@@ -5,6 +5,7 @@ from django.db import IntegrityError
from django.utils import timezone
from datetime import timedelta
from .fetch_utils_url_processor import process_url, verify_missing_kid_url
from .utils import get_with_protocol
import re
import requests
import os
@@ -17,14 +18,6 @@ class DB_Handler():
pass
def insert_raw_urls(self, urls, obj_source, obj_search):
def get_with_protocol(url):
# http:// -> https://
url = url.replace("http://", "https://")
# "" -> https://
if not (url.startswith("https://")):
url = "https://" + url
return url
try:
logger.debug("Inserting raw URLs")
# Empty?

View File

@@ -1,6 +1,7 @@
from .db_utils import DB_Handler
from ..models import Search, Source
from .fetch_utils_url_processor import get_with_protocol, url_host_slowdown
from .fetch_utils_url_processor import url_host_slowdown
from .utils import get_with_protocol
import newspaper
import traceback
from .logger import get_logger

View File

@@ -0,0 +1,8 @@
def get_with_protocol(url):
    """Return *url* with its leading scheme normalized to ``https://``.

    Only the scheme at the very start of the string is touched, so URLs
    embedded later in the path or query string (e.g. ``?next=http://...``)
    are preserved verbatim.

    Args:
        url: URL string, with or without a leading scheme.

    Returns:
        The URL starting with ``https://``:
        - ``http://host``  -> ``https://host``
        - ``https://host`` -> unchanged (scheme lower-cased)
        - ``//host``       -> ``https://host`` (protocol-relative reference)
        - anything else    -> ``https://`` is prepended
    """
    secure_prefix = "https://"
    plain_prefix = "http://"

    # URL schemes are case-insensitive, so compare on a lower-cased head only;
    # the remainder of the URL keeps its original casing.
    head = url[:len(secure_prefix)].lower()

    if head.startswith(secure_prefix):
        return secure_prefix + url[len(secure_prefix):]
    if head.startswith(plain_prefix):
        # http:// -> https:// (leading scheme only, not every occurrence)
        return secure_prefix + url[len(plain_prefix):]
    if url.startswith("//"):
        # Protocol-relative reference: supply just the scheme name.
        return "https:" + url
    # "" -> https://
    return secure_prefix + url