Clean old url content, use django db connection

This commit is contained in:
Luciano Gervasoni
2025-04-14 10:48:33 +02:00
parent 0cd84496cf
commit 43c6c3aabf
5 changed files with 80 additions and 41 deletions

View File

@@ -2,6 +2,8 @@ from ..models import Urls, UrlContent, UrlsSourceSearch, UrlsDuplicate, StatusPa
from django.db.models import Q
from django.core.cache import cache
from django.db import IntegrityError
from django.utils import timezone
from datetime import timedelta
from .url_processor import process_url, get_with_protocol
import re
import traceback
@@ -271,4 +273,14 @@ class DB_Handler():
logger.info("Verified status of #{} missingkids.org/poster URLs".format(len(missingkids_urls)))
except Exception as e:
logger.warning("Exception processing MissingKids URLs: {}\n{}".format(e, traceback.format_exc()))
def clean_old_url_content(self, older_than_days=60):
    """Delete UrlContent rows whose related URL was fetched long ago.

    Rows are selected with the lookup ``id_url__ts_fetch__lt`` against a
    cutoff of "now minus ``older_than_days`` days", their number is
    logged, and they are then deleted.

    Args:
        older_than_days: Age threshold in days (default 60).

    Returns:
        None. Any exception raised while computing the cutoff, counting
        or deleting is caught and logged as a warning, not propagated.
    """
    try:
        # Get cut off date
        cutoff_date = timezone.now() - timedelta(days=older_than_days)
        # Select old UrlContent objects (lazy: no query is issued yet)
        old_url_content = UrlContent.objects.filter(id_url__ts_fetch__lt=cutoff_date)
        # Use count() rather than len(): len() would evaluate the queryset
        # and load every matching row into memory just to report a number.
        num_old_url_content = old_url_content.count()
        logger.info("Cleaning URL content older than {} days: #{}".format(older_than_days, num_old_url_content))
        # Delete old UrlContent objects
        old_url_content.delete()
    except Exception as e:
        logger.warning("Exception cleaning old URL content: {}\n{}".format(e, traceback.format_exc()))