Working feed fetching and raw URL processing

This commit is contained in:
Luciano Gervasoni
2025-03-13 18:23:28 +01:00
parent 61c31ee9aa
commit 7d7bce1e72
11 changed files with 318 additions and 136 deletions

View File

@@ -1,4 +1,5 @@
from django.db import models
from django.contrib.postgres.fields import ArrayField
# Create your models here.
class Feed(models.Model):
@@ -44,9 +45,16 @@ class UrlContent(models.Model):
title = models.TextField(blank=True, null=True)
description = models.TextField(blank=True, null=True)
content = models.TextField(blank=True, null=True)
tags = models.TextField(blank=True, null=True) # This field type is a guess.
authors = models.TextField(blank=True, null=True) # This field type is a guess.
image_urls = models.TextField(blank=True, null=True) # This field type is a guess.
valid_content = models.BooleanField(blank=True, null=True)
language = models.CharField(max_length=2, blank=True, null=True)
keywords = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
tags = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
authors = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
image_main_url = models.TextField(blank=True, null=True)
images_url = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
videos_url = ArrayField(models.TextField(blank=True, null=True)) # This field type is a guess.
url_host = models.TextField(blank=True, null=True)
site_name = models.TextField(blank=True, null=True)
class Meta:
managed = False
@@ -54,9 +62,17 @@ class UrlContent(models.Model):
class Urls(models.Model):
class STATUS_ENUM(models.TextChoices):
RAW = "raw", "Raw"
ERROR = "error", "Error"
VALID = "valid", "Valid"
UNKNOWN = "unknown", "Unknown"
INVALID = "invalid", "Invalid"
DUPLICATE = "duplicate", "Duplicate"
url = models.TextField(unique=True)
ts_fetch = models.DateTimeField(auto_now_add=True)
status = models.TextField(default='raw') # This field type is a guess.
status = models.TextField(choices=STATUS_ENUM.choices, default=STATUS_ENUM.RAW) # This field type is a guess.
class Meta:
managed = False