Refactor searches, env vars fetcher config, urls webpage update

README.md (39 lines changed)
@@ -1,38 +1 @@
# Requirements

```
conda create -n matitos python=3.12
conda activate matitos
pip install ipykernel django requests ollama psycopg[binary] # openai
```

# Development

* app_web

```
# 1) Change models.py
python manage.py inspectdb

# 2) Create the migrations
python manage.py makemigrations
# 3) Apply them (--fake: the tables already exist in the DB)
python manage.py migrate --fake

# If the migration state gets inconsistent:
python manage.py migrate --fake sessions zero
python manage.py migrate --fake-initial

python manage.py createsuperuser
```

* app_img_gen

```
docker build -t image_generation .
docker run --rm -it -p 12343:80 image_generation
```

# Deploy

```
python app_web/manage.py runserver
```

# Matitos

app_img_gen/README.md (new file, 36 lines)
@@ -0,0 +1,36 @@
```
docker build -t image_generation .
docker run --rm -it -p 12343:80 image_generation
```

```
import requests
import cv2
import base64
import numpy as np

endpoint = "http://192.168.2.64:12343/image"

prompt = "Majestic mountain landscape with snow-capped peaks, autumn foliage in vibrant reds and oranges, a turquoise river winding through a valley, crisp and serene atmosphere, ultra-realistic style."
prompt = "A group of kids happily playing in a joyful environment"
#prompt = "A bitcoin behaving like a king, surrounded by small alternative coins. Detailed, geometric style"

json = {
    "prompt": prompt,
    "num_inference_steps": 10,
    "size": "512x512",
    "seed": 123456,
}

for inf_step in [1, 4, 10, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100]:
    json["num_inference_steps"] = inf_step

    %time r = requests.post(endpoint, json=json)
    print("Status code", r.status_code)

    # Image
    png_as_np = np.frombuffer(base64.b64decode(r.text), dtype=np.uint8)
    image_bgr = cv2.imdecode(png_as_np, cv2.IMREAD_COLOR)

    cv2.imwrite("sample_img_{}.png".format(json["num_inference_steps"]), image_bgr)
```
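
The same call, wrapped as a small helper for scripts that do not need OpenCV. This is a sketch based only on the request/response format shown above (endpoint address, payload fields, and the base64-encoded PNG body all come from the example; adjust the address for your deployment):

```
import base64

import requests

# Sketch: endpoint, payload fields, and base64 response format taken from the example above.
def generate_image(prompt, steps=10, size="512x512", seed=123456,
                   endpoint="http://192.168.2.64:12343/image"):
    payload = {
        "prompt": prompt,
        "num_inference_steps": steps,
        "size": size,
        "seed": seed,
    }
    r = requests.post(endpoint, json=payload)
    r.raise_for_status()  # surface HTTP errors instead of decoding garbage
    return base64.b64decode(r.text)  # raw PNG bytes

# Usage: write one image straight to disk
with open("sample.png", "wb") as f:
    f.write(generate_image("A majestic mountain landscape at sunrise"))
```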

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -11,41 +11,16 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"db_postgres\n",
"db_redis\n",
"\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 2/0\n",
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.1s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container dozzle \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
" ⠿ Container db_redis \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" ⠿ Container db_postgres \u001b[39mStarting\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container dozzle \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 4/4\u001b[0m\n",
" \u001b[32m✔\u001b[0m Container db_redis \u001b[32mStarted\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container db_postgres \u001b[32mStarted\u001b[0m \u001b[34m0.2s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container dozzle \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
" \u001b[32m✔\u001b[0m Container adminer \u001b[32mRunning\u001b[0m \u001b[34m0.0s \u001b[0m\n",
"\u001b[?25h"
]
}
],
"outputs": [],
"source": [
"!docker rm -f db_postgres db_redis; docker compose -f docker/docker-compose.yml up -d ; sleep 5"
"!docker rm -f db_postgres db_redis; docker compose -f ../docker/docker-compose.yml up -d ; sleep 5"
]
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -62,7 +37,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -99,6 +74,8 @@
" id SMALLSERIAL PRIMARY KEY,\n",
" search TEXT NOT NULL UNIQUE,\n",
" type SEARCH_TYPE NOT NULL\n",
" -- language_country CHAR(5), -- Language: ISO 639-1 Code. Country: ISO 3166 ALPHA-2. e.g.: en-us. Required for search\n",
" -- UNIQUE(search, language_country)\n",
" );\n",
" CREATE INDEX idx_search_type ON SEARCH(type);\n",
" \n",
@@ -106,7 +83,13 @@
" id SMALLSERIAL PRIMARY KEY,\n",
" source TEXT NOT NULL UNIQUE\n",
" );\n",
" \n",
" \n",
" -- CREATE TABLE SEARCH_LANGUAGE (\n",
" -- language CHAR(2) NOT NULL, -- ISO 639-1 Code, e.g. \"en\"\n",
" -- country CHAR(2) NOT NULL, -- ISO 3166 ALPHA-2, e.g. \"us\"\n",
" -- PRIMARY KEY (language, country)\n",
" -- );\n",
" \n",
" CREATE TABLE URLS_SOURCE_SEARCH (\n",
" id_url INTEGER REFERENCES URLS(id),\n",
" id_source SMALLINT REFERENCES SOURCE(id) ON UPDATE CASCADE ON DELETE RESTRICT,\n",
@@ -158,6 +141,8 @@
" cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('breitbart.com', 'url_host');\" )\n",
" # Search keywords\n",
" cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('child abuse', 'keyword_search');\" )\n",
" # cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('child abuse', 'keyword_search', 'en-us');\" )\n",
" # cur.execute( \"INSERT INTO SEARCH (search, type) VALUES ('child abuse', 'keyword_search', 'en-gb');\" )\n",
" \n",
" # Status update based on pattern matching (with priority to apply in order). Regex test https://regex101.com/\n",
" # cur.execute( \"INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 75, 'valid');\".format(\".*{}.*\".format(re.escape(\"missingkids.org/poster/\"))) )\n",
@@ -169,51 +154,6 @@
" cur.execute( \"INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');\".format(\".*{}.*\".format(re.escape(\"radio.foxnews.com/\"))) )"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"if INSERT_SAMPLE_DATA:\n",
" # Connect to an existing database\n",
" with psycopg.connect(connection_info) as conn:\n",
" # Open a cursor to perform database operations\n",
" with conn.cursor() as cur:\n",
" # Autocommit at end of transaction (Atomic insert of URLs and sources)\n",
" with conn.transaction() as tx:\n",
" # Valid\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.foxnews.com/us/husband-ruby-franke-utah-mommy-blogger-convicted-child-abuse-regrets-wifes-fall-fame', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.bbc.com/news/articles/ckg843y8y7no', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.wilx.com/2025/03/05/lenawee-county-man-arrested-possessing-child-abuse-material/', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.dw.com/en/trauma-how-child-abuse-victims-deal-with-parenthood/a-71833895', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://nypost.com/2025/03/06/us-news/colorado-day-care-worker-hit-with-51-charges-of-child-abuse-harassment-for-slapping-toddler/', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.fox35orlando.com/news/tavares-police-florida-boys-10-9-abused-sheer-brutality', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.google.com', 'invalid')\")\n",
"\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.missingkids.org/poster/USVA/VA25-0820/1', 'valid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('https://www.missingkids.org/poster/NCMC/2045193/1', 'valid')\")\n",
"\n",
" cur.execute(\"INSERT INTO SOURCE (source) values ('news.google.com')\")\n",
" cur.execute(\"INSERT INTO SOURCE (source) values ('qwant.com')\")\n",
"\n",
" cur.execute(\"INSERT INTO URLS_SOURCE (id_url, id_source, id_search) values (1, 1, 1)\")\n",
"\n",
" for j in range(5):\n",
" import time\n",
" time.sleep(0.25)\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('www.super_{}.org', 'invalid')\".format(j))\n",
" \n",
" # Long URLs \n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('www.super_url.org/superextrakmsdimsdf/349mvlsdfsdfwr/akivsdmimnsdifmisdf_23dj9sdgj9sdgj8sdf8ds8f.html', 'invalid')\")\n",
" cur.execute(\"INSERT INTO URLS (url, status) values ('www.super_url.org/superextrakmsdimsdf/349mvlsdfsdfwr/akivsdmimnsdifmisdf.html', 'invalid')\")\n",
"\n",
" # URL Content\n",
" language, content = \"en\", \"Bla Bla Bla!!!\"*25\n",
" cur.execute(\"INSERT INTO URL_CONTENT (id_url, date_published, title, description, content, language, tags, authors, images_url) values (%s, %s, 'Mommy blogger turned child abuser', %s, 'Hello there!', %s, %s, %s, %s)\", \n",
" (1, datetime.now(tz=timezone.utc), content, language, [\"child abuse\", \"social media\"], [\"Audrey Conklin\"], [\"https://a57.foxnews.com/static.foxnews.com/foxnews.com/content/uploads/2023/08/1440/810/image-58.jpg?ve=1&tl=1\"]))"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -223,41 +163,9 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\t urls\n",
"[]\n",
"\t urls_duplicate\n",
"[]\n",
"\t urls_source_search\n",
"[]\n",
"\t source\n",
"[]\n",
"\t search\n",
"[(1,\n",
" 'https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC',\n",
" 'rss_feed'),\n",
" (2, 'missingkids.org/poster', 'url_host'),\n",
" (3, 'missingkids.org/new-poster', 'url_host'),\n",
" (4, 'breitbart.com', 'url_host'),\n",
" (5, 'child abuse', 'keyword_search')]\n",
"\t status_pattern_matching\n",
"[('.*youtube\\\\.com/.*', 50, 'invalid'),\n",
" ('.*tiktok\\\\.com/.*', 50, 'invalid'),\n",
" ('.*twitter\\\\.com/.*', 50, 'invalid'),\n",
" ('.*reddit\\\\.com/.*', 50, 'invalid'),\n",
" ('.*libreddit\\\\.de/.*', 50, 'invalid'),\n",
" ('.*radio\\\\.foxnews\\\\.com/.*', 50, 'invalid')]\n",
"\t url_content\n",
"[]\n"
]
}
],
"outputs": [],
"source": [
"# Connect to an existing database\n",
"with psycopg.connect(connection_info) as conn:\n",
@@ -274,23 +182,9 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(1,\n",
" 'https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC',\n",
" 'rss_feed'),\n",
" (2, 'missingkids.org/poster', 'url_host'),\n",
" (3, 'missingkids.org/new-poster', 'url_host'),\n",
" (4, 'breitbart.com', 'url_host'),\n",
" (5, 'child abuse', 'keyword_search')]\n"
]
}
],
"outputs": [],
"source": [
"# Connect to an existing database\n",
"with psycopg.connect(connection_info) as conn:\n",
@@ -301,23 +195,15 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
}
],
"outputs": [],
"source": [
"# Connect to an existing database\n",
"with psycopg.connect(connection_info) as conn:\n",
" # Open a cursor to perform database operations\n",
" with conn.cursor() as cur:\n",
" pprint( cur.execute(\"SELECT * FROM URLS LIMIT 150;\").fetchall() )\n",
" pprint( cur.execute(\"SELECT * FROM URLS LIMIT 50;\").fetchall() )\n",
" #pprint( cur.execute(\"SELECT id_url, title, valid_content FROM URL_CONTENT LIMIT 10;\").fetchall() )"
]
},
@@ -326,34 +212,9 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\n!docker rm -f db_redis; docker compose -f docker/docker-compose.yml up -d\\n\\n# Connect to an existing database\\nwith psycopg.connect(connection_info) as conn:\\n # Open a cursor to perform database operations\\n with conn.cursor() as cur:\\n pprint( cur.execute(\"TRUNCATE URLS, URL_CONTENT, URLS_SOURCE_SEARCH, URLS_DUPLICATE;\") )\\n # cur.execute( \"INSERT INTO SEARCH (search, type) VALUES (\\'missingkids.org\\', \\'url_host\\');\" )\\n'"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'''\n",
"!docker rm -f db_redis; docker compose -f docker/docker-compose.yml up -d\n",
"!docker rm -f db_redis; docker compose -f ../docker/docker-compose.yml up -d\n",
"\n",
"# Connect to an existing database\n",
"with psycopg.connect(connection_info) as conn:\n",
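
The notebook cells above rely on a `connection_info` string defined in a cell outside these hunks. With the `DB_*` defaults from the environment-variable section later in this commit, a matching value would look roughly like this (illustrative sketch, not taken from the notebook):

```
import psycopg
from pprint import pprint

# Illustrative, assuming the DB_* defaults listed in the README below.
connection_info = "host=localhost port=5432 dbname=matitos user=supermatitos password=supermatitos"

with psycopg.connect(connection_info) as conn:
    with conn.cursor() as cur:
        pprint(cur.execute("SELECT * FROM SEARCH;").fetchall())
```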
@@ -10,6 +10,10 @@ pip install feedparser python-dateutil newspaper4k[all] lxml[html_clean] googlen
pip install ollama
```

* Database
    * Database initialization -> 1-DB.ipynb

* From automated inspectdb
```
# 1) Inspect DB, generate models.py

@@ -72,23 +76,26 @@ class Meta:

* Environment variables
```
# Database
DB_NAME=${DB_NAME:-matitos}
DB_USER=${DB_USER:-supermatitos}
DB_PASSWORD=${DB_PASSWORD:-supermatitos}
DB_HOST=${DB_HOST:-localhost}
DB_PORT=${DB_PORT:-5432}

REDIS_HOST=${REDIS_HOST:-localhost}
REDIS_PORT=${REDIS_PORT:-6379}

# Default RQ job timeout
RQ_DEFAULT_TIMEOUT=${RQ_DEFAULT_TIMEOUT:-900}
# Default RQ job queue TTL
RQ_DEFAULT_RESULT_TTL=${RQ_DEFAULT_RESULT_TTL:-3600}
# Job timeout: 30 min
JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT:-1800}

# Logs path
PATH_LOGS_ERROR=logs/log_app_fetcher_error.log
PATH_LOGS=logs/log_app_fetcher.log
PATH_LOGS_PARAMETERIZATION="logs/log_app_fetcher_{}.log"

# Fetcher
FETCHER_GNEWS_DECODE_SLEEP=2
FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=4
FETCHER_BETWEEN_SEARCHES_SLEEP=5
FETCHER_URL_HOST_SLEEP=5
```
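
The fetcher code reads these variables with in-code fallbacks that mirror the defaults above, e.g. `int(os.getenv("FETCHER_BETWEEN_SEARCHES_SLEEP", 5))` in fetch_search.py. A minimal sketch of that pattern (variable names come from this file; the helper itself is illustrative, not part of the codebase):

```
import os

# Illustrative helper: centralizes the os.getenv(...) calls scattered through
# the fetcher code, with the same defaults as the env file above.
def fetcher_sleep_config():
    return {
        "gnews_decode": int(os.getenv("FETCHER_GNEWS_DECODE_SLEEP", 2)),
        "google_page_iter": int(os.getenv("FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP", 4)),
        "between_searches": int(os.getenv("FETCHER_BETWEEN_SEARCHES_SLEEP", 5)),
        "url_host": int(os.getenv("FETCHER_URL_HOST_SLEEP", 5)),
    }
```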

* Deploy
@@ -110,30 +117,14 @@ http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=publ

* Scheduled tasks
```
# 1) Modify the scheduled tasks on the admin panel:

Names: Fetch Feeds, Fetch Parser, Fetch Search
Callable: api.tasks.fetch_feeds, api.tasks.fetch_parser, api.tasks.fetch_search
Task type: Repeatable task (or cron...)
Queue: Default
Interval: 15min, 2h, 30min

Names: Process raw URLs, Process error URLs, Process MissingKids URLs
Callable: api.tasks.process_raw_urls, api.tasks.process_error_urls, api.tasks.process_missing_kids_urls_50
Task type: Repeatable task (or cron...)
Queue: Low, Low, Default
Interval: 1h, 4h, 2h

# 2) Export
# python manage.py export > scheduled_tasks.json

# Or simply import saved definitions
# Import tasks
python manage.py import --filename scheduled_tasks.json

# Modify using the admin panel, then save
# python manage.py export > scheduled_tasks.json
```
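
The callables referenced above live in api/tasks.py, which this commit switches to the `scheduler` package's `@job` decorator (see the tasks.py hunk further down). A hedged sketch of what one such task plausibly looks like; the decorator usage is an assumption based on that import, while FetchSearcher and the callable name come from this commit:

```
from scheduler import job

from .src.fetch_search import FetchSearcher

# Assumed wiring: the @job decorator registers the callable so the admin
# panel can schedule it as api.tasks.fetch_search.
@job
def fetch_search():
    FetchSearcher().run()
```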

* Utils
* Utils. TODO: To endpoint...
```
python manage.py rqstats
python manage.py rqstats --interval=1 # Refreshes every second
```

@@ -1,5 +1,6 @@
# Generated by Django 5.1.7 on 2025-03-13 17:01
# Generated by Django 5.2 on 2025-04-02 16:44

import django.contrib.postgres.fields
import django.db.models.deletion
from django.db import migrations, models

@@ -12,22 +13,12 @@ class Migration(migrations.Migration):
    ]

    operations = [
        migrations.CreateModel(
            name='Feed',
            fields=[
                ('id', models.SmallAutoField(primary_key=True, serialize=False)),
                ('rss_feed', models.TextField(unique=True)),
            ],
            options={
                'db_table': 'feed',
                'managed': False,
            },
        ),
        migrations.CreateModel(
            name='Search',
            fields=[
                ('id', models.SmallAutoField(primary_key=True, serialize=False)),
                ('keyword_search', models.TextField(unique=True)),
                ('search', models.TextField(unique=True)),
                ('type', models.TextField(choices=[('rss_feed', 'RSS_Feed'), ('keyword_search', 'Keyword_Search'), ('url_host', 'URL_Host')])),
            ],
            options={
                'db_table': 'search',
@@ -67,28 +58,7 @@
            ],
            options={
                'db_table': 'urls',
                'managed': False,
            },
        ),
        migrations.CreateModel(
            name='WebsiteOfInterest',
            fields=[
                ('id', models.SmallAutoField(primary_key=True, serialize=False)),
                ('url_host', models.TextField(unique=True)),
            ],
            options={
                'db_table': 'website_of_interest',
                'managed': False,
            },
        ),
        migrations.CreateModel(
            name='WebsiteToFilter',
            fields=[
                ('id', models.SmallAutoField(primary_key=True, serialize=False)),
                ('url_host', models.TextField(unique=True)),
            ],
            options={
                'db_table': 'website_to_filter',
                'ordering': ['-ts_fetch'],
                'managed': False,
            },
        ),
@@ -102,12 +72,12 @@
            ('content', models.TextField(blank=True, null=True)),
            ('valid_content', models.BooleanField(blank=True, null=True)),
            ('language', models.CharField(blank=True, max_length=2, null=True)),
            ('keywords', models.TextField(blank=True, null=True)),
            ('tags', models.TextField(blank=True, null=True)),
            ('authors', models.TextField(blank=True, null=True)),
            ('image_main', models.TextField(blank=True, null=True)),
            ('images_url', models.TextField(blank=True, null=True)),
            ('videos_url', models.TextField(blank=True, null=True)),
            ('keywords', django.contrib.postgres.fields.ArrayField(base_field=models.TextField(blank=True, null=True), size=None)),
            ('tags', django.contrib.postgres.fields.ArrayField(base_field=models.TextField(blank=True, null=True), size=None)),
            ('authors', django.contrib.postgres.fields.ArrayField(base_field=models.TextField(blank=True, null=True), size=None)),
            ('image_main_url', models.TextField(blank=True, null=True)),
            ('images_url', django.contrib.postgres.fields.ArrayField(base_field=models.TextField(blank=True, null=True), size=None)),
            ('videos_url', django.contrib.postgres.fields.ArrayField(base_field=models.TextField(blank=True, null=True), size=None)),
            ('url_host', models.TextField(blank=True, null=True)),
            ('site_name', models.TextField(blank=True, null=True)),
        ],
@@ -127,12 +97,12 @@
            },
        ),
        migrations.CreateModel(
            name='UrlsSource',
            name='UrlsSourceSearch',
            fields=[
                ('id_url', models.OneToOneField(db_column='id_url', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='api.urls')),
            ],
            options={
                'db_table': 'urls_source',
                'db_table': 'urls_source_search',
                'managed': False,
            },
        ),

@@ -1,26 +0,0 @@
# Generated by Django 5.1.7 on 2025-03-19 09:06

from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('api', '0001_initial'),
    ]

    operations = [
        migrations.DeleteModel(
            name='Feed',
        ),
        migrations.DeleteModel(
            name='WebsiteOfInterest',
        ),
        migrations.DeleteModel(
            name='WebsiteToFilter',
        ),
        migrations.AlterModelOptions(
            name='urls',
            options={'managed': False, 'ordering': ['-ts_fetch']},
        ),
    ]

@@ -1,27 +0,0 @@
# Generated by Django 4.2.20 on 2025-03-20 16:12

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

    dependencies = [
        ('api', '0002_delete_feed_delete_websiteofinterest_and_more'),
    ]

    operations = [
        migrations.CreateModel(
            name='UrlsSourceSearch',
            fields=[
                ('id_url', models.OneToOneField(db_column='id_url', on_delete=django.db.models.deletion.DO_NOTHING, primary_key=True, serialize=False, to='api.urls')),
            ],
            options={
                'db_table': 'urls_source_search',
                'managed': False,
            },
        ),
        migrations.DeleteModel(
            name='UrlsSource',
        ),
    ]
@@ -109,3 +109,32 @@ class UrlsSourceSearch(models.Model):

    def __str__(self):
        return "{} {} {}".format(self.id_source, self.id_search, self.id_url)

""" # TODO: Migrate to django 5.2
class UrlsDuplicate(models.Model):
    pk = models.CompositePrimaryKey('id_url_canonical', 'id_url_duplicated')
    id_url_canonical = models.ForeignKey(Urls, models.DO_NOTHING, db_column='id_url_canonical')
    id_url_duplicated = models.ForeignKey(Urls, models.DO_NOTHING, db_column='id_url_duplicated', related_name='urlsduplicate_id_url_duplicated_set')

    class Meta:
        managed = False
        db_table = 'urls_duplicate'
        unique_together = (('id_url_canonical', 'id_url_duplicated'),)

    def __str__(self):
        return "{} {} ".format(self.id_url_duplicated, self.id_url_canonical)

class UrlsSourceSearch(models.Model):
    pk = models.CompositePrimaryKey('id_url', 'id_source', 'id_search')
    id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url')
    id_source = models.ForeignKey(Source, models.DO_NOTHING, db_column='id_source')
    id_search = models.ForeignKey(Search, models.DO_NOTHING, db_column='id_search')

    class Meta:
        managed = False
        db_table = 'urls_source_search'
        unique_together = (('id_url', 'id_source', 'id_search'),)

    def __str__(self):
        return "{} {} {}".format(self.id_source, self.id_search, self.id_url)
"""

@@ -1,59 +1,17 @@
from .db_utils import DB_Handler
from ..models import Search, Source
from ..models import Search
from django.db.models import Q
import traceback
import time
from .fetch_search_utils import search_gnews, search_ddg, search_googlenews_general, search_googlenews_news, search_googlenews_rss
import os
from .fetch_search_instances import ListSearchInstances
from .logger import get_logger
logger = get_logger()

'''
from abc import ABC, abstractmethod

# Generic fetcher (fetches articles, writes to DB)
class FetcherAbstract(ABC):
    @abstractmethod
    def _fetch_raw_urls_list(self):
        pass

    def fetch_articles(self, db_writer):
        logger.debug("Starting fetch() for {}".format(self.name))
        # Fetch articles
        list_news = self._fetch()
        logger.info("Found #{} articles for search: {}".format(len(list_news), self.name))
        # Write to DB
        db_writer.write_batch(list_news, self.name)


self._fetch_raw_urls_list()
raw_urls, source = search_googlenews_rss(keyword_search, language="en", country="US")
raw_urls = self._post_process_urls(raw_urls, obj_search)
# Write to DB
DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search)
'''


class FetchSearcher():
    def __init__(self) -> None:
        logger.debug("Initializing Fetcher Searcher")

    def _get_source_object(self, source):
        # TODO: Cache
        # self.cached_sources = {}
        # Get source object
        obj_source, created = Source.objects.get_or_create(source=source)
        return obj_source

    def _post_process_urls(self, raw_urls, obj_search):
        # Searching URL Host based? Make sure results belong to that site
        if (obj_search.type == Search.TYPE_ENUM.URL_HOST):
            # Get clean URL host
            url_host_clean = obj_search.search.replace("www.", "").replace("http://", "").replace("https://", "")
            # Ensure URL host in URL
            raw_urls = [u for u in raw_urls if url_host_clean in u]

        return raw_urls

    def run(self):
        try:
            logger.debug("Starting FetchSearcher.run()")
@@ -65,58 +23,36 @@ class FetchSearcher():
            # Search
            for obj_search in list_search_obj:
                # TODO: language & country customization
                # TODO: allintitle: "child abuse"
                # TODO: intitle: "child abuse"

                # Search
                keyword_search = "{}{}".format("site:" if obj_search.type == Search.TYPE_ENUM.URL_HOST else "", obj_search.search)

                if (obj_search.type == Search.TYPE_ENUM.KEYWORD_SEARCH):
                    # Add search with intitle keyword
                    # TODO: allintitle: "child abuse"
                    # TODO: intitle: "child abuse"
                    pass
                # language, country = obj_search.language_country.split("-")

                logger.debug("Starting keyword search: {}".format(keyword_search))
                logger.debug("Search type: {}".format(obj_search.type))

                # news.google.com/rss
                time.sleep(5)
                raw_urls, source = search_googlenews_rss(keyword_search, language="en", country="US")
                raw_urls = self._post_process_urls(raw_urls, obj_search)
                # Write to DB
                DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search)

                # DDG News
                time.sleep(5)
                raw_urls, source = search_ddg(keyword_search, category="news", timelimit="d", max_results=None, region = "en-US")
                raw_urls = self._post_process_urls(raw_urls, obj_search)
                # Write to DB
                DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search)
                # DB writer
                db_writer = DB_Handler()

                # GNews
                time.sleep(5)
                raw_urls, source = search_gnews(keyword_search, language="en", country="US")
                raw_urls = self._post_process_urls(raw_urls, obj_search)
                # Write to DB
                DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search)
                # Keyword arguments
                args = {
                    "language": "en",
                    "country": "US",
                    "period": "7d",
                    "max_results": 100,
                    "max_pages": 1,
                }

                # DDG Text (week, 20 results)
                time.sleep(5)
                raw_urls, source = search_ddg(keyword_search, category="text", timelimit="d", max_results=20, region = "en-US")
                raw_urls = self._post_process_urls(raw_urls, obj_search)
                # Write to DB
                DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search)

                # GoogleNews news
                time.sleep(5)
                raw_urls, source = search_googlenews_news(keyword_search, period="1d", language="en", country="US")
                raw_urls = self._post_process_urls(raw_urls, obj_search)
                # Write to DB
                DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search)

                # GoogleNews general
                time.sleep(5)
                raw_urls, source = search_googlenews_general(keyword_search, period="1d", language="en", country="US", max_pages=2)
                raw_urls = self._post_process_urls(raw_urls, obj_search)
                # Write to DB
                DB_Handler().insert_raw_urls(raw_urls, self._get_source_object(source), obj_search)

                for SearchInstance in ListSearchInstances:
                    # Sleep between requests, avoid too many requests...
                    time.sleep(int(os.getenv("FETCHER_BETWEEN_SEARCHES_SLEEP", 5)))
                    SearchInstance(args).fetch_articles(db_writer, obj_search)

                # TODO: https://github.com/tasos-py/Search-Engines-Scraper/tree/master
        except Exception as e:

app_urls/api/src/fetch_search_instances.py (new file, 259 lines)
@@ -0,0 +1,259 @@
import time
import feedparser
import os
from ..models import Search, Source
from .fetch_utils import decode_gnews_urls
from .logger import get_logger
logger = get_logger()

from gnews import GNews
from duckduckgo_search import DDGS
from GoogleNews import GoogleNews

###########################################################################
###########################################################################
from abc import ABC, abstractmethod

# Generic fetcher (fetches articles, writes to DB)
class FetcherAbstract(ABC):
    @abstractmethod
    def _fetch_raw_urls(self):
        pass

    @abstractmethod
    def _get_name(self):
        pass

    def _get_source_object(self, source):
        # TODO: Cache
        # self.cached_sources = {}
        # Get source object
        obj_source, created = Source.objects.get_or_create(source=source)
        return obj_source

    def _post_process_urls(self, raw_urls, obj_search):
        # Searching URL Host based? Make sure results belong to that site
        if (obj_search.type == Search.TYPE_ENUM.URL_HOST):
            # Get clean URL host
            url_host_clean = obj_search.search.replace("www.", "").replace("http://", "").replace("https://", "")
            # Ensure URL host in URL
            raw_urls = [u for u in raw_urls if url_host_clean in u]

        return raw_urls

    def fetch_articles(self, db_writer, obj_search):
        # Search
        keyword_search = "{}{}".format("site:" if obj_search.type == Search.TYPE_ENUM.URL_HOST else "", obj_search.search)
        # Source name
        source_name = self._get_name()

        logger.debug("Starting search: {} - {}".format(keyword_search, source_name))
        # Fetch
        raw_urls = self._fetch_raw_urls(keyword_search)
        # Post-process
        raw_urls = self._post_process_urls(raw_urls, obj_search)

        # Write to DB
        db_writer.insert_raw_urls(raw_urls, self._get_source_object(source_name), obj_search)

###########################################################################

class SearchGNews(FetcherAbstract):
    def __init__(self, args={"language":"en", "country":"US", "period":"7d", "max_results":100}):
        super().__init__()
        # Parameters
        self.language = args.get("language")
        self.country = args.get("country")
        self.period = args.get("period")
        self.max_results = args.get("max_results")

    def _get_name(self):
        # [source] [period] [language-country] [max_results]
        return "gnews {} {} {}-{} results={}".format("news", self.period, self.language, self.country, self.max_results).replace("results=None", "").strip()

    def _fetch_raw_urls(self, keyword_search):
        try:
            # Get news
            results_gnews = GNews(language=self.language, country=self.country, period=self.period, max_results=self.max_results).get_news(keyword_search)
            # Get list of encoded urls
            encoded_urls = [e.get("url") for e in results_gnews]
            # Decode
            urls = decode_gnews_urls(encoded_urls)
        except Exception as e:
            logger.warning("Exception fetching {}: {}".format(self._get_name(), str(e)))
            urls = []
        return urls

class SearchDuckDuckGoGeneral(FetcherAbstract):
    def __init__(self, args={"language":"wt", "country":"wt", "max_results":100}):
        super().__init__()
        # Parameters
        self.language = args.get("language")
        self.country = args.get("country")
        self.max_results = args.get("max_results")
        self.region = "{}-{}".format(self.language, self.country).lower()
        self.period = None

    def _get_name(self):
        # [source] [language-country] [max_results]
        return "ddg-general {} results={}".format(self.region, self.max_results).replace("results=None", "").strip()

    def _fetch_raw_urls(self, keyword_search):
        try:
            news = DDGS().text(keyword_search, region=self.region, timelimit=self.period, max_results=self.max_results)
            urls = [e.get("href") for e in news]
        except Exception as e:
            logger.warning("Exception fetching {}: {}".format(self._get_name(), str(e)))
            urls = []
        return urls

class SearchDuckDuckGoNews(FetcherAbstract):
    def __init__(self, args={"language":"wt", "country":"wt", "max_results":100}):
        super().__init__()
        # Parameters
        self.language = args.get("language")
        self.country = args.get("country")
        self.max_results = args.get("max_results")
        self.region = "{}-{}".format(self.language, self.country).lower()
        self.period = None

    def _get_name(self):
        # [source] [language-country] [max_results]
        return "ddg-news {} results={}".format(self.region, self.max_results).replace("results=None", "").strip()

    def _fetch_raw_urls(self, keyword_search):
        try:
            news = DDGS().news(keyword_search, region=self.region, timelimit=self.period, max_results=self.max_results)
            urls = [e.get("url") for e in news]
        except Exception as e:
            logger.warning("Exception fetching {}: {}".format(self._get_name(), str(e)))
            urls = []
        return urls

class SearchGoogleNews(FetcherAbstract):
    def __init__(self, args={"language":"en", "country":"US", "period":"7d"}):
        super().__init__()
        # Parameters
        self.language = args.get("language")
        self.country = args.get("country")
        self.period = args.get("period")

    def _get_name(self):
        # [source] [period] [language-country]
        return "googlenews {} {}-{}".format(self.period, self.language, self.country)

    def _fetch_raw_urls(self, keyword_search):
        try:
            # Initialize
            googlenews = GoogleNews(period=self.period, lang=self.language, region=self.country)
            googlenews.enableException(True)
            # Search
            googlenews.get_news(keyword_search)
            # Fetch
            encoded_urls = googlenews.get_links()
            # Decode
            urls = decode_gnews_urls(encoded_urls)
        except Exception as e:
            logger.warning("Exception fetching {}: {}".format(self._get_name(), str(e)))
            urls = []
        return urls

class SearchGoogleGeneral(FetcherAbstract):
    def __init__(self, args={"language":"en", "country":"US", "period":"7d", "max_pages":1}):
        super().__init__()
        # Parameters
        self.language = args.get("language")
        self.country = args.get("country")
        self.period = args.get("period")
        self.max_pages = args.get("max_pages")

    def _get_name(self):
        # [source] [period] [language-country] [pages]
        return "google-general {} {}-{} pages={}".format(self.period, self.language, self.country, self.max_pages).replace("pages=None", "").strip()

    def _fetch_raw_urls(self, keyword_search):
        try:
            # Initialize
            googlenews = GoogleNews(period=self.period, lang=self.language, region=self.country)
            googlenews.enableException(True)
            # Search
            googlenews.search(keyword_search)

            set_links = set()
            # Iterate pages
            for i in range(self.max_pages):
                # Sleep between pages fetch
                time.sleep(int(os.getenv("FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP", 4)))
                # Number of URLs fetched so far
                num_before = len(set_links)
                # Get page
                try:
                    links = googlenews.page_at(i+1)
                except Exception as e:
                    logger.warning("Exception fetching page - {}: {}".format(self._get_name(), str(e)))
                    break
                # Links
                for l in links:
                    # 'link': 'https://uk.news.yahoo.com/leaving-neverland-2-michael-jackson-lawyer-channel-4-102017088.html&ved=2ahUKEwjl38eJm5aMAxVvqJUCHXgnGzwQxfQBegQICRAC&usg=AOvVaw1osa6b3o_xXfcNinMDpLoK'
                    set_links.add( l.get("link").split("&ved=")[0] )
                # Finished?
                if (num_before == len(set_links)):
                    break
            # To list
            urls = list(set_links)
        except Exception as e:
            logger.warning("Exception fetching {}: {}".format(self._get_name(), str(e)))
            urls = []
        return urls

class SearchGoogleNewsRSS(FetcherAbstract):
    def __init__(self, args={"language":"en", "country":"US"}):
        super().__init__()
        # Parameters
        self.language = args.get("language")
        self.country = args.get("country")

    def _get_name(self):
        # [source] [language-country]
        return "googlenews-rss {}-{}".format(self.language, self.country).strip()

    def _fetch_raw_urls(self, keyword_search):
        try:
            # Search URL with parameters filled: https://news.google.com/rss/search?q={}&hl=en-US&gl=US&ceid=US:en
            search_url = "https://news.google.com/rss/search?q={}&hl={}&gl={}&ceid={}:{}".format(keyword_search, "{}-{}".format(self.language, self.country.upper()), self.country.upper(), self.country.upper(), self.language)
            # Control characters
            search_url = search_url.replace(" ", "+") # urllib.parse.quote(search_url) # Issue: https%3A//news.google.com/rss/search%3Fq%3Dbreitbart.com%26hl%3Den-US%26gl%3DUS%26ceid%3DUS%3Aen
            # Initialize
            encoded_urls = []
            # Fetch feeds
            feeds = feedparser.parse(search_url)
            # Parse
            for f in feeds.get("entries", []):
                # Encoded URL
                encoded_url = f.get("link", None)
                '''
                # Available publish date?
                publish_date_parsed = f.get("published_parsed")
                if (publish_date_parsed is None):
                    publish_date = f.get("published", None)
                    if (publish_date is not None):
                        publish_date_parsed = dateutil.parser.parse(publish_date)

                # Published date
                urls_publish_date.append(publish_date_parsed)
                '''
                # Append
                encoded_urls.append(encoded_url)

            # Decode
            urls = decode_gnews_urls(encoded_urls)

        except Exception as e:
            logger.warning("Exception fetching {}: {}".format(self._get_name(), str(e)))
            urls = []

        return urls
###########################################################################

# List of instances
ListSearchInstances = [SearchGNews, SearchDuckDuckGoNews, SearchGoogleNews, SearchDuckDuckGoGeneral, SearchGoogleGeneral, SearchGoogleNewsRSS]
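
Each entry in ListSearchInstances is a strategy that FetchSearcher.run() instantiates once per search. Running a single fetcher by hand looks roughly like this (a sketch: the args dict and the fetch_articles call shape come from FetchSearcher.run() above, while the import paths and the Search lookup are assumptions for illustration):

```
from api.models import Search
from api.src.db_utils import DB_Handler
from api.src.fetch_search_instances import SearchGNews

# Illustrative: fetch one keyword search through a single backend.
# Assumes the 'child abuse' row exists (see the 1-DB.ipynb inserts above).
obj_search = Search.objects.get(search="child abuse")
args = {"language": "en", "country": "US", "period": "7d", "max_results": 100, "max_pages": 1}
SearchGNews(args).fetch_articles(DB_Handler(), obj_search)
```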

app_urls/api/src/fetch_search_utils.py (file deleted; its helpers are superseded by the classes above)
@@ -1,197 +0,0 @@
from django.core.cache import cache
import traceback
import random
import time
import feedparser
import urllib
import dateutil
from .logger import get_logger
logger = get_logger()

from googlenewsdecoder import gnewsdecoder
from gnews import GNews
from duckduckgo_search import DDGS
from GoogleNews import GoogleNews

###########################################################################
def decode_gnews_urls(encoded_urls, interval=2):
    # DecodeURLs
    list_decoded_urls = []
    for url in encoded_urls:
        # Already cached?
        decoded_url = cache.get("gnews_decode_{}".format(url))
        if (decoded_url is not None):
            logger.debug("Already cached decoded URL: {} -> {}".format(url, decoded_url))
            # Append decoded URL
            list_decoded_urls.append(decoded_url)
        else:
            try:
                # Decode URL, with interval time to avoid block
                decoded_url_dict = gnewsdecoder(url, interval=interval)
                # Ok?
                if decoded_url_dict.get("status"):
                    # Append decoded URL
                    decoded_url = decoded_url_dict["decoded_url"]
                    list_decoded_urls.append(decoded_url)
                    # Cache decoded URL
                    cache.set("gnews_decode_{}".format(url), decoded_url, timeout=60*60*12)
                else:
                    logger.warning("Error decoding news.google.com, URL {}\nMessage: {}".format(url, str(decoded_url)))
            except Exception as e:
                logger.warning("Error decoding news.google.com, URL: {}\n{}".format(url, traceback.format_exc()))
    return list_decoded_urls

###########################################################################

def search_gnews(keyword_search, period="1d", language="en", country="US", max_results=100):
    # [source] [category] [period] [language-country] [max_results]
    source = "gnews {} {} {}-{} max_results={}".format("news", period, language, country, max_results).replace("None", "").strip()
    logger.debug("Searching: {} --- Source:{}".format(keyword_search, source))

    try:
        # Get news
        results_gnews = GNews(language=language, country=country).get_news(keyword_search)
        # Get list of encoded urls
        encoded_urls = [e.get("url") for e in results_gnews]
        # Decode
        logger.debug("Decoding gnews URLs")
        urls = decode_gnews_urls(encoded_urls)
    except Exception as e:
        logger.warning("Exception fetching {}: {}\n{}".format(source, str(e), traceback.format_exc()))
        urls = []
    return urls, source

###########################################################################

def search_ddg(keyword_search, category="news", timelimit="d", max_results=None, region="wt-wt"):
    # [source] [category] [period] [language-country] [max_results]
    source = "ddg {} {} {} max_results={}".format(category, timelimit, region, max_results).replace("max_results=None", "").strip()
    logger.debug("Searching: {} --- Source:{}".format(keyword_search, source))

    # region="{}-{}".format(langauge, country.lower())
    # timelimit= # Options: d, w, m
    # max_results # max number of results. If None, returns results only from the first response. Defaults to None

    try:
        if (category == "news"):
            news = DDGS().news(keyword_search, region=region, timelimit=timelimit, max_results=max_results)
            urls = [e.get("url") for e in news]
        if (category == "text"):
            news = DDGS().text(keyword_search, region=region, timelimit=timelimit, max_results=max_results)
            urls = [e.get("href") for e in news]
    except Exception as e:
        logger.warning("Exception fetching {}: {}\n{}".format(source, str(e), traceback.format_exc()))
        urls = []

    return urls, source
###########################################################################

def search_googlenews_news(keyword_search, period="1d", language="en", country="US"):
    category = "news"
    # [source] [category] [period] [language-country]
    source = "googlenews {} {} {}-{}".format(category, period, language, country).replace("None", "").strip()
    logger.debug("Searching: {} --- Source:{}".format(keyword_search, source))

    # Initialize
    googlenews = GoogleNews(period=period, lang=language, region=country)
    googlenews.enableException(True)

    try:
        # Search
        googlenews.get_news(keyword_search)
        # Fetch
        encoded_urls = googlenews.get_links()
        # Decode
        logger.debug("Decoding gnews URLs")
        urls = decode_gnews_urls(encoded_urls)
    except Exception as e:
        logger.warning("Exception fetching {}: {}\n{}".format(source, str(e), traceback.format_exc()))
        urls = []

    return urls, source

def search_googlenews_general(keyword_search, period="1d", language="en", country="US", max_pages=5):
    category="general"
    # [source] [category] [period] [language-country] [max_results]
    source = "googlenews {} {} {}-{} max_pages={}".format(category, period, language, country, max_pages).replace("None", "").strip()
    logger.debug("Searching: {} --- Source:{}".format(keyword_search, source))

    # Initialize
    googlenews = GoogleNews(period=period, lang=language, region=country)
    googlenews.enableException(True)

    try:
        set_links = set()
        # Search
        googlenews.search(keyword_search)

        # Iterate pages
        for i in range(max_pages):
            time.sleep(random.uniform(2, 4.5))
            num_before = len(set_links)

            # Get page
            try:
                links = googlenews.page_at(i+1)
            except Exception as e:
                logger.warning("Exception fetching page in GoogleNews {}: {}".format(source, str(e)))
                break
            # Links
            for l in links:
                # 'link': 'https://uk.news.yahoo.com/leaving-neverland-2-michael-jackson-lawyer-channel-4-102017088.html&ved=2ahUKEwjl38eJm5aMAxVvqJUCHXgnGzwQxfQBegQICRAC&usg=AOvVaw1osa6b3o_xXfcNinMDpLoK'
                set_links.add( l.get("link").split("&ved=")[0] )
            # Finished?
            if (num_before == len(set_links)):
                break
        # To list
        urls = list(set_links)
    except Exception as e:
        logger.warning("Exception fetching {}: {}\n{}".format(source, str(e), traceback.format_exc()))
        urls = []

    return urls, source

###########################################################################

def search_googlenews_rss(keyword_search, language="en", country="US"):
    # [source] [category] [period] [language-country] [max_results]
    source = "googlenews-rss {}-{}".format(language, country).replace("None", "").strip()
    logger.debug("Searching: {} --- Source:{}".format(keyword_search, source))

    # https://news.google.com/rss/search?q={}&hl=en-US&gl=US&ceid=US:en

    try:
        # Search URL with parameters filled
        search_url = "https://news.google.com/rss/search?q={}&hl={}&gl={}&ceid={}:{}".format(keyword_search, "{}-{}".format(language, country.upper()), country.upper(), country.upper(), language)
        # Control characters
        search_url = search_url.replace(" ", "+") # urllib.parse.quote(search_url) # Issue: https%3A//news.google.com/rss/search%3Fq%3Dbreitbart.com%26hl%3Den-US%26gl%3DUS%26ceid%3DUS%3Aen
        # Initialize
        encoded_urls = []
        # Fetch feeds
        feeds = feedparser.parse(search_url)
        # Parse
        for f in feeds.get("entries", []):
            # Encoded URL
            encoded_url = f.get("link", None)
            '''
            # Available publish date?
            publish_date_parsed = f.get("published_parsed")
            if (publish_date_parsed is None):
                publish_date = f.get("published", None)
                if (publish_date is not None):
                    publish_date_parsed = dateutil.parser.parse(publish_date)

            # Published date
            urls_publish_date.append(publish_date_parsed)
            '''
            # Append
            encoded_urls.append(encoded_url)

        # Decode
        urls = decode_gnews_urls(encoded_urls)

    except Exception as e:
        logger.warning("Exception fetching {}: {}\n{}".format(source, str(e), traceback.format_exc()))
        urls = []

    return urls, source

app_urls/api/src/fetch_utils.py (new file, 35 lines)
@@ -0,0 +1,35 @@
import traceback
import os
from django.core.cache import cache
from .logger import get_logger
logger = get_logger()
from googlenewsdecoder import gnewsdecoder


def decode_gnews_urls(encoded_urls, interval=int(os.getenv("FETCHER_GNEWS_DECODE_SLEEP", 2))):
    logger.debug("Decoding gnews URLs")
    # DecodeURLs
    list_decoded_urls = []
    for url in encoded_urls:
        # Already cached?
        decoded_url = cache.get("gnews_decode_{}".format(url))
        if (decoded_url is not None):
            logger.debug("Already cached decoded URL: {} -> {}".format(url, decoded_url))
            # Append decoded URL
            list_decoded_urls.append(decoded_url)
        else:
            try:
                # Decode URL, with interval time to avoid block
                decoded_url_dict = gnewsdecoder(url, interval=interval)
                # Ok?
                if decoded_url_dict.get("status"):
                    # Append decoded URL
                    decoded_url = decoded_url_dict["decoded_url"]
                    list_decoded_urls.append(decoded_url)
                    # Cache decoded URL
                    cache.set("gnews_decode_{}".format(url), decoded_url, timeout=60*60*12)
                else:
                    logger.warning("Error decoding news.google.com, URL {}".format(url))
            except Exception as e:
                logger.warning("Error decoding news.google.com, URL: {}\n{}".format(url, traceback.format_exc()))
    return list_decoded_urls

@@ -1,34 +1,34 @@
import logging
import logging.handlers
import os

''' TODO: PATH LOGS
PATH_LOGS_ERROR=logs/log_app_fetcher_error.log
PATH_LOGS_INFO=logs/log_app_fetcher_info.log
PATH_LOGS_DEBUG=logs/log_app_fetcher_debug.log
# PATH_LOGS=logs/log_app_fetcher.log
'''
os.makedirs("logs", exist_ok=True)
# Get env var
path_logs_parameterization = os.getenv("PATH_LOGS_PARAMETERIZATION", "logs/log_app_fetcher_{}.log")

# Directory of logs
directory = '/'.join(path_logs_parameterization.split("/")[:-1])
os.makedirs(directory, exist_ok=True)

logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(message)s')
logger = logging.getLogger("news_fetcher")
logger.setLevel(logging.DEBUG)

# To file log: DEBUG / INFO / WARNING / ERROR / CRITICAL
fh = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher_debug.log", mode="a", maxBytes=10000000, backupCount=4)
fh = logging.handlers.RotatingFileHandler(filename=path_logs_parameterization.format("debug"), mode="a", maxBytes=10000000, backupCount=4)
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)

# To file log: INFO / WARNING / ERROR
fh_ = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher_info.log", mode="a", maxBytes=10000000, backupCount=2)
fh_.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh_.setLevel(logging.INFO)
logger.addHandler(fh_)
fh = logging.handlers.RotatingFileHandler(filename=path_logs_parameterization.format("info"), mode="a", maxBytes=10000000, backupCount=2)
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.setLevel(logging.INFO)
logger.addHandler(fh)

# To file log: WARNING / ERROR / CRITICAL
fh_ = logging.handlers.RotatingFileHandler(filename="logs/log_app_fetcher_error.log", mode="a", maxBytes=10000000, backupCount=1)
fh_.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh_.setLevel(logging.WARNING)
logger.addHandler(fh_)
fh = logging.handlers.RotatingFileHandler(filename=path_logs_parameterization.format("warning"), mode="a", maxBytes=10000000, backupCount=1)
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.setLevel(logging.WARNING)
logger.addHandler(fh)

def get_logger():
    return logger

@@ -3,6 +3,7 @@ from .logger import get_logger
logger = get_logger()
import newspaper
import time
import os
from urllib.parse import unquote
import langdetect
langdetect.DetectorFactory.seed = 0
@@ -40,11 +41,11 @@ def url_host_slowdown(url, url_host_slowdown_seconds):
def process_url(url):
    try:
        # Slow down if required to avoid too many requests error
        url_host_slowdown(url, url_host_slowdown_seconds=5)
        url_host_slowdown(url, url_host_slowdown_seconds=int(os.getenv("FETCHER_URL_HOST_SLEEP", 5)))
        # Process
        article = newspaper.article(url)
    except newspaper.ArticleBinaryDataException:
        logger.warning("ArticleException for input URL {}\n{}".format(url, str(e)))
        logger.warning("ArticleException for input URL {}".format(url))
        return {"override_status": "invalid"}
    except newspaper.ArticleException as e:

@@ -1,4 +1,3 @@
# from django_rq import job
from scheduler import job

from .src.fetch_feed import FetchFeeds
@@ -1,607 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>News</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>

<script>

function getQueryString(pageNumber, itemsNumber, sources, searches, statuses){
// Query parameters. If input is null, get most recent value
let queryParams = new URLSearchParams(window.location.search);
// page
if (pageNumber == null) pageNumber = queryParams.get("page") ?? 1;
queryParams.set("page", pageNumber);
// items
if (itemsNumber == null) itemsNumber = queryParams.get("items") ?? 15;
queryParams.set("items", itemsNumber);
// sources
if (sources == null) sources = queryParams.get("sources") ?? "all";
queryParams.set("sources", sources);
// searches
if (searches == null) searches = queryParams.get("searches") ?? "all";
queryParams.set("searches", searches);
// status
if (statuses == null) statuses = queryParams.get("status") ?? "all";
queryParams.set("status", statuses);

// Encoding fix: %2C -> ,
let queryParamsString = queryParams.toString();
while (queryParamsString.includes("%2C")) {
queryParamsString = queryParamsString.replace("%2C", ",");
}
return queryParamsString;
}

function loadPage(pageNumber, itemsNumber, sources, searches, statuses) {
$("#item-list").fadeTo(100, 0.5); // Smooth fade effect
$("#loading").show();

queryParamsString = getQueryString(pageNumber, itemsNumber, sources, searches, statuses);

$.ajax({
url: "?" + queryParamsString,
type: "GET",
headers: { "X-Requested-With": "XMLHttpRequest" },
success: function (data) {
$("#item-list").fadeTo(0, 1).html(data.items_html); // Restore opacity smoothly
$("#loading").hide();
// Update URL without reloading
window.history.pushState({}, "", "?" + queryParamsString);
}
});
}

////////////////////////////////////////////////////////////////////////////
// Pagination
////////////////////////////////////////////////////////////////////////////
$(document).on("click", ".pagination a", function (event) {
event.preventDefault();
let page = $(this).attr("data-page");
loadPage(pageNumber=page, itemsNumber=null, sources=null, searches=null, statuses=null);
});

$(document).ready(function () {

////////////////////////////////////////////////////////////////////////////
// Filter updates
////////////////////////////////////////////////////////////////////////////
const sourcesToggleAll = $("#toggle-all-sources");
const sourcesCheckboxes = $(".source-checkbox");
const searchesToggleAll = $("#toggle-all-searches");
const searchesCheckboxes = $(".search-checkbox");
const statusesToggleAll = $("#toggle-all-status");
const statusCheckboxes = $(".status-checkbox");

function updateFilters() {
// Get selected sources
if (sourcesToggleAll.prop("checked")) {
selectedSources = "all";
}
else {
if (sourcesCheckboxes.filter(":checked").length > 0 ){
selectedSources = sourcesCheckboxes.filter(":checked").map(function () {
return $(this).val();
}).get().join(",");
}
else {
selectedSources = "none";
}

}

// Get selected searches
if (searchesToggleAll.prop("checked")) {
selectedSearches = "all";
}
else {
if (searchesCheckboxes.filter(":checked").length > 0 ){
selectedSearches = searchesCheckboxes.filter(":checked").map(function () {
return $(this).val();
}).get().join(",");
}
else {
selectedSearches = "none";
}
}

// Get selected URL statuses
if (statusesToggleAll.prop("checked")) {
selectedStatuses = "all";
}
else {
if (statusCheckboxes.filter(":checked").length > 0 ){
selectedStatuses = statusCheckboxes.filter(":checked").map(function () {
return $(this).val();
}).get().join(",");
}
else {
selectedStatuses = "none";
}
}

// Get selected items per page
let selectedItems = $("input[name='items']:checked").val();

// Update pagination and reload data
loadPage(1, selectedItems, selectedSources, selectedSearches, selectedStatuses);
}

////////////////////////////////////////////////////////////////////////////
// Change triggers
////////////////////////////////////////////////////////////////////////////
// Sources
sourcesToggleAll.on("change", function () {
sourcesCheckboxes.prop("checked", sourcesToggleAll.prop("checked"));
updateFilters();
});
sourcesCheckboxes.on("change", function () {
sourcesToggleAll.prop("checked", sourcesCheckboxes.length === sourcesCheckboxes.filter(":checked").length);
updateFilters();
});
// Searches
searchesToggleAll.on("change", function () {
searchesCheckboxes.prop("checked", searchesToggleAll.prop("checked"));
updateFilters();
});
searchesCheckboxes.on("change", function () {
searchesToggleAll.prop("checked", searchesCheckboxes.length === searchesCheckboxes.filter(":checked").length);
updateFilters();
});
// Status
statusesToggleAll.on("change", function () {
statusCheckboxes.prop("checked", statusesToggleAll.prop("checked"));
updateFilters();
});
statusCheckboxes.on("change", function () {
// If all checkboxes are checked, mark "Toggle All" as checked
statusesToggleAll.prop("checked", statusCheckboxes.length === statusCheckboxes.filter(":checked").length);
updateFilters();
});

// Items change trigger update
$(".items").on("change", updateFilters);

////////////////////////////////////////////////////////////////////////////
// Default values
////////////////////////////////////////////////////////////////////////////
// Sources
sourcesCheckboxes.each(function () { $(this).prop("checked", true); });
sourcesToggleAll.prop("checked", true);
// Searches
searchesCheckboxes.each(function () { $(this).prop("checked", true); });
searchesToggleAll.prop("checked", true);
// Statuses
statusCheckboxes.each(function () { $(this).prop("checked", true); });
statusesToggleAll.prop("checked", true);
// Items
// $("input[name='items'][value='" + 15 + "']").prop("checked", true);
// loadPage(pageNumber=page, itemsNumber=null, sources=null, searches=null, statuses=null);
});

////////////////////////////////////////////////////////////////////////////
// Theme logic
////////////////////////////////////////////////////////////////////////////
function setTheme(mode) {
document.documentElement.setAttribute("data-theme", mode);
document.documentElement.setAttribute("data-bs-theme", mode);
localStorage.setItem("theme", mode);
document.getElementById("theme-icon").innerHTML = mode === "dark" ? "🌞" : "🌙";
document.body.classList.toggle("dark-mode", mode === "dark");
}

function toggleTheme() {
let currentTheme = document.documentElement.getAttribute("data-theme");
setTheme(currentTheme === "dark" ? "light" : "dark");
}

document.addEventListener("DOMContentLoaded", function () {
let savedTheme = localStorage.getItem("theme") ||
(window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light");
setTheme(savedTheme);
// Local browser timestamp aware for ts_fetch print
document.querySelectorAll(".timestamp").forEach(function (el) {
const ts = el.getAttribute("data-ts");
if (ts) {
const options = {
day: "2-digit",
month: "2-digit",
year: "numeric",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
hour12: false // Use 24-hour format
}; // "en-GB" for DD-MM-YYYY
const localDate = new Date(ts).toLocaleString("en-GB", options); // Adjust to browser's timezone
el.innerHTML = `${localDate}`;
}
});
});
////////////////////////////////////////////////////////////////////////////
</script>

<style>
/* Content Area */
#content {
margin-left: 170px; /* Match sidebar width */
min-width: calc(100vw - 170px); /* Ensure it doesn't shrink into the sidebar */
width: calc(100vw - 170px); /* Expands based on screen size */
padding: 20px;
overflow-x: auto; /* Prevent content from being squeezed */
transition: margin-left 0.3s ease;
}

/* Sidebar Styles */
#sidebar {
height: 100vh;
position: fixed;
top: 0;
left: 0;
width: 170px; /* Default width */
background-color: var(--bg-color);
box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
padding: 15px;
transition: width 0.3s ease;
/* Enable scrolling */
overflow-y: auto;
max-height: 100vh;
}

#sidebar .nav-link {
color: var(--text-color);
}

#sidebar .nav-link:hover {
background-color: var(--pagination-hover-bg);
}

/* ============================= */
/* Responsive Enhancements */
/* ============================= */
@media (min-width: 1200px) {
.table {
width: 95%; /* Allows table to take more space */
margin: 0 auto; /* Centers the table */
}
}

@media (max-width: 768px) {
#sidebar {
width: 70px; /* Collapse sidebar to smaller width */
/*padding: 10px;*/
}

#content {
margin-left: 70px; /* Adjust margin to match collapsed sidebar */
min-width: calc(100vw - 70px); /* Prevent overlap */
/*padding: 10px;*/
}

/* Adjust table for small screens */
.table-responsive {
overflow-x: auto;
}

.table th,
.table td {
white-space: nowrap; /* Prevent text wrapping in cells */
}

.table a {
word-break: break-word; /* Ensure long URLs break properly */
}
}

/* ============================= */
/* Global Styles */
/* ============================= */
body {
background-color: var(--bg-color);
color: var(--text-color);
transition: background-color 0.3s, color 0.3s;
}

/* ============================= */
/* Light & Dark Mode Variables */
/* ============================= */
:root {
--bg-color: #ffffff;
--text-color: #212529;
--table-bg: #ffffff;
--table-text: #000000;
--table-border: #dee2e6;
--link-color: #007bff;
--pagination-bg: #ffffff;
--pagination-border: #dee2e6;
--pagination-hover-bg: #f8f9fa;
--pagination-active-bg: #007bff;
--pagination-active-text: #ffffff;
--button-bg: #f8f9fa;
--button-border: #ced4da;
--button-text: #212529;
}

[data-theme="dark"] {
--bg-color: #121212;
--text-color: #e0e0e0;
--table-bg: #1e1e1e;
--table-text: #ffffff;
--table-border: #2c2c2c;
--link-color: #9ec5fe;
--pagination-bg: #1e1e1e;
--pagination-border: #444;
--pagination-hover-bg: #333;
--pagination-active-bg: #007bff;
--pagination-active-text: #ffffff;
--button-bg: #1e1e1e;
--button-border: #444;
--button-text: #e0e0e0;
}

/* ============================= */
/* Table Styling */
/* ============================= */
.table-responsive {
width: 100%; /* Ensure it spans the full width of its container */
max-width: 100%;
overflow-x: auto;
}

.table {
background-color: var(--table-bg);
color: var(--table-text);
border: 1px solid var(--table-border);
transition: background-color 0.3s, color 0.3s;

width: 100%; /* Ensures it takes full width of its container */
table-layout: auto; /* Allows columns to adjust dynamically */
/*white-space: nowrap;*/ /* Prevents text wrapping in cells */
}

.table th,
.table td {
border-color: var(--table-border);
}

.table thead {
background-color: var(--pagination-active-bg);
color: var(--pagination-active-text);
}

[data-theme="dark"] .table {
background-color: var(--table-bg);
color: var(--table-text);
}

[data-theme="dark"] .table th,
[data-theme="dark"] .table td {
border-color: var(--table-border);
}

[data-theme="dark"] .table thead {
background-color: #333;
color: #fff;
}

th:nth-child(1), td:nth-child(1) { width: 50%; } /* URL column */
th:nth-child(2), td:nth-child(2) { width: 27.5%; } /* Fetch Date */
th:nth-child(3), td:nth-child(3) { width: 10%; } /* Sources */
th:nth-child(4), td:nth-child(4) { width: 10%; } /* Searches */
th:nth-child(5), td:nth-child(5) { width: 2.5%; } /* Status */

/* ============================= */
/* Pagination Styling */
/* ============================= */
.pagination {
display: flex;
justify-content: center;
padding: 10px 0;
}

.pagination .page-link {
background-color: var(--pagination-bg);
border-color: var(--pagination-border);
color: var(--text-color);
padding: 10px 14px;
margin: 0 5px;
border-radius: 8px;
transition: background-color 0.3s, color 0.3s, transform 0.2s;
}

.pagination .page-link:hover {
background-color: var(--pagination-hover-bg);
transform: scale(1.05);
}

.pagination .active .page-link {
background-color: var(--pagination-active-bg);
color: var(--pagination-active-text);
border-color: var(--pagination-active-bg);
}

/* ============================= */
/* Theme Toggle Button */
/* ============================= */
.theme-toggle-btn {
background-color: var(--button-bg);
border: 1px solid var(--button-border);
color: var(--button-text);
border-radius: 50%;
width: 40px;
height: 40px;
font-size: 20px;
display: flex;
align-items: center;
justify-content: center;
transition: background-color 0.3s, color 0.3s, transform 0.2s;
cursor: pointer;
}

.theme-toggle-btn:hover {
background-color: var(--pagination-hover-bg);
transform: rotate(20deg);
}

.theme-toggle-btn:active {
transform: scale(0.95);
}

/* ============================= */
/* Loading Spinner Styling */
/* ============================= */
#loading {
position: fixed;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
z-index: 1050;
display: none;
}

.spinner-border {
width: 4rem;
height: 4rem;
}

</style>

</head>
<body>

<!-- Left Sidebar -->
<div id="sidebar" class="d-flex flex-column">
<ul class="nav flex-column">

<!-- Theme Toggle Button -->
<div class="nav-item">
<button onclick="toggleTheme()" class="theme-toggle-btn">
<span id="theme-icon">🌙</span>
</button>
</div>

<!-- URLs per page -->
<div class="nav-item mt-3">
<strong>URLs per page</strong>
<div class="card-body">
<!-- Individual Status Checkboxes -->
{% for url_per_page in list_urls_per_page %}
<div class="items-form-check">
<input class="form-check-input items" type="radio" name="items" id="value-{{ url_per_page }}" value="{{ url_per_page }}">
<label class="form-check-label" for="value-{{ url_per_page }}">{{ url_per_page }}</label>
</div>
{% empty %}
<tr>
<td colspan="2" class="text-center">No options available.</td>
</tr>
{% endfor %}
</div>
</div>

<!-- Status -->
<div class="nav-item mt-3">
<strong>Select status</strong>
<form id="status-filter-form">
<!-- Toggle All Checkbox -->
<div class="status-form-check">
<input class="form-check-input" type="checkbox" id="toggle-all-status">
<label class="form-check-label fw-bold" for="toggle-all-status">
Toggle all
</label>
</div>

<!-- Individual Status Checkboxes -->
{% for status in list_status %}
<div class="status-form-check">
<input class="form-check-input status-checkbox" type="checkbox" value="{{ status }}" id="status-{{ status }}">
<label class="form-check-label" for="status-{{ status }}">
{{ status }}
</label>
</div>
{% empty %}
<tr>
<td colspan="2" class="text-center">No statuses available.</td>
</tr>
{% endfor %}
</form>
</div>

<!-- Sources -->
<div class="nav-item mt-3">
<strong>Select sources</strong>
<form id="source-filter-form">
<!-- Toggle All Checkbox -->
<div class="form-check">
<input class="form-check-input" type="checkbox" id="toggle-all-sources">
<label class="form-check-label fw-bold" for="toggle-all-sources">
Toggle all
</label>
</div>
<!-- Individual Source Checkboxes -->
{% for source in sources %}
<div class="form-check">
<input class="form-check-input source-checkbox" type="checkbox" value="{{ source.id }}" id="source-{{ source.id }}">
<label class="form-check-label" for="source-{{ source.id }}">
{{ source.source }}
</label>
</div>
{% empty %}
<tr>
<td colspan="2" class="text-center">No sources available.</td>
</tr>
{% endfor %}
</form>
</div>

<!-- Searches -->
<div class="nav-item mt-3">
<strong>Select searches</strong>
<form id="search-filter-form">
<!-- Toggle All Checkbox -->
<div class="form-check">
<input class="form-check-input" type="checkbox" id="toggle-all-searches">
<label class="form-check-label fw-bold" for="toggle-all-searches">
Toggle all
</label>
</div>
<!-- Individual Search Checkboxes -->
{% for search in searches %}
<div class="form-check">
<input class="form-check-input search-checkbox" type="checkbox" value="{{ search.id }}" id="search-{{ search.id }}">
<label class="form-check-label" for="search-{{ search.id }}">
[{{ search.type }}] {{ search.search }}
</label>
</div>
{% empty %}
<tr>
<td colspan="2" class="text-center">No search available.</td>
</tr>
{% endfor %}
</form>
</div>

</ul>
</div>

<!-- Main Content Area -->
<div id="content" class="main-content">
<div class="container mt-4">

<!-- Table -->
<div id="item-list">
{% include 'urls_partial.html' %}
</div>
<!-- Loading... -->
<div id="loading" class="text-center mt-3" style="display:none;">
<div class="spinner-border text-primary" role="status">
<span class="visually-hidden">Loading...</span>
</div>
</div>
</div>
</div>

</body>
</html>
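The deleted template's `loadPage()` sends an `X-Requested-With: XMLHttpRequest` header and expects a JSON payload carrying an `items_html` fragment. The matching server side looks roughly like this (a sketch; the committed view is not part of this hunk, the view and template names are assumptions, and only `urls_partial.html` comes from the `{% include %}` above):

```python
from django.http import JsonResponse
from django.shortcuts import render
from django.template.loader import render_to_string

def urls_page(request):  # hypothetical view name
    context = {}  # pagination/filter context built as in filtered_urls below
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        # AJAX path: return only the rendered table partial so the client
        # can swap it into #item-list without a full page reload
        items_html = render_to_string("urls_partial.html", context, request=request)
        return JsonResponse({"items_html": items_html})
    return render(request, "urls.html", context)  # full-page render
```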
@@ -1,97 +0,0 @@
{% load custom_filters %}

<div class="table-responsive">
<table class="table table-hover">
<thead>
<tr>
<th scope="col"><strong>URL</strong></th>
<th scope="col"><strong>Fetch date</strong></th>
<th scope="col"><strong>Sources</strong></th>
<th scope="col"><strong>Search</strong></th>
<th scope="col"><strong>Status</strong></th>
</tr>
</thead>
<tbody>
{% for item in page_obj %}
<tr>
<td>
<a href="./{{ item.id }}" class="btn btn-primary btn-sm" target="_blank">➤ </a>
<a href="{{ item.url }}/" target="_blank">{{ item.url }}</a>
</td>
<td class="timestamp" data-ts="{{ item.ts_fetch|date:'c' }}">{{ item.ts_fetch }}</td>
<td>
{% with sources_map|dict_get:item.id as sources %}
{% if sources %}
{% for source in sources %}
<span class="badge bg-secondary">{{ source }}</span>
{% endfor %}
{% else %}
<span class="text-muted">No sources</span>
{% endif %}
{% endwith %}
</td>
<td>
{% with searches_map|dict_get:item.id as searches %}
{% if searches %}
{% for search in searches %}
<span class="badge bg-secondary">{{ search }}</span>
{% endfor %}
{% else %}
<span class="text-muted">No searches</span>
{% endif %}
{% endwith %}
</td>
<td>
{% if item.status == 'raw' %}
<span class="badge bg-secondary">{{ item.status|capfirst }}</span>
{% elif item.status == 'error' %}
<span class="badge bg-danger">{{ item.status|capfirst }}</span>
{% elif item.status == 'valid' %}
<span class="badge bg-success">{{ item.status|capfirst }}</span>
{% elif item.status == 'unknown' %}
<span class="badge bg-warning">{{ item.status|capfirst }}</span>
{% elif item.status == 'invalid' %}
<span class="badge bg-danger">{{ item.status|capfirst }}</span>
{% elif item.status == 'duplicate' %}
<span class="badge bg-info">{{ item.status|capfirst }}</span>
{% else %}
<span class="badge bg-light">Unknown</span>
{% endif %}
</td>
</tr>
{% empty %}
<tr>
<td colspan="4" class="text-center">No items available.</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>

<div class="d-flex justify-content-center mt-3">
<nav>
<ul class="pagination">
{% if page_obj.has_previous %}
<li class="page-item">
<a class="page-link" href="#" data-page="1">First</a>
</li>
<li class="page-item">
<a class="page-link" href="#" data-page="{{ page_obj.previous_page_number }}">Previous</a>
</li>
{% endif %}

<li class="page-item active">
<span class="page-link">Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }}</span>
</li>

{% if page_obj.has_next %}
<li class="page-item">
<a class="page-link" href="#" data-page="{{ page_obj.next_page_number }}">Next</a>
</li>
<li class="page-item">
<a class="page-link" href="#" data-page="{{ page_obj.paginator.num_pages }}">Last</a>
</li>
{% endif %}
</ul>
</nav>
</div>
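The partial leans on a `dict_get` filter from `custom_filters` to index `sources_map` and `searches_map` by `item.id`. A minimal sketch of such a filter (the module path is an assumption; only the filter name and its usage come from the template):

```python
from django import template

register = template.Library()

@register.filter
def dict_get(mapping, key):
    # Lets templates do {{ some_map|dict_get:item.id }}; returns None on a miss
    return mapping.get(key) if mapping else None
```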
@@ -258,10 +258,7 @@ input[type="checkbox"] {
<span id="offText" class="off-text">OFF</span>
</span>
</div>
-->



-->

<!-- Pages Per Page Dropdown -->
<h3>Pages Per Page</h3>
@@ -291,28 +288,14 @@ input[type="checkbox"] {

<!-- Filter by Status -->
<h3>Status</h3>
<!--
<label for="toggle-all-checkbox">
<input type="checkbox" id="toggle-all-checkbox" class="toggle-all-checkbox"> Toggle All
</label><br>
{% for status in statuses %}
<label>
<input type="checkbox" name="status" value="{{ status.0 }}"
{% if status.0 in selected_status %}checked{% endif %}
class="status-checkbox">
{{ status.1 }}
</label><br>
{% endfor %}
-->
<button type="button" class="toggle-all-btn" data-toggle="status">Toggle All</button><br>
{% for status in statuses %}
<label>
<input type="checkbox" name="status" value="{{ status.0 }}"
{% if status.0 in selected_status %}checked{% endif %}>
{% if status.0 in selected_status or 'all' in selected_status %}checked{% endif %}>
{{ status.1 }}
</label><br>
{% endfor %}


<!-- Filter by Search -->
<h3>Search</h3>
@@ -320,11 +303,10 @@ input[type="checkbox"] {
{% for search in searches %}
<label>
<input type="checkbox" name="search" value="{{ search.id }}"
{% if search.id|stringformat:"s" in selected_search %}checked{% endif %}>
{% if search.id|stringformat:"s" in selected_search or 'all' in selected_search %}checked{% endif %}>
[{{ search.type }}] {{ search.search|truncatechars:50 }}
</label><br>
{% endfor %}


<!-- Filter by Source -->
<h3>Source</h3>
@@ -332,7 +314,7 @@ input[type="checkbox"] {
{% for source in sources %}
<label>
<input type="checkbox" name="source" value="{{ source.id }}"
{% if source.id|stringformat:"s" in selected_source %}checked{% endif %}>
{% if source.id|stringformat:"s" in selected_source or 'all' in selected_source %}checked{% endif %}>
{{ source.source|truncatechars:50 }}
</label><br>
{% endfor %}
@@ -343,7 +325,7 @@ input[type="checkbox"] {
{% for lang in languages %}
<label>
<input type="checkbox" name="language" value="{{ lang }}"
{% if lang|stringformat:"s" in selected_lang %}checked{% endif %}>
{% if lang|stringformat:"s" in selected_language or 'all' in selected_language %}checked{% endif %}>
{{ lang|truncatechars:50 }}
</label><br>
{% endfor %}
@@ -456,6 +438,7 @@ input[type="checkbox"] {
</div>

<script>

//////////////////////////////////////////////////////////////////////
document.addEventListener("DOMContentLoaded", function () {
//////////////////////////////////////////////
@@ -514,16 +497,40 @@ input[type="checkbox"] {
});
});

//////////////////////////////////////////////////////////////////////
// Function to update the form parameter before submitting
function updateFormParameter(section) {
const checkboxes = document.querySelectorAll(`[name='${section}']`);
const allChecked = Array.from(checkboxes).every(checkbox => checkbox.checked);

// If all are checked, replace them with a hidden input with value "all"
if (allChecked) {
checkboxes.forEach(checkbox => checkbox.removeAttribute("name"));
let hiddenInput = document.createElement("input");
hiddenInput.type = "hidden";
hiddenInput.name = section;
hiddenInput.value = "all";
document.getElementById("filterForm").appendChild(hiddenInput);
} else {
checkboxes.forEach(checkbox => checkbox.setAttribute("name", section));
document.querySelectorAll(`input[name="${section}"][type="hidden"]`).forEach(hiddenInput => hiddenInput.remove());
}

// Submit form after changes
document.getElementById("filterForm").submit();
}

//////////////////////////////////////////////////////////////////////
// Function to toggle all checkboxes in a section
function toggleCheckboxes(section) {
const checkboxes = document.querySelectorAll(`[name='${section}']`);
const allChecked = Array.from(checkboxes).every(checkbox => checkbox.checked);
checkboxes.forEach(checkbox => {
checkbox.checked = !allChecked;
});
checkboxes.forEach(cb => cb.checked = !allChecked);
/*
// Automatically submit the form when a checkbox is toggled
document.getElementById('filterForm').submit();
*/
updateFormParameter(section);
}

// Attach event listeners to "Toggle All" buttons
@@ -533,13 +540,15 @@ input[type="checkbox"] {
toggleCheckboxes(section);
});
});
//////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////
// Automatically submit the form when any checkbox changes
document.querySelectorAll('input[type="checkbox"]').forEach(function(checkbox) {
checkbox.addEventListener('change', function() {
/*
document.getElementById('filterForm').submit();
*/
updateFormParameter(this.name);
});
});
document.getElementById('perPageSelect').addEventListener('change', function() {
@@ -548,20 +557,6 @@ input[type="checkbox"] {
document.getElementById('timeFilterSelect').addEventListener('change', function() {
document.getElementById('filterForm').submit();
});

/*
document.getElementById('tableRadio').addEventListener('change', function() {
document.getElementById('tableViewContent').classList.remove('d-none');
document.getElementById('chartViewContent').classList.add('d-none');
document.getElementById('filterForm').submit();
});

document.getElementById('chartRadio').addEventListener('change', function() {
document.getElementById('chartViewContent').classList.remove('d-none');
document.getElementById('tableViewContent').classList.add('d-none');
document.getElementById('filterForm').submit();
});
*/


</script>
@@ -4,9 +4,9 @@ from . import views
urlpatterns = [
    path('', views.link_list, name='link_list'),
    #
    path('logs_debug', views.logs_debug, name='logs_debug'),
    path('logs_info', views.logs_info, name='logs_info'),
    path('logs_error', views.logs_error, name='logs_error'),
    path('logs/<str:log_type>', views.logs, name='logs'),
    #
    path('task/<str:task>', views.trigger_task, name='trigger_task'),
    #
    path('charts/', views.charts, name='charts'),
    path('urls-by-fetch-date/', views.urls_by_fetch_date, name='urls_by_fetch_date'),
@@ -17,10 +17,4 @@ urlpatterns = [
    path('urls/', views.filtered_urls, name='filtered_urls'),
    path('urls/<int:id>/', views.url_detail_view, name='url_detail'),
    path('urls/<int:id>/fetch/', views.fetch_details, name='fetch_details'),
    #
    #path('url/', views.urls, name='url_detail'),
    #path('url/<int:id>/', views.url_detail_view, name='url_detail'),
    #path('url/<int:id>/fetch/', views.fetch_details, name='fetch_details'),
    #
    path('task/<str:task>', views.trigger_task, name='trigger_task'),
]
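With the three `logs_*` routes collapsed into one parameterized path, callers resolve it by name and log type, for example:

```python
# a sketch, assuming the URLconf above is mounted at the site root
from django.urls import reverse

reverse("logs", kwargs={"log_type": "info"})             # -> "/logs/info"
reverse("trigger_task", kwargs={"task": "fetch_feeds"})  # task name is hypothetical
```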
@@ -23,9 +23,9 @@ def link_list(request):
        # Admin panel
        "http://localhost:8000/admin",
        # Logs
        "http://localhost:8000/logs_debug",
        "http://localhost:8000/logs_info",
        "http://localhost:8000/logs_error",
        "http://localhost:8000/logs/debug",
        "http://localhost:8000/logs/info",
        "http://localhost:8000/logs/error",
        # URLs
        "http://localhost:8000/urls",
        # Charts
@@ -36,17 +36,13 @@ def link_list(request):
    return JsonResponse({"links": list_links })

####################################################################################################
def logs_error(request):
    with open(os.getenv("PATH_LOGS_ERROR", "logs/log_app_fetcher_error.log"), "r") as f:
        file_content = f.read()
    return HttpResponse(file_content, content_type="text/plain")
def logs_info(request):
    with open(os.getenv("PATH_LOGS_INFO", "logs/log_app_fetcher_info.log"), "r") as f:
        file_content = f.read()
    return HttpResponse(file_content, content_type="text/plain")
def logs_debug(request):
    with open(os.getenv("PATH_LOGS_DEBUG", "logs/log_app_fetcher_debug.log"), "r") as f:
        file_content = f.read()
def logs(request, log_type):
    # Capture output: python manage.py rqstats
    try:
        with open(os.getenv("PATH_LOGS_DEBUG", "logs/log_app_fetcher_{}.log".format(log_type)), "r") as f:
            file_content = f.read()
    except Exception as e:
        file_content = "Error reading logs for log type: {}".format(log_type)
    return HttpResponse(file_content, content_type="text/plain")

####################################################################################################
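A quick smoke test of the consolidated endpoint (assumes the dev server from the README's deploy step is running on localhost:8000):

```python
import requests

for log_type in ["debug", "info", "error"]:
    r = requests.get("http://localhost:8000/logs/{}".format(log_type))
    print(log_type, r.status_code, len(r.text))
```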
@@ -208,48 +204,77 @@ from .models import Urls, Search, Source
from django.db.models import Q
from django.utils.timezone import now, timedelta


def filtered_urls(request):
    statuses = Urls.STATUS_ENUM.choices
    searches = Search.objects.all()
    sources = Source.objects.all()
    # TODO: Cache languages, update once every N
    languages = UrlContent.objects.distinct('language').values_list('language', flat=True)
    # languages = [l for l in languages if l is not None]
    languages = list(UrlContent.objects.distinct('language').values_list('language', flat=True))
    # Null for visualization
    languages = ["Null"] + [l for l in languages if l is not None]

    # Get selected parameters
    selected_status = request.GET.getlist('status')
    selected_search = request.GET.getlist('search')
    selected_source = request.GET.getlist('source')
    selected_language = request.GET.getlist('language')
    selected_status = request.GET.getlist('status', ["null"])
    selected_search = request.GET.getlist('search', ["null"])
    selected_source = request.GET.getlist('source', ["null"])
    selected_language = request.GET.getlist('language', ["null"])
    selected_days = request.GET.get("days", 30)
    per_page = request.GET.get('per_page', 100)  # Default is X URLs per page
    page_number = request.GET.get('page')  # Get the current page number

    all_status = [str(status[0]) for status in statuses]
    all_search = [str(search.id) for search in searches]
    all_source = [str(source.id) for source in sources]
    all_languages = languages

    # Override with default filters? [Case: no params update on URL] -> Only on "Home" click, or "Next page"
    if (len(request.GET.keys()) == 0) or ((len(request.GET.keys()) == 1) and ("page" in request.GET.keys())):
        selected_status = [str(status[0]) for status in statuses]
        selected_search = [str(search.id) for search in searches]
        selected_source = [str(source.id) for source in sources]
        selected_language = languages
        selected_status = ["all"]
        selected_search = ["all"]
        selected_source = ["all"]
        selected_language = ["all"]


    # print(set(selected_status), set(all_status))
    """
    # List of TODO remove...
    if (set(selected_status) == set(all_status)):
        selected_status = ["all"]
    if (set(selected_search) == set(all_search)):
        selected_search = ["all"]
    if (set(selected_source) == set(all_source)):
        selected_source = ["all"]
    if (set(selected_language) == set(languages)):
        selected_language = ["all"]
    """

    # Filter URLs based on selected filters
    if ('' in selected_status) or ('' in selected_search) or ('' in selected_source):
    if ('null' in selected_status) or ('null' in selected_search) or ('null' in selected_source) or ('null' in selected_language):
        urls = []
    else:
        query = Q(urlssourcesearch__id_source__in=selected_source) & \
            Q(urlssourcesearch__id_search__in=selected_search) & \
            Q(status__in=selected_status) & \
            Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))

        if selected_language:
            query &= Q(urlcontent__language__in=selected_language)
        # Filter by date
        query = Q(ts_fetch__gte=now() - timedelta(days=float(selected_days)))
        # Additional filters
        if ("all" not in selected_status):
            query &= Q(status__in=selected_status)
        if ("all" not in selected_source):
            query &= Q(urlssourcesearch__id_source__in=selected_source)
        if ("all" not in selected_search):
            query &= Q(urlssourcesearch__id_search__in=selected_search)
        if ("all" not in selected_language):
            # URLs with selected languages
            subquery = Q(urlcontent__language__in=selected_language)
            if ("Null" in selected_language):
                # URLs with NULL language
                subquery |= Q(urlcontent__language__isnull=True)
                # URLs with no UrlContent record at all (similar to URLs with NULL language)
                subquery |= Q(urlcontent__id_url__isnull=True)
            # Update query
            query &= (subquery)

        urls = Urls.objects.filter(query).distinct()  # .order_by('-ts_fetch')

    # Custom replace search type
    for s in searches:
        s.type = s.type.replace("rss_feed", "rss").replace("url_host", "url").replace("keyword_search", "keyword")

    # Pagination
    paginator = Paginator(urls, per_page)  # Paginate the filtered URLs
    page_obj = paginator.get_page(page_number)  # Get the current page object
@@ -264,6 +289,9 @@ def filtered_urls(request):
    url_content_map = {
        url.id: UrlContent.objects.filter(pk=url).first() for url in page_obj.object_list
    }
    # Custom replace search type text
    for s in searches:
        s.type = s.type.replace("rss_feed", "rss").replace("url_host", "url").replace("keyword_search", "keyword")

    context = {
        'urls': page_obj,  # Pass the paginated URLs
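The rewritten filter builds one `Q` expression incrementally instead of a fixed conjunction; for a concrete selection it reduces to something like this (a sketch reusing the same field lookups as the view above):

```python
from django.db.models import Q
from django.utils.timezone import now, timedelta

# e.g. status=["valid"], language=["Null"], default 30-day window
query = Q(ts_fetch__gte=now() - timedelta(days=30.0))
query &= Q(status__in=["valid"])
query &= (Q(urlcontent__language__in=["Null"])
          | Q(urlcontent__language__isnull=True)
          | Q(urlcontent__id_url__isnull=True))
# Urls.objects.filter(query).distinct() then collapses the rows duplicated
# by the urlssourcesearch/urlcontent joins.
```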
@@ -18,7 +18,6 @@ BASE_DIR = Path(__file__).resolve().parent.parent


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'django-insecure-54mqLbW5NlO8OlVDsT3fcbg3Vf6C8Fgcoj8H0hXv3Pr8bpgqvOuiaeqvGn34sGwt'
@@ -74,7 +73,6 @@ WSGI_APPLICATION = 'core.wsgi.application'


# Database
# https://docs.djangoproject.com/en/5.1/ref/settings/#databases

DATABASES = {
    'default': {
@@ -110,30 +108,26 @@ SCHEDULER_QUEUES = {
        'HOST': os.environ.get("REDIS_HOST", "localhost"),
        'PORT': os.environ.get("REDIS_PORT", 6379),
        'DB': os.environ.get("REDIS_DB", 0),
        'DEFAULT_TIMEOUT': os.environ.get("RQ_DEFAULT_TIMEOUT", 60*15),
    },
    'high': {
        'HOST': os.environ.get("REDIS_HOST", "localhost"),
        'PORT': os.environ.get("REDIS_PORT", 6379),
        'DB': os.environ.get("REDIS_DB", 0),
        'DEFAULT_TIMEOUT': os.environ.get("RQ_DEFAULT_TIMEOUT", 60*15),
    },
    'low': {
        'HOST': os.environ.get("REDIS_HOST", "localhost"),
        'PORT': os.environ.get("REDIS_PORT", 6379),
        'DB': os.environ.get("REDIS_DB", 0),
        'DEFAULT_TIMEOUT': os.environ.get("RQ_DEFAULT_TIMEOUT", 60*15),
    }
}
SCHEDULER_CONFIG = {
    'EXECUTIONS_IN_PAGE': 20,
    'DEFAULT_TIMEOUT': os.environ.get("JOB_DEFAULT_TIMEOUT", 60*30),  # 30 minutes
    'DEFAULT_RESULT_TTL': 60*60*12,  # 12 hours
    'DEFAULT_TIMEOUT': os.environ.get("RQ_DEFAULT_TIMEOUT", 60*15),  # 15 minutes
    'EXECUTIONS_IN_PAGE': 20,
    'SCHEDULER_INTERVAL': 10,  # 10 seconds
}

# Password validation
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
@@ -152,7 +146,6 @@ AUTH_PASSWORD_VALIDATORS = [


# Internationalization
# https://docs.djangoproject.com/en/5.1/topics/i18n/

LANGUAGE_CODE = 'en-us'

@@ -164,11 +157,9 @@ USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.1/howto/static-files/

STATIC_URL = 'static/'

# Default primary key field type
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field

DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
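One caveat with the env-driven values above: `os.environ.get` returns a string whenever the variable is set, while the fallbacks (`6379`, `60*15`) are ints, so the setting's type depends on the environment. Coercing explicitly keeps it stable (a suggestion, not part of the commit):

```python
import os

# Always ints, whether the env vars are set or not
REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
RQ_DEFAULT_TIMEOUT = int(os.environ.get("RQ_DEFAULT_TIMEOUT", 60 * 15))
```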
@@ -1,46 +1,4 @@
[
    {
        "model": "RepeatableTaskType",
        "name": "Fetch Feeds",
        "callable": "api.tasks.fetch_feeds",
        "callable_args": [],
        "callable_kwargs": [],
        "enabled": true,
        "queue": "default",
        "repeat": null,
        "at_front": false,
        "timeout": null,
        "result_ttl": 86400,
        "cron_string": null,
        "scheduled_time": "2025-03-27T14:33:56+00:00",
        "interval": 15,
        "interval_unit": "minutes",
        "successful_runs": 215,
        "failed_runs": 0,
        "last_successful_run": "2025-03-27 14:18:58.028684+00:00",
        "last_failed_run": null
    },
    {
        "model": "RepeatableTaskType",
        "name": "Process raw URLs",
        "callable": "api.tasks.process_raw_urls",
        "callable_args": [],
        "callable_kwargs": [],
        "enabled": true,
        "queue": "low",
        "repeat": null,
        "at_front": false,
        "timeout": null,
        "result_ttl": 86400,
        "cron_string": null,
        "scheduled_time": "2025-03-27T14:35:08+00:00",
        "interval": 1,
        "interval_unit": "hours",
        "successful_runs": 41,
        "failed_runs": 0,
        "last_successful_run": "2025-03-27 13:35:48.534489+00:00",
        "last_failed_run": null
    },
    {
        "model": "RepeatableTaskType",
        "name": "Process error URLs",
@@ -54,54 +12,12 @@
        "timeout": null,
        "result_ttl": 86400,
        "cron_string": null,
        "scheduled_time": "2025-03-27T16:36:21+00:00",
        "scheduled_time": "2025-04-01T12:36:21+00:00",
        "interval": 4,
        "interval_unit": "hours",
        "successful_runs": 10,
        "successful_runs": 15,
        "failed_runs": 0,
        "last_successful_run": "2025-03-27 12:37:28.301866+00:00",
        "last_failed_run": null
    },
    {
        "model": "RepeatableTaskType",
        "name": "Fetch Parser",
        "callable": "api.tasks.fetch_parser",
        "callable_args": [],
        "callable_kwargs": [],
        "enabled": true,
        "queue": "default",
        "repeat": null,
        "at_front": false,
        "timeout": null,
        "result_ttl": 86400,
        "cron_string": null,
        "scheduled_time": "2025-03-27T14:25:42+00:00",
        "interval": 1,
        "interval_unit": "hours",
        "successful_runs": 44,
        "failed_runs": 0,
        "last_successful_run": "2025-03-27 13:25:46.205433+00:00",
        "last_failed_run": null
    },
    {
        "model": "RepeatableTaskType",
        "name": "Fetch Search",
        "callable": "api.tasks.fetch_search",
        "callable_args": [],
        "callable_kwargs": [],
        "enabled": true,
        "queue": "default",
        "repeat": null,
        "at_front": false,
        "timeout": null,
        "result_ttl": 86400,
        "cron_string": null,
        "scheduled_time": "2025-03-27T14:29:33+00:00",
        "interval": 1,
        "interval_unit": "hours",
        "successful_runs": 46,
        "failed_runs": 0,
        "last_successful_run": "2025-03-27 13:33:00.628827+00:00",
        "last_successful_run": "2025-04-01 08:37:06.722770+00:00",
        "last_failed_run": null
    },
    {
@@ -117,12 +33,117 @@
        "timeout": null,
        "result_ttl": 86400,
        "cron_string": null,
        "scheduled_time": "2025-03-27T14:37:50+00:00",
        "scheduled_time": "2025-04-01T10:37:50+00:00",
        "interval": 2,
        "interval_unit": "hours",
        "successful_runs": 20,
        "successful_runs": 29,
        "failed_runs": 0,
        "last_successful_run": "2025-03-27 12:38:42.545373+00:00",
        "last_successful_run": "2025-04-01 08:42:05.864064+00:00",
        "last_failed_run": null
    },
    {
        "model": "RepeatableTaskType",
        "name": "Process MissingKids URLs ALL",
        "callable": "api.tasks.process_missing_kids_urls_all",
        "callable_args": [],
        "callable_kwargs": [],
        "enabled": true,
        "queue": "default",
        "repeat": null,
        "at_front": false,
        "timeout": null,
        "result_ttl": null,
        "cron_string": null,
        "scheduled_time": "2025-04-07T15:59:49+00:00",
        "interval": 1,
        "interval_unit": "weeks",
        "successful_runs": 0,
        "failed_runs": 0,
        "last_successful_run": null,
        "last_failed_run": null
    },
    {
        "model": "RepeatableTaskType",
        "name": "Fetch Feeds",
        "callable": "api.tasks.fetch_feeds",
        "callable_args": [],
        "callable_kwargs": [],
        "enabled": true,
        "queue": "default",
        "repeat": null,
        "at_front": false,
        "timeout": null,
        "result_ttl": 86400,
        "cron_string": null,
        "scheduled_time": "2025-04-01T10:18:56+00:00",
        "interval": 15,
        "interval_unit": "minutes",
        "successful_runs": 288,
        "failed_runs": 0,
        "last_successful_run": "2025-04-01 10:03:58.363856+00:00",
        "last_failed_run": null
    },
    {
        "model": "RepeatableTaskType",
        "name": "Process raw URLs",
        "callable": "api.tasks.process_raw_urls",
        "callable_args": [],
        "callable_kwargs": [],
        "enabled": true,
        "queue": "low",
        "repeat": null,
        "at_front": false,
        "timeout": null,
        "result_ttl": 86400,
        "cron_string": null,
        "scheduled_time": "2025-04-01T10:20:08+00:00",
        "interval": 15,
        "interval_unit": "minutes",
        "successful_runs": 78,
        "failed_runs": 0,
        "last_successful_run": "2025-04-01 10:05:08.394472+00:00",
        "last_failed_run": null
    },
    {
        "model": "RepeatableTaskType",
        "name": "Fetch Parser",
        "callable": "api.tasks.fetch_parser",
        "callable_args": [],
        "callable_kwargs": [],
        "enabled": true,
        "queue": "default",
        "repeat": null,
        "at_front": false,
        "timeout": null,
        "result_ttl": 86400,
        "cron_string": null,
        "scheduled_time": "2025-04-01T10:25:42+00:00",
        "interval": 1,
        "interval_unit": "hours",
        "successful_runs": 62,
        "failed_runs": 0,
        "last_successful_run": "2025-04-01 09:25:57.977051+00:00",
        "last_failed_run": null
    },
    {
        "model": "RepeatableTaskType",
        "name": "Fetch Search",
        "callable": "api.tasks.fetch_search",
        "callable_args": [],
        "callable_kwargs": [],
        "enabled": true,
        "queue": "default",
        "repeat": null,
        "at_front": false,
        "timeout": null,
        "result_ttl": 86400,
        "cron_string": null,
        "scheduled_time": "2025-04-01T10:29:33+00:00",
        "interval": 1,
        "interval_unit": "hours",
        "successful_runs": 63,
        "failed_runs": 0,
        "last_successful_run": "2025-04-01 09:37:20.671072+00:00",
        "last_failed_run": null
    }
]
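The diff also refreshes this exported list of repeatable tasks (run counters and next `scheduled_time` values). A small sanity check before re-importing such an export (the file name is hypothetical; the field names come from the entries above):

```python
import json

with open("scheduled_tasks.json") as f:
    tasks = json.load(f)

# Print one line per task: name, queue, and schedule
for t in tasks:
    print(t["name"], t["queue"], t["interval"], t["interval_unit"])
```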