Views update URLs list, job timeout, url content fail debug
This commit is contained in:
@@ -151,26 +151,30 @@ class DB_Handler():
|
|||||||
# Update status
|
# Update status
|
||||||
set_status(obj_url, Urls.STATUS_ENUM.VALID)
|
set_status(obj_url, Urls.STATUS_ENUM.VALID)
|
||||||
|
|
||||||
# Create or update extracted URL data
|
try:
|
||||||
UrlContent.objects.update_or_create(
|
# Create or update extracted URL data
|
||||||
id_url=obj_url,
|
UrlContent.objects.update_or_create(
|
||||||
defaults = {
|
id_url=obj_url,
|
||||||
"date_published" : dict_url_data.get("publish_date"),
|
defaults = {
|
||||||
"title" : dict_url_data.get("title"),
|
"date_published" : dict_url_data.get("publish_date"),
|
||||||
"description" : dict_url_data.get("description"),
|
"title" : dict_url_data.get("title"),
|
||||||
"content" : dict_url_data.get("content"),
|
"description" : dict_url_data.get("description"),
|
||||||
"valid_content" : dict_url_data.get("valid_content"),
|
"content" : dict_url_data.get("content"),
|
||||||
"language" : dict_url_data.get("language"),
|
"valid_content" : dict_url_data.get("valid_content"),
|
||||||
"keywords" : dict_url_data.get("keywords"),
|
"language" : dict_url_data.get("language"),
|
||||||
"tags" : dict_url_data.get("tags"),
|
"keywords" : dict_url_data.get("keywords"),
|
||||||
"authors" : dict_url_data.get("authors"),
|
"tags" : dict_url_data.get("tags"),
|
||||||
"image_main_url" : dict_url_data.get("image_main_url"),
|
"authors" : dict_url_data.get("authors"),
|
||||||
"images_url" : dict_url_data.get("images_url"),
|
"image_main_url" : dict_url_data.get("image_main_url"),
|
||||||
"videos_url" : dict_url_data.get("videos_url"),
|
"images_url" : dict_url_data.get("images_url"),
|
||||||
"url_host" : dict_url_data.get("url_host"),
|
"videos_url" : dict_url_data.get("videos_url"),
|
||||||
"site_name" : dict_url_data.get("site_name"),
|
"url_host" : dict_url_data.get("url_host"),
|
||||||
}
|
"site_name" : dict_url_data.get("site_name"),
|
||||||
)
|
}
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("Error in update_or_create UrlContent: {}\ndict_url_data: {}\n{}\n{}".format(obj_url.url, dict_url_data, str(e), traceback.format_exc()))
|
||||||
|
|
||||||
|
|
||||||
def process_raw_urls(self, batch_size):
|
def process_raw_urls(self, batch_size):
|
||||||
|
|
||||||
|
|||||||
@@ -17,24 +17,16 @@ def trigger_task(request, task):
|
|||||||
|
|
||||||
####################################################################################################
|
####################################################################################################
|
||||||
def link_list(request):
|
def link_list(request):
|
||||||
prefix = "http://localhost:8000/task"
|
# Base URL path
|
||||||
links = ["fetch_feeds", "fetch_parser", "fetch_search", "fetch_missingkids_5", "fetch_missingkids_all", "process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_all"]
|
app_url = request.build_absolute_uri()
|
||||||
|
# Tasks
|
||||||
list_links = [
|
links_fetch = ["fetch_feeds", "fetch_parser", "fetch_search", "fetch_missingkids_5", "fetch_missingkids_all"]
|
||||||
# DB
|
links_process = ["process_raw_urls_50", "process_error_urls_50", "process_missing_kids_urls_50", "process_missing_kids_urls_all"]
|
||||||
"http://localhost:8080/?pgsql=matitos_db&username=supermatitos&db=matitos&ns=public&select=urls&order%5B0%5D=id&limit=500",
|
# List of links
|
||||||
# Admin panel
|
list_links = \
|
||||||
"http://localhost:8000/admin",
|
[ os.path.join(app_url, "admin"), os.path.join(app_url, "urls") ] + \
|
||||||
# Logs
|
[ os.path.join(app_url, "logs", log_type) for log_type in ["debug", "info", "error"] ] + \
|
||||||
"http://localhost:8000/logs/debug",
|
[ os.path.join(app_url, "task", l) for l in links_fetch + links_process ]
|
||||||
"http://localhost:8000/logs/info",
|
|
||||||
"http://localhost:8000/logs/error",
|
|
||||||
# URLs
|
|
||||||
"http://localhost:8000/urls",
|
|
||||||
# Charts
|
|
||||||
"http://localhost:8000/urls/charts",
|
|
||||||
# Fetcher tasks
|
|
||||||
] + [os.path.join(prefix, l) for l in links]
|
|
||||||
# Json
|
# Json
|
||||||
return JsonResponse({"links": list_links })
|
return JsonResponse({"links": list_links })
|
||||||
|
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ services:
|
|||||||
- REDIS_HOST=${REDIS_HOST:-fetcher_redis}
|
- REDIS_HOST=${REDIS_HOST:-fetcher_redis}
|
||||||
- REDIS_PORT=${REDIS_PORT:-6379}
|
- REDIS_PORT=${REDIS_PORT:-6379}
|
||||||
# Job timeout: 30 min
|
# Job timeout: 30 min
|
||||||
- JOB_DEFAULT_TIMEOUT=${RQ_DEFAULT_TIMEOUT:-1800}
|
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT:-1800}
|
||||||
# Fetcher
|
# Fetcher
|
||||||
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP-2}
|
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP-2}
|
||||||
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5}
|
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5}
|
||||||
@@ -64,7 +64,7 @@ services:
|
|||||||
# - ./app_urls:/opt/app
|
# - ./app_urls:/opt/app
|
||||||
########################
|
########################
|
||||||
ports:
|
ports:
|
||||||
- 8000 # :8000
|
- 8000:8000
|
||||||
depends_on:
|
depends_on:
|
||||||
- fetcher_db
|
- fetcher_db
|
||||||
- fetcher_redis
|
- fetcher_redis
|
||||||
@@ -76,14 +76,14 @@ services:
|
|||||||
limits:
|
limits:
|
||||||
cpus: '4'
|
cpus: '4'
|
||||||
memory: 4G
|
memory: 4G
|
||||||
labels: # Reverse proxy sample
|
#labels: # Reverse proxy sample
|
||||||
- "traefik.enable=true"
|
# - "traefik.enable=true"
|
||||||
- "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
|
# - "traefik.http.routers.fetcher.rule=Host(`fetcher.matitos.org`)"
|
||||||
- "traefik.http.routers.fetcher.entrypoints=websecure"
|
# - "traefik.http.routers.fetcher.entrypoints=websecure"
|
||||||
- "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
|
# - "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
|
||||||
networks:
|
#networks:
|
||||||
- default # This network
|
# - default # This network
|
||||||
- docker_default # Reverse proxy network
|
# - docker_default # Reverse proxy network
|
||||||
|
|
||||||
fetcher_db:
|
fetcher_db:
|
||||||
image: postgres:17
|
image: postgres:17
|
||||||
@@ -108,6 +108,6 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- 6379 #:6379
|
- 6379 #:6379
|
||||||
|
|
||||||
networks:
|
#networks:
|
||||||
docker_default:
|
# docker_default:
|
||||||
external: true
|
# external: true
|
||||||
|
|||||||
Reference in New Issue
Block a user