Docker and deployment to fetcher server

This commit is contained in:
Luciano Gervasoni
2025-06-27 09:14:44 +02:00
parent f659d4adb3
commit 8b689729bf
12 changed files with 148 additions and 222 deletions

View File

@@ -55,3 +55,9 @@ docker compose -f docker-compose-dev.yml down -v
docker compose -f docker-compose-dev.yml build --progress=plain docker compose -f docker-compose-dev.yml build --progress=plain
docker compose -f docker-compose-dev.yml up docker compose -f docker-compose-dev.yml up
``` ```
* Prod mode
```
docker compose -f docker-compose-prod.yml down -v
docker compose -f docker-compose-prod.yml build --progress=plain
docker compose -f docker-compose-prod.yml up -d
```

View File

@@ -54,6 +54,7 @@ class FetchSearcher():
for SearchInstance in ListSearchInstances: for SearchInstance in ListSearchInstances:
# Sleep between requests, avoid too many requests... # Sleep between requests, avoid too many requests...
time.sleep(float(os.getenv("FETCHER_BETWEEN_SEARCHES_SLEEP", 5))) time.sleep(float(os.getenv("FETCHER_BETWEEN_SEARCHES_SLEEP", 5)))
# TODO: Random proxy / VPN
SearchInstance(args).fetch_articles(db_writer, obj_search) SearchInstance(args).fetch_articles(db_writer, obj_search)
# TODO: https://github.com/tasos-py/Search-Engines-Scraper/tree/master # TODO: https://github.com/tasos-py/Search-Engines-Scraper/tree/master

View File

@@ -1,8 +1,6 @@
import time import time
import feedparser import feedparser
import os import os
from django.utils import timezone
from datetime import timedelta
from ..models import Search, Source from ..models import Search, Source
from .fetch_utils_gnews import decode_gnews_urls from .fetch_utils_gnews import decode_gnews_urls
from .logger import get_logger from .logger import get_logger

View File

@@ -11,7 +11,7 @@ logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(messa
logger = logging.getLogger("fetcher") logger = logging.getLogger("fetcher")
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
# To file log: INFO / WARNING / ERROR / CRITICAL # To file log: DEBUG / INFO / WARNING / ERROR / CRITICAL
fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "debug.log"), mode="a", maxBytes=10000000, backupCount=1) fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "debug.log"), mode="a", maxBytes=10000000, backupCount=1)
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s')) fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.setLevel(logging.DEBUG) fh.setLevel(logging.DEBUG)

View File

@@ -74,7 +74,7 @@ def process_missing_kids_urls_all(batch_size=None):
logger.info("Task completed: {}".format(task)) logger.info("Task completed: {}".format(task))
@job('default') @job('default')
def clean_old_url_content(older_than_days=60): def clean_old_url_content(older_than_days=14):
task = "Clean old URL content" task = "Clean old URL content"
logger.info("Task triggered: {}".format(task)) logger.info("Task triggered: {}".format(task))
DB_Handler().clean_old_url_content(older_than_days=older_than_days) DB_Handler().clean_old_url_content(older_than_days=older_than_days)

View File

@@ -24,11 +24,12 @@
[".*(youtube|tiktok|twitter|reddit)\\.com\\/.*", "invalid", 50], [".*(youtube|tiktok|twitter|reddit)\\.com\\/.*", "invalid", 50],
[".*cnbc\\.com\\/(video|quotes)\\/.*", "invalid", 75], [".*cnbc\\.com\\/(video|quotes)\\/.*", "invalid", 75],
[".*foxnews\\.com\\/(video|category)\\/.*", "invalid", 75], [".*foxnews\\.com\\/(video|category)\\/.*", "invalid", 75],
[".*radio.foxnews\\.com\\/.*", "invalid", 75], [".*radio\\.foxnews\\.com\\/.*", "invalid", 75],
[".*breitbart\\.com\\/(tag|author)\\/.*", "invalid", 75], [".*breitbart\\.com\\/(tag|author)\\/.*", "invalid", 75],
[".*zerohedge\\.com\\/(user)\\/.*", "invalid", 75], [".*zerohedge\\.com\\/(user)\\/.*", "invalid", 75],
[".*zerohedge\\.com\\/(economics|political|markets|)\\/.*", "valid", 50], [".*zerohedge\\.com\\/(economics|political|markets|)\\/.*", "valid", 50],
[".*breitbart\\.com\\/(economy|entertainment|border|crime|clips)\\/.*", "valid", 50], [".*breitbart\\.com\\/(economy|entertainment|border|crime|clips)\\/.*", "valid", 50],
[".*foxnews\\.com\\/(lifestyle|opinion|sports|world)\\/.*", "valid", 50] [".*foxnews\\.com\\/(lifestyle|opinion|sports|world)\\/.*", "valid", 50]
[".*missingkids\\.org\\/poster\\/.*", "valid", 50]
] ]
} }

View File

@@ -29,13 +29,15 @@ def wait_connection():
connected = True connected = True
except psycopg.OperationalError as e: except psycopg.OperationalError as e:
print(str(e))
# Connection not ready... # Connection not ready...
# print(".", end="") # print(".", end="")
time.sleep(2) time.sleep(15)
except Exception as e: except Exception as e:
print(str(e))
# Connection not ready... # Connection not ready...
# print("e", end="") # print("e", end="")
time.sleep(2) time.sleep(15)
print("DB connection ready") print("DB connection ready")
@@ -57,7 +59,8 @@ def initialize_tables():
ts_fetch TIMESTAMPTZ NOT NULL DEFAULT NOW(), ts_fetch TIMESTAMPTZ NOT NULL DEFAULT NOW(),
status URL_STATUS NOT NULL DEFAULT 'raw' -- , status URL_STATUS NOT NULL DEFAULT 'raw' -- ,
-- status_wendy WENDY_STATUS DEFAULT NULL, -- status_wendy WENDY_STATUS DEFAULT NULL,
-- ts_wendy TIMESTAMPTZ DEFAULT NULL -- ts_wendy TIMESTAMPTZ DEFAULT NULL,
-- child_abuse BOOLEAN DEFAULT NULL,
); );
CREATE INDEX idx_urls_status ON urls(status); CREATE INDEX idx_urls_status ON urls(status);
CREATE INDEX idx_urls_ts_fetch ON urls(ts_fetch); CREATE INDEX idx_urls_ts_fetch ON urls(ts_fetch);

View File

@@ -13,7 +13,7 @@
"result_ttl": 86400, "result_ttl": 86400,
"cron_string": null, "cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 4, "interval": 8,
"interval_unit": "hours", "interval_unit": "hours",
"successful_runs": 0, "successful_runs": 0,
"failed_runs": 0, "failed_runs": 0,
@@ -139,7 +139,7 @@
"result_ttl": 86400, "result_ttl": 86400,
"cron_string": null, "cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00", "scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 2, "interval": 4,
"interval_unit": "hours", "interval_unit": "hours",
"successful_runs": 0, "successful_runs": 0,
"failed_runs": 0, "failed_runs": 0,

View File

@@ -19,11 +19,6 @@ services:
dns: dns:
- 1.1.1.1 - 1.1.1.1
- 1.0.0.1 - 1.0.0.1
deploy:
resources:
limits:
cpus: '${DEPLOY_CPUS}'
memory: ${DEPLOY_RAM}
fetcher_app_urls: fetcher_app_urls:
image: fetcher_app_urls image: fetcher_app_urls
@@ -70,55 +65,22 @@ services:
- PEXELS_API_KEY=${PEXELS_API_KEY} - PEXELS_API_KEY=${PEXELS_API_KEY}
- OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT} - OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT}
######################## ########################
#volumes: # Development mode
# - ./app_urls:/opt/app
########################
ports: ports:
- 8000 # :8000 - 8000
depends_on: depends_on:
- fetcher_db - fetcher_db
- fetcher_redis - fetcher_redis
dns: dns:
- 1.1.1.1 - 1.1.1.1
- 1.0.0.1 - 1.0.0.1
deploy:
resources:
limits:
cpus: '${DEPLOY_CPUS}'
memory: ${DEPLOY_RAM}
labels: # Reverse proxy sample
- "traefik.enable=true"
- "traefik.http.routers.fetcher.rule=Host(`${REVERSE_PROXY_URL}`)"
- "traefik.http.routers.fetcher.entrypoints=websecure"
- "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
- "traefik.http.services.fetcher.loadbalancer.server.port=8000"
networks:
- default # This network
- docker_default # Reverse proxy network
fetcher_db:
image: postgres:17
container_name: fetcher_db
restart: unless-stopped
# Set shared memory limit when using docker-compose
shm_size: 128mb
environment:
POSTGRES_DB: ${DB_NAME}
POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_USER: ${DB_USER}
POSTGRES_INITDB_ARGS: '--data-checksums'
volumes: # Persistent DB?
- ${PATH_DB_DATA}/postgres:/var/lib/postgresql/data
ports:
- 5432 #:5432
fetcher_redis: fetcher_redis:
image: redis:alpine image: redis:alpine
container_name: fetcher_redis container_name: fetcher_redis
restart: unless-stopped restart: unless-stopped
ports: ports:
- 6379 #:6379 - 6379
networks: fetcher_db:
docker_default: container_name: fetcher_db
external: true restart: unless-stopped

View File

@@ -3,22 +3,9 @@ version: '3.9'
services: services:
fetcher_app_selenium: fetcher_app_selenium:
image: fetcher_app_selenium extends:
build: file: docker-compose-base.yml
context: ./app_selenium service: fetcher_app_selenium
args:
- ARCH=${ARCH} # arm64, amd64
container_name: fetcher_app_selenium
restart: unless-stopped
shm_size: 512mb
environment:
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
ports:
- 80:80
dns:
- 1.1.1.1
- 1.0.0.1
deploy: deploy:
resources: resources:
limits: limits:
@@ -26,66 +13,11 @@ services:
memory: ${DEPLOY_RAM} memory: ${DEPLOY_RAM}
fetcher_app_urls: fetcher_app_urls:
image: fetcher_app_urls extends:
build: file: docker-compose-base.yml
context: ./app_urls service: fetcher_app_urls
container_name: fetcher_app_urls #env_files:
restart: unless-stopped # - .env.dev
environment:
# Initialization
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
# Django
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
- DJANGO_DEBUG=${DJANGO_DEBUG}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
# Database
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_PORT}
- REDIS_HOST=${REDIS_HOST}
- REDIS_PORT=${REDIS_PORT}
# Job timeout: 30 min
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
# Fetcher
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP} # Sleep time between each search
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP} # Sleep time between requests to same URL host
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR} # Min amount of characters to run language detection
- FETCHER_INSERT_URL_CACHE_TIME=${FETCHER_INSERT_URL_CACHE_TIME} # Cache time: Insert raw URL
- FETCHER_ERROR_URL_CACHE_TIME=${FETCHER_ERROR_URL_CACHE_TIME} # Cache time: Error on processing URL
# Selenium
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
# Ghost
- GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
- GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
- PEXELS_API_KEY=${PEXELS_API_KEY}
- OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT}
########################
volumes: # Development mode
- ./app_urls:/opt/app
########################
ports:
- 8000:8000
depends_on:
- fetcher_db
- fetcher_redis
dns:
- 1.1.1.1
- 1.0.0.1
deploy:
resources:
limits:
cpus: '${DEPLOY_CPUS}'
memory: ${DEPLOY_RAM}
#labels: # Reverse proxy sample #labels: # Reverse proxy sample
# - "traefik.enable=true" # - "traefik.enable=true"
# - "traefik.http.routers.fetcher.rule=Host(`urls.yourdomain.com`)" # - "traefik.http.routers.fetcher.rule=Host(`urls.yourdomain.com`)"
@@ -95,11 +27,21 @@ services:
#networks: #networks:
# - default # This network # - default # This network
# - docker_default # Reverse proxy network # - docker_default # Reverse proxy network
ports:
- 8000:8000
volumes: # Development mode
- ./app_urls:/opt/app
deploy:
resources:
limits:
cpus: '${DEPLOY_CPUS}'
memory: ${DEPLOY_RAM}
fetcher_db: fetcher_db:
extends:
file: docker-compose-base.yml
service: fetcher_db
image: postgres:17 image: postgres:17
container_name: fetcher_db
restart: unless-stopped
# Set shared memory limit when using docker-compose # Set shared memory limit when using docker-compose
shm_size: 128mb shm_size: 128mb
environment: environment:
@@ -107,18 +49,14 @@ services:
POSTGRES_PASSWORD: ${DB_PASSWORD} POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_USER: ${DB_USER} POSTGRES_USER: ${DB_USER}
POSTGRES_INITDB_ARGS: '--data-checksums' POSTGRES_INITDB_ARGS: '--data-checksums'
#volumes: # Persistent DB?
# - ${PATH_DB_DATA}/postgres:/var/lib/postgresql/data
ports: ports:
- 5432 #:5432 - 5432 #:5432
#volumes: # Persistent DB?
# - ./postgres:/var/lib/postgresql/data
fetcher_redis: fetcher_redis:
image: redis:alpine extends:
container_name: fetcher_redis file: docker-compose-base.yml
restart: unless-stopped service: fetcher_redis
ports: ports:
- 6379 #:6379 - 6379:6379
#networks:
# docker_default:
# external: true

View File

@@ -3,22 +3,9 @@ version: '3.9'
services: services:
fetcher_app_selenium: fetcher_app_selenium:
image: fetcher_app_selenium extends:
build: file: docker-compose-base.yml
context: ./app_selenium service: fetcher_app_selenium
args:
- ARCH=${ARCH} # arm64, amd64
container_name: fetcher_app_selenium
restart: unless-stopped
shm_size: 512mb
environment:
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
ports:
- 80
dns:
- 1.1.1.1
- 1.0.0.1
deploy: deploy:
resources: resources:
limits: limits:
@@ -26,61 +13,11 @@ services:
memory: ${DEPLOY_RAM} memory: ${DEPLOY_RAM}
fetcher_app_urls: fetcher_app_urls:
image: fetcher_app_urls extends:
build: file: docker-compose-base.yml
context: ./app_urls service: fetcher_app_urls
container_name: fetcher_app_urls
restart: unless-stopped
environment:
# Initialization
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
# Django
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
- DJANGO_DEBUG=${DJANGO_DEBUG}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
# Database
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_PORT}
- REDIS_HOST=${REDIS_HOST}
- REDIS_PORT=${REDIS_PORT}
# Job timeout: 30 min
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
# Fetcher
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP} # Sleep time between each search
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP} # Sleep time between requests to same URL host
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR} # Min amount of characters to run language detection
- FETCHER_INSERT_URL_CACHE_TIME=${FETCHER_INSERT_URL_CACHE_TIME} # Cache time: Insert raw URL
- FETCHER_ERROR_URL_CACHE_TIME=${FETCHER_ERROR_URL_CACHE_TIME} # Cache time: Error on processing URL
# Selenium
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
# Ghost
- GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
- GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
- PEXELS_API_KEY=${PEXELS_API_KEY}
- OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT}
########################
#volumes: # Development mode
# - ./app_urls:/opt/app
########################
ports: ports:
- 8000:8000 - 8000:8000
depends_on:
- fetcher_db
- fetcher_redis
dns:
- 1.1.1.1
- 1.0.0.1
deploy: deploy:
resources: resources:
limits: limits:
@@ -88,7 +25,9 @@ services:
memory: ${DEPLOY_RAM} memory: ${DEPLOY_RAM}
fetcher_db: fetcher_db:
container_name: fetcher_db extends:
file: docker-compose-base.yml
service: fetcher_db
image: alpine:latest image: alpine:latest
restart: unless-stopped restart: unless-stopped
deploy: deploy:
@@ -98,22 +37,21 @@ services:
volumes: volumes:
# REQUIREMENT: Add fetcher's SSH public key into the DB's .ssh/authorized_keys machine # REQUIREMENT: Add fetcher's SSH public key into the DB's .ssh/authorized_keys machine
- ~/.ssh:/root/.ssh:ro - ~/.ssh:/root/.ssh:ro
ports:
- 15885:15885
- 5432:5432
command: command:
- sh - sh
- -c - -c
- | - |
apk add --update openssh autossh apk add --update openssh autossh
autossh -M 15885 -N -o 'GatewayPorts yes' -L 0.0.0.0:5432:127.0.0.1:5432 ${REMOTE_USERNAME}@${REMOTE_HOST} # Monitor status on port 15885
### Alternative: autossh -M 15885 -N -L 0.0.0.0:5432:127.0.0.1:5432 ${REMOTE_USERNAME}@${REMOTE_HOST}
### autossh -M 0 -o "ServerAliveInterval 30" -o "ServerAliveCountMax 3" -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org # autossh -M 15885 -N -o 'GatewayPorts yes' -L 0.0.0.0:5432:127.0.0.1:5432 ${REMOTE_USERNAME}@${REMOTE_HOST}
### -M 15882 monitors on port X; if that port is already in use, it conflicts!
###autossh -M 15882 -N -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org
###ssh -N -o "StrictHostKeyChecking no" -o "ServerAliveInterval 60" -o "ServerAliveCountMax 3" -o 'PasswordAuthentication no' -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org
network_mode: "host"
fetcher_redis: fetcher_redis:
image: redis:alpine extends:
container_name: fetcher_redis file: docker-compose-base.yml
restart: unless-stopped service: fetcher_redis
ports: ports:
- 6379 #:6379 - 6379:6379

79
utils/DB-Dev.ipynb Normal file
View File

@@ -0,0 +1,79 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#!pip install python-dotenv\n",
"from dotenv import load_dotenv\n",
"\n",
"# Specify the path to your .env file (optional if in the current dir)\n",
"load_dotenv(dotenv_path=\".env\", override=True)\n",
"\n",
"import os\n",
"import psycopg\n",
"from sshtunnel import SSHTunnelForwarder\n",
"\n",
"if (os.environ.get(\"SSH_TUNNEL_BASED\") == \"true\"):\n",
" print(\"SSH tunnel: True\")\n",
"else:\n",
" print(\"SSH tunnel: False\")\n",
"\n",
"connect_info = \"host={} port={} user={} password={} dbname={}\".format(os.environ.get(\"DB_HOST\"), os.environ.get(\"DB_PORT\"), os.environ.get(\"DB_USER\"), os.environ.get(\"DB_PASSWORD\"), os.environ.get(\"DB_NAME\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"if (os.environ.get(\"SSH_TUNNEL_BASED\") == \"true\"):\n",
" ssh_tunnel = SSHTunnelForwarder(\n",
" (os.environ.get(\"REMOTE_HOST\"), int(os.environ.get(\"REMOTE_SSH_PORT\"))), \n",
" ssh_username=os.environ.get(\"REMOTE_USERNAME\"), ssh_password=os.environ.get(\"REMOTE_PASSWORD\"), \n",
" remote_bind_address=('localhost', int(os.environ.get(\"REMOTE_PORT\"))), local_bind_address=('localhost', int(os.environ.get(\"DB_PORT\"))) \n",
" )\n",
" ssh_tunnel.start()\n",
"\n",
"try:\n",
" with psycopg.connect(connect_info) as conn:\n",
" if True:\n",
" for t in conn.execute(\"\"\"\n",
" SELECT * from URLS WHERE id IN (SELECT id_url FROM URLS_SOURCE_SEARCH INNER JOIN SEARCH ON URLS_SOURCE_SEARCH.id_search = SEARCH.id WHERE SEARCH.search LIKE '%child abuse%') LIMIT 5;\n",
" \"\"\").fetchall():\n",
" print(t)\n",
" \n",
"except Exception as e:\n",
" print(\"Err:\", str(e))\n",
"\n",
"if (os.environ.get(\"SSH_TUNNEL_BASED\") == \"true\"):\n",
" ssh_tunnel.stop()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_urls",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}