Docker and deployment to fetcher server
@@ -55,3 +55,9 @@ docker compose -f docker-compose-dev.yml down -v
docker compose -f docker-compose-dev.yml build --progress=plain
docker compose -f docker-compose-dev.yml up
```
* Prod mode
```
docker compose -f docker-compose-prod.yml down -v
docker compose -f docker-compose-prod.yml build --progress=plain
docker compose -f docker-compose-prod.yml up -d
```
@@ -54,6 +54,7 @@ class FetchSearcher():
for SearchInstance in ListSearchInstances:
    # Sleep between requests, avoid too many requests...
    time.sleep(float(os.getenv("FETCHER_BETWEEN_SEARCHES_SLEEP", 5)))
    # TODO: Random proxy / VPN
    SearchInstance(args).fetch_articles(db_writer, obj_search)

# TODO: https://github.com/tasos-py/Search-Engines-Scraper/tree/master

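For context, a minimal self-contained sketch of the throttling idea in this hunk: the per-search sleep is read from `FETCHER_BETWEEN_SEARCHES_SLEEP` with a fallback of 5 seconds. `DummySearch`, `run_searches`, and the argument names are placeholders, not the project's real fetcher classes.

```
import os
import time

class DummySearch:
    """Placeholder for a real search backend class (hypothetical)."""
    def __init__(self, args):
        self.args = args

    def fetch_articles(self, db_writer, obj_search):
        print("Fetching articles for:", obj_search)

def run_searches(search_classes, args, db_writer, obj_search):
    for SearchInstance in search_classes:
        # Sleep between searches to avoid hammering the search engines; default 5 s.
        time.sleep(float(os.getenv("FETCHER_BETWEEN_SEARCHES_SLEEP", 5)))
        # TODO in the original: rotate proxy / VPN here
        SearchInstance(args).fetch_articles(db_writer, obj_search)

run_searches([DummySearch, DummySearch], args=None, db_writer=None, obj_search="example search")
```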
@@ -1,8 +1,6 @@
import time
import feedparser
import os
from django.utils import timezone
from datetime import timedelta
from ..models import Search, Source
from .fetch_utils_gnews import decode_gnews_urls
from .logger import get_logger

@@ -11,7 +11,7 @@ logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(messa
logger = logging.getLogger("fetcher")
logger.setLevel(logging.DEBUG)

# To file log: INFO / WARNING / ERROR / CRITICAL
# To file log: DEBUG / INFO / WARNING / ERROR / CRITICAL
fh = logging.handlers.RotatingFileHandler(filename=os.path.join(logs_directory, "debug.log"), mode="a", maxBytes=10000000, backupCount=1)
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.setLevel(logging.DEBUG)

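As a side note, the handler above relies on `logging.handlers`, which must be imported explicitly. A minimal self-contained sketch of the same setup follows; the `PATH_LOGS_DIRECTORY` variable appears elsewhere in this commit, but the fallback path and the `addHandler` call are assumptions.

```
import logging
import logging.handlers
import os

# Assumption: the log directory comes from PATH_LOGS_DIRECTORY, as in the compose files below.
logs_directory = os.getenv("PATH_LOGS_DIRECTORY", "./logs")
os.makedirs(logs_directory, exist_ok=True)

logging.basicConfig(format='%(filename)s | %(levelname)s | %(asctime)s | %(message)s')
logger = logging.getLogger("fetcher")
logger.setLevel(logging.DEBUG)

# Rotating file handler: DEBUG and above, ~10 MB per file, one backup kept.
fh = logging.handlers.RotatingFileHandler(
    filename=os.path.join(logs_directory, "debug.log"),
    mode="a", maxBytes=10000000, backupCount=1)
fh.setFormatter(logging.Formatter('%(levelname)s | %(asctime)s | %(message)s'))
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)

logger.debug("fetcher logger initialized")
```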
@@ -74,7 +74,7 @@ def process_missing_kids_urls_all(batch_size=None):
    logger.info("Task completed: {}".format(task))

@job('default')
def clean_old_url_content(older_than_days=60):
def clean_old_url_content(older_than_days=14):
    task = "Clean old URL content"
    logger.info("Task triggered: {}".format(task))
    DB_Handler().clean_old_url_content(older_than_days=older_than_days)

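The `@job('default')` decorator and the interval fields in the scheduler JSON further down suggest an RQ-based setup. The following is only a generic sketch of how such a task could be enqueued and scheduled with `rq` / `rq-scheduler`; the Redis host, queue name, and scheduling call are assumptions, not necessarily how this project wires it up.

```
import datetime
from redis import Redis
from rq import Queue
from rq_scheduler import Scheduler

redis_conn = Redis(host="localhost", port=6379)  # assumption: local Redis
queue = Queue("default", connection=redis_conn)

def clean_old_url_content(older_than_days=14):
    print("Cleaning URL content older than {} days".format(older_than_days))

# One-off run on the 'default' queue:
queue.enqueue(clean_old_url_content, older_than_days=14)

# Periodic run, mirroring the "interval" / "interval_unit" fields below (e.g. every 4 hours):
scheduler = Scheduler(queue=queue, connection=redis_conn)
scheduler.schedule(
    scheduled_time=datetime.datetime.utcnow(),
    func=clean_old_url_content,
    kwargs={"older_than_days": 14},
    interval=4 * 60 * 60,  # seconds
    repeat=None,           # repeat until cancelled
)
```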
@@ -24,11 +24,12 @@
[".*(youtube|tiktok|twitter|reddit)\\.com\\/.*", "invalid", 50],
[".*cnbc\\.com\\/(video|quotes)\\/.*", "invalid", 75],
[".*foxnews\\.com\\/(video|category)\\/.*", "invalid", 75],
[".*radio.foxnews\\.com\\/.*", "invalid", 75],
[".*radio\\.foxnews\\.com\\/.*", "invalid", 75],
[".*breitbart\\.com\\/(tag|author)\\/.*", "invalid", 75],
[".*zerohedge\\.com\\/(user)\\/.*", "invalid", 75],
[".*zerohedge\\.com\\/(economics|political|markets|)\\/.*", "valid", 50],
[".*breitbart\\.com\\/(economy|entertainment|border|crime|clips)\\/.*", "valid", 50],
[".*foxnews\\.com\\/(lifestyle|opinion|sports|world)\\/.*", "valid", 50]
[".*missingkids\\.org\\/poster\\/.*", "valid", 50]
]
}

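Each rule above is a `[regex, status, priority]` triple. How the project combines them is not shown in this hunk; as a rough illustration only, here is a sketch in which the highest-priority matching rule decides the URL's status (the rule subset and the tie-breaking are assumptions).

```
import re

# Hypothetical rule list in the same [pattern, status, priority] shape as the JSON above.
RULES = [
    [r".*(youtube|tiktok|twitter|reddit)\.com\/.*", "invalid", 50],
    [r".*foxnews\.com\/(video|category)\/.*", "invalid", 75],
    [r".*foxnews\.com\/(lifestyle|opinion|sports|world)\/.*", "valid", 50],
    [r".*missingkids\.org\/poster\/.*", "valid", 50],
]

def classify_url(url, rules=RULES, default="raw"):
    """Return the status of the highest-priority matching rule (assumed selection logic)."""
    best = None
    for pattern, status, priority in rules:
        if re.match(pattern, url) and (best is None or priority > best[1]):
            best = (status, priority)
    return best[0] if best else default

print(classify_url("https://www.foxnews.com/video/12345"))      # -> invalid
print(classify_url("https://www.foxnews.com/world/somewhere"))  # -> valid
```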
@@ -29,13 +29,15 @@ def wait_connection():
            connected = True

        except psycopg.OperationalError as e:
            print(str(e))
            # Connection not ready...
            # print(".", end="")
            time.sleep(2)
            time.sleep(15)
        except Exception as e:
            print(str(e))
            # Connection not ready...
            # print("e", end="")
            time.sleep(2)
            time.sleep(15)

    print("DB connection ready")

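The hunk above only shows the exception branches. For context, a self-contained sketch of the same wait-for-Postgres idea with `psycopg`; the connection parameters, timeout, and overall deadline are assumptions.

```
import os
import time
import psycopg

def wait_connection(max_wait_s=300):
    """Minimal sketch: poll until Postgres accepts connections, then return True."""
    # Assumption: connection parameters come from the same DB_* env variables used elsewhere.
    conninfo = "host={} port={} user={} password={} dbname={}".format(
        os.getenv("DB_HOST", "fetcher_db"), os.getenv("DB_PORT", "5432"),
        os.getenv("DB_USER"), os.getenv("DB_PASSWORD"), os.getenv("DB_NAME"))
    deadline = time.monotonic() + max_wait_s
    while time.monotonic() < deadline:
        try:
            with psycopg.connect(conninfo, connect_timeout=5):
                print("DB connection ready")
                return True
        except psycopg.OperationalError as e:
            print(str(e))
            time.sleep(15)  # DB not ready yet; retry later
    return False
```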
@@ -57,7 +59,8 @@ def initialize_tables():
    ts_fetch TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    status URL_STATUS NOT NULL DEFAULT 'raw' -- ,
    -- status_wendy WENDY_STATUS DEFAULT NULL,
    -- ts_wendy TIMESTAMPTZ DEFAULT NULL
    -- ts_wendy TIMESTAMPTZ DEFAULT NULL,
    -- child_abuse BOOLEAN DEFAULT NULL,
);
CREATE INDEX idx_urls_status ON urls(status);
CREATE INDEX idx_urls_ts_fetch ON urls(ts_fetch);

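Only a fragment of the `urls` DDL appears in this hunk. Below is a sketch of how a helper like `initialize_tables()` might execute it with `psycopg`; the `URL_STATUS` enum values and the `id`/`url` columns are placeholders for illustration, not the project's actual schema.

```
import psycopg

# Placeholder DDL: the enum values and the id/url columns are assumptions.
DDL_STATEMENTS = [
    "CREATE TYPE URL_STATUS AS ENUM ('raw', 'processed', 'error')",
    """
    CREATE TABLE IF NOT EXISTS urls (
        id BIGSERIAL PRIMARY KEY,
        url TEXT UNIQUE NOT NULL,
        ts_fetch TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        status URL_STATUS NOT NULL DEFAULT 'raw'
    )
    """,
    "CREATE INDEX IF NOT EXISTS idx_urls_status ON urls(status)",
    "CREATE INDEX IF NOT EXISTS idx_urls_ts_fetch ON urls(ts_fetch)",
]

def initialize_tables(conninfo):
    # Run each DDL statement; tolerate an already-existing enum so the call stays idempotent.
    with psycopg.connect(conninfo) as conn:
        for stmt in DDL_STATEMENTS:
            try:
                conn.execute(stmt)
            except psycopg.errors.DuplicateObject:
                conn.rollback()  # e.g. the URL_STATUS type already exists
        conn.commit()
```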
@@ -13,7 +13,7 @@
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 4,
"interval": 8,
"interval_unit": "hours",
"successful_runs": 0,
"failed_runs": 0,
@@ -139,7 +139,7 @@
"result_ttl": 86400,
"cron_string": null,
"scheduled_time": "2025-01-01T00:00:00+00:00",
"interval": 2,
"interval": 4,
"interval_unit": "hours",
"successful_runs": 0,
"failed_runs": 0,

@@ -19,11 +19,6 @@ services:
dns:
- 1.1.1.1
- 1.0.0.1
deploy:
resources:
limits:
cpus: '${DEPLOY_CPUS}'
memory: ${DEPLOY_RAM}

fetcher_app_urls:
image: fetcher_app_urls
@@ -70,55 +65,22 @@ services:
- PEXELS_API_KEY=${PEXELS_API_KEY}
- OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT}
########################
#volumes: # Development mode
# - ./app_urls:/opt/app
########################
ports:
- 8000 # :8000
- 8000
depends_on:
- fetcher_db
- fetcher_redis
dns:
- 1.1.1.1
- 1.0.0.1
deploy:
resources:
limits:
cpus: '${DEPLOY_CPUS}'
memory: ${DEPLOY_RAM}
labels: # Reverse proxy sample
- "traefik.enable=true"
- "traefik.http.routers.fetcher.rule=Host(`${REVERSE_PROXY_URL}`)"
- "traefik.http.routers.fetcher.entrypoints=websecure"
- "traefik.http.routers.fetcher.tls.certresolver=myresolvercd"
- "traefik.http.services.fetcher.loadbalancer.server.port=8000"
networks:
- default # This network
- docker_default # Reverse proxy network

fetcher_db:
image: postgres:17
container_name: fetcher_db
restart: unless-stopped
# Set shared memory limit when using docker-compose
shm_size: 128mb
environment:
POSTGRES_DB: ${DB_NAME}
POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_USER: ${DB_USER}
POSTGRES_INITDB_ARGS: '--data-checksums'
volumes: # Persistent DB?
- ${PATH_DB_DATA}/postgres:/var/lib/postgresql/data
ports:
- 5432 #:5432

fetcher_redis:
image: redis:alpine
container_name: fetcher_redis
restart: unless-stopped
ports:
- 6379 #:6379
- 6379

networks:
docker_default:
external: true
fetcher_db:
container_name: fetcher_db
restart: unless-stopped
@@ -3,22 +3,9 @@ version: '3.9'
services:

fetcher_app_selenium:
image: fetcher_app_selenium
build:
context: ./app_selenium
args:
- ARCH=${ARCH} # arm64, amd64
container_name: fetcher_app_selenium
restart: unless-stopped
shm_size: 512mb
environment:
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
ports:
- 80:80
dns:
- 1.1.1.1
- 1.0.0.1
extends:
file: docker-compose-base.yml
service: fetcher_app_selenium
deploy:
resources:
limits:
@@ -26,66 +13,11 @@ services:
memory: ${DEPLOY_RAM}

fetcher_app_urls:
image: fetcher_app_urls
build:
context: ./app_urls
container_name: fetcher_app_urls
restart: unless-stopped
environment:
# Initialization
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
# Django
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
- DJANGO_DEBUG=${DJANGO_DEBUG}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
# Database
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_PORT}
- REDIS_HOST=${REDIS_HOST}
- REDIS_PORT=${REDIS_PORT}
# Job timeout: 30 min
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
# Fetcher
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP} # Sleep time between each search
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP} # Sleep time between requests to same URL host
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR} # Minimum number of characters to run language detection
- FETCHER_INSERT_URL_CACHE_TIME=${FETCHER_INSERT_URL_CACHE_TIME} # Cache time: Insert raw URL
- FETCHER_ERROR_URL_CACHE_TIME=${FETCHER_ERROR_URL_CACHE_TIME} # Cache time: Error on processing URL
# Selenium
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
# Ghost
- GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
- GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
- PEXELS_API_KEY=${PEXELS_API_KEY}
- OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT}
########################
volumes: # Development mode
- ./app_urls:/opt/app
########################
ports:
- 8000:8000
depends_on:
- fetcher_db
- fetcher_redis
dns:
- 1.1.1.1
- 1.0.0.1
deploy:
resources:
limits:
cpus: '${DEPLOY_CPUS}'
memory: ${DEPLOY_RAM}
extends:
file: docker-compose-base.yml
service: fetcher_app_urls
#env_files:
# - .env.dev
#labels: # Reverse proxy sample
# - "traefik.enable=true"
# - "traefik.http.routers.fetcher.rule=Host(`urls.yourdomain.com`)"
@@ -95,11 +27,21 @@ services:
#networks:
# - default # This network
# - docker_default # Reverse proxy network
ports:
- 8000:8000
volumes: # Development mode
- ./app_urls:/opt/app
deploy:
resources:
limits:
cpus: '${DEPLOY_CPUS}'
memory: ${DEPLOY_RAM}

fetcher_db:
extends:
file: docker-compose-base.yml
service: fetcher_db
image: postgres:17
container_name: fetcher_db
restart: unless-stopped
# Set shared memory limit when using docker-compose
shm_size: 128mb
environment:
@@ -107,18 +49,14 @@ services:
POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_USER: ${DB_USER}
POSTGRES_INITDB_ARGS: '--data-checksums'
#volumes: # Persistent DB?
# - ${PATH_DB_DATA}/postgres:/var/lib/postgresql/data
ports:
- 5432 #:5432
#volumes: # Persistent DB?
# - ./postgres:/var/lib/postgresql/data

fetcher_redis:
image: redis:alpine
container_name: fetcher_redis
restart: unless-stopped
extends:
file: docker-compose-base.yml
service: fetcher_redis
ports:
- 6379 #:6379

#networks:
# docker_default:
# external: true
- 6379:6379

@@ -3,22 +3,9 @@ version: '3.9'
services:

fetcher_app_selenium:
image: fetcher_app_selenium
build:
context: ./app_selenium
args:
- ARCH=${ARCH} # arm64, amd64
container_name: fetcher_app_selenium
restart: unless-stopped
shm_size: 512mb
environment:
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
ports:
- 80
dns:
- 1.1.1.1
- 1.0.0.1
extends:
file: docker-compose-base.yml
service: fetcher_app_selenium
deploy:
resources:
limits:
@@ -26,61 +13,11 @@ services:
memory: ${DEPLOY_RAM}

fetcher_app_urls:
image: fetcher_app_urls
build:
context: ./app_urls
container_name: fetcher_app_urls
restart: unless-stopped
environment:
# Initialization
- INITIALIZE_DB=${INITIALIZE_DB} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL}
# Django
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS} # host1,host2
- DJANGO_ALLOWED_ORIGINS=${DJANGO_ALLOWED_ORIGINS} # Reverse proxy
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY}
- DJANGO_DEBUG=${DJANGO_DEBUG}
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY}
# Database
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_PORT}
- REDIS_HOST=${REDIS_HOST}
- REDIS_PORT=${REDIS_PORT}
# Job timeout: 30 min
- JOB_DEFAULT_TIMEOUT=${JOB_DEFAULT_TIMEOUT}
# Fetcher
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP}
- FETCHER_BETWEEN_SEARCHES_SLEEP=${FETCHER_BETWEEN_SEARCHES_SLEEP} # Sleep time between each search
- FETCHER_URL_HOST_SLEEP=${FETCHER_URL_HOST_SLEEP} # Sleep time between requests to same URL host
- FETCHER_LANGUAGE_DETECTION_MIN_CHAR=${FETCHER_LANGUAGE_DETECTION_MIN_CHAR} # Minimum number of characters to run language detection
- FETCHER_INSERT_URL_CACHE_TIME=${FETCHER_INSERT_URL_CACHE_TIME} # Cache time: Insert raw URL
- FETCHER_ERROR_URL_CACHE_TIME=${FETCHER_ERROR_URL_CACHE_TIME} # Cache time: Error on processing URL
# Selenium
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA}
# Ghost
- GHOST_ADMIN_API_KEY=${GHOST_ADMIN_API_KEY}
- GHOST_ADMIN_API_URL=${GHOST_ADMIN_API_URL}
- PEXELS_API_KEY=${PEXELS_API_KEY}
- OLLAMA_MODEL_DEFAULT=${OLLAMA_MODEL_DEFAULT}
########################
#volumes: # Development mode
# - ./app_urls:/opt/app
########################
extends:
file: docker-compose-base.yml
service: fetcher_app_urls
ports:
- 8000:8000
depends_on:
- fetcher_db
- fetcher_redis
dns:
- 1.1.1.1
- 1.0.0.1
deploy:
resources:
limits:
@@ -88,7 +25,9 @@ services:
memory: ${DEPLOY_RAM}

fetcher_db:
container_name: fetcher_db
extends:
file: docker-compose-base.yml
service: fetcher_db
image: alpine:latest
restart: unless-stopped
deploy:
@@ -98,22 +37,21 @@ services:
volumes:
# REQUIREMENT: Add the fetcher's SSH public key to ~/.ssh/authorized_keys on the DB machine
- ~/.ssh:/root/.ssh:ro
ports:
- 15885:15885
- 5432:5432
command:
- sh
- -c
- |
apk add --update openssh autossh
autossh -M 15885 -N -o 'GatewayPorts yes' -L 0.0.0.0:5432:127.0.0.1:5432 ${REMOTE_USERNAME}@${REMOTE_HOST}
### Alternative:
### autossh -M 0 -o "ServerAliveInterval 30" -o "ServerAliveCountMax 3" -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org
### -M 15882 monitors on port 15882; if that port is already in use, there is a conflict!
###autossh -M 15882 -N -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org
###ssh -N -o "StrictHostKeyChecking no" -o "ServerAliveInterval 60" -o "ServerAliveCountMax 3" -o 'PasswordAuthentication no' -o 'GatewayPorts yes' -L 15882:127.0.0.1:15882 matitos@matitos.org
network_mode: "host"
# Monitor status on port 15885
autossh -M 15885 -N -L 0.0.0.0:5432:127.0.0.1:5432 ${REMOTE_USERNAME}@${REMOTE_HOST}
# autossh -M 15885 -N -o 'GatewayPorts yes' -L 0.0.0.0:5432:127.0.0.1:5432 ${REMOTE_USERNAME}@${REMOTE_HOST}

fetcher_redis:
image: redis:alpine
container_name: fetcher_redis
restart: unless-stopped
extends:
file: docker-compose-base.yml
service: fetcher_redis
ports:
- 6379 #:6379
- 6379:6379
utils/DB-Dev.ipynb (Normal file, 79 lines)
@@ -0,0 +1,79 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#!pip install python-dotenv\n",
"from dotenv import load_dotenv\n",
"\n",
"# Specify the path to your .env file (optional if in the current dir)\n",
"load_dotenv(dotenv_path=\".env\", override=True)\n",
"\n",
"import os\n",
"import psycopg\n",
"from sshtunnel import SSHTunnelForwarder\n",
"\n",
"if (os.environ.get(\"SSH_TUNNEL_BASED\") == \"true\"):\n",
"    print(\"SSH tunnel: True\")\n",
"else:\n",
"    print(\"SSH tunnel: False\")\n",
"\n",
"connect_info = \"host={} port={} user={} password={} dbname={}\".format(os.environ.get(\"DB_HOST\"), os.environ.get(\"DB_PORT\"), os.environ.get(\"DB_USER\"), os.environ.get(\"DB_PASSWORD\"), os.environ.get(\"DB_NAME\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"if (os.environ.get(\"SSH_TUNNEL_BASED\") == \"true\"):\n",
"    ssh_tunnel = SSHTunnelForwarder(\n",
"        (os.environ.get(\"REMOTE_HOST\"), int(os.environ.get(\"REMOTE_SSH_PORT\"))), \n",
"        ssh_username=os.environ.get(\"REMOTE_USERNAME\"), ssh_password=os.environ.get(\"REMOTE_PASSWORD\"), \n",
"        remote_bind_address=('localhost', int(os.environ.get(\"REMOTE_PORT\"))), local_bind_address=('localhost', int(os.environ.get(\"DB_PORT\"))) \n",
"    )\n",
"    ssh_tunnel.start()\n",
"\n",
"try:\n",
"    with psycopg.connect(connect_info) as conn:\n",
"        if True:\n",
"            for t in conn.execute(\"\"\"\n",
"                SELECT * from URLS WHERE id IN (SELECT id_url FROM URLS_SOURCE_SEARCH INNER JOIN SEARCH ON URLS_SOURCE_SEARCH.id_search = SEARCH.id WHERE SEARCH.search LIKE '%child abuse%') LIMIT 5;\n",
"            \"\"\").fetchall():\n",
"                print(t)\n",
"        \n",
"except Exception as e:\n",
"    print(\"Err:\", str(e))\n",
"\n",
"if (os.environ.get(\"SSH_TUNNEL_BASED\") == \"true\"):\n",
"    ssh_tunnel.stop()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "matitos_urls",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}