Min num of sources filter, initialization scripts, docker ready to use dev mode

This commit is contained in:
Luciano Gervasoni
2025-04-04 16:56:27 +02:00
parent 76079d7bd0
commit 9127552bfd
10 changed files with 132 additions and 83 deletions

View File

@@ -1,44 +1,46 @@
# Matitos # Matitos
- Scheduled tasks - URLs Fetcher -> Inserts raw URLs
- Fetcher -> Inserts raw URLs - Fetch parsing URL host
- Fetch parsing URL host - Fetch from RSS feed
- Fetch from RSS feed - Fetch keyword search (Google search & news, DuckDuckGo, ...)
- Fetch keyword search (Google search & news, DuckDuckGo, ...) ++ Sources -> Robustness to TooManyRequests block
++ Sources -> Robustness to TooManyRequests block - Selenium based
- Selenium based - Sites change their logic, request captcha, ...
- Sites change their logic, request captcha, ... - Brave Search API
- Brave Search API - Free up to X requests per day. Need credit card association (no charges)
- Free up to X requests per day. Need credit card association (no charges) - Bing API
- Bing API - Subscription required
- Subscription required - Yandex. No API?
- Yandex. No API?
++ Proxy / VPN?
TooManyRequests, ...
++ Search per locale (nl-NL, fr-FR, en-GB)
- Process URLs -> Updates raw URLs
- Extracts title, description, content, image and video URLs, main image URL, language, keywords, authors, tags, published date
- Determines if it is a valid article content
++ Proxy / VPN? ++ Proxy / VPN?
Bypass geoblock TooManyRequests, ...
- Valid URLs ++ Search per locale (nl-NL, fr-FR, en-GB)
- Generate summary
- One paragraph - URLs Processing -> Updates raw URLs
- At most three paragraphs - Extracts title, description, content, image and video URLs, main image URL, language, keywords, authors, tags, published date
- Classification - Determines if it is a valid article content
- 5W: Who, What, When, Where, Why of a Story ++ Proxy / VPN?
- Related to child abuse? Bypass geoblock
- ...
- Visualization of URLs - Visualization of URLs
- Filter URLs - Filter URLs
- By status, search, source, language - By status, search, source, language, ...
- Charts - Charts
- Valid URLs
- Generate summary
- One paragraph
- At most three paragraphs
- Classification
- 5W: Who, What, When, Where, Why of a Story
- Related to child abuse?
- ...
- Content generation - Content generation
- Select URLs: - URLs Selection
- Valid content - Valid content
- language=en - Language of interest
- published_date during last_week - Published (or fetch) date during last_week
- Use classifications - Fetched by at least N sources
- Use classifications and summaries
- Merge summaries, ... - Merge summaries, ...

View File

@@ -19,31 +19,10 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY --chown=appuser:appuser . /opt/app/ COPY --chown=appuser:appuser . /opt/app/
RUN chmod -R 755 /opt/app RUN chmod -R 755 /opt
RUN chown -R appuser:appuser /opt/app RUN chown -R appuser:appuser /opt
USER appuser USER appuser
# Initialization script
RUN echo '#!/bin/bash' > /opt/app/initialize.sh && \
echo 'if [ "${INITIALIZE_DB}" = false ]; then' >> /opt/app/initialize.sh && \
echo 'echo "Initialization not required"' >> /opt/app/initialize.sh && \
echo 'else' >> /opt/app/initialize.sh && \
echo 'echo "Initializating database"' >> /opt/app/initialize.sh && \
echo 'python db.py --initialize_tables --initialize_data' >> /opt/app/initialize.sh && \
echo 'python manage.py makemigrations fetcher; python manage.py migrate --fake-initial' >> /opt/app/initialize.sh && \
echo 'python manage.py createsuperuser --noinput' >> /opt/app/initialize.sh && \
echo 'python manage.py collectstatic --no-input' >> /opt/app/initialize.sh && \
echo 'python manage.py import --filename scheduled_tasks.json' >> /opt/app/initialize.sh && \
echo 'fi' >> /opt/app/initialize.sh && \
chmod +x /opt/app/initialize.sh
# Serving script
RUN echo '#!/bin/bash' > /opt/app/run.sh && \
# Prod mode:
echo 'gunicorn core.wsgi:application --bind 0.0.0.0:8000 & python manage.py rqworker high default low' >> /opt/app/run.sh && \
# Dev mode:
#echo 'gunicorn core.wsgi:application --reload --bind 0.0.0.0:8000 & python manage.py rqworker high default low' >> /opt/app/run.sh && \
chmod +x /opt/app/run.sh
# Run Djangos server & workers # Run Djangos server & workers
CMD ["sh", "-c", "/opt/app/initialize.sh && /opt/app/run.sh"] CMD ["sh", "-c", "/opt/app/script_initialize.sh && /opt/app/script_run.sh"]

View File

@@ -134,10 +134,18 @@ def initialize_data():
with conn.transaction() as tx: with conn.transaction() as tx:
# Feeds # Feeds
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC', 'rss_feed');" ) cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('https://api.missingkids.org/missingkids/servlet/XmlServlet?act=rss&LanguageCountry=en_US&orgPrefix=NCMC', 'rss_feed');" )
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('https://feeds.feedburner.com/breitbart', 'rss_feed');" )
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('http://feeds.feedburner.com/zerohedge/feed', 'rss_feed');" )
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('https://moxie.foxnews.com/google-publisher/latest.xml', 'rss_feed');" )
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=15837362', 'rss_feed');" )
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=100727362', 'rss_feed');" )
# Websites of interest # Websites of interest
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('missingkids.org/poster', 'url_host');" ) cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('missingkids.org/poster', 'url_host');" )
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('missingkids.org/new-poster', 'url_host');" ) cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('missingkids.org/new-poster', 'url_host');" )
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('breitbart.com', 'url_host');" ) cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('breitbart.com', 'url_host');" )
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('zerohedge.com', 'url_host');" )
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('foxnews.com', 'url_host');" )
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('cnbc.com', 'url_host');" )
# Search keywords # Search keywords
cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('child abuse', 'keyword_search');" ) cur.execute( "INSERT INTO SEARCH (search, type) VALUES ('child abuse', 'keyword_search');" )
# TODO: Language per search # TODO: Language per search
@@ -146,12 +154,34 @@ def initialize_data():
# Status update based on pattern matching (with priority to apply in order). Regex test https://regex101.com/ # Status update based on pattern matching (with priority to apply in order). Regex test https://regex101.com/
cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("youtube.com/"))) ) cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("youtube.com/"))) )
cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("yewtu.be/"))) )
cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("tiktok.com/"))) ) cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("tiktok.com/"))) )
cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("twitter.com/"))) ) cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("twitter.com/"))) )
cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("reddit.com/"))) ) cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("reddit.com/"))) )
cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("libreddit.de/"))) ) cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("libreddit.de/"))) )
cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("radio.foxnews.com/"))) ) cur.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('{}', 50, 'invalid');".format(".*{}.*".format(re.escape("radio.foxnews.com/"))) )
""" # TODO: To review with new scheme
# Status update based on pattern matching (with priority to apply in order)
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*cnbc.com/(video|quotes)/.*', 100, 'invalid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*foxnews.com/(video|category)/.*', 100, 'invalid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*breitbart.com/(tag|author)/.*', 100, 'invalid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*zerohedge.com/(economics|political|markets)/.*', 50, 'valid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*breitbart.com/(economy|entertainment|border|crime|clips)/.*', 50, 'valid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*foxnews.com/(lifestyle|opinion|sports|world)/.*', 50, 'valid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*cnbc.com/[0-9]{4}/[0-9]{2}/[0-9]{2}/.*', 50, 'valid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*bbc.com/news/.*', 50, 'valid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*msn.com/[A-z]{2}-[A-z]{2}/news/.*', 50, 'valid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*newschannel9.com/news/.*', 50, 'valid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*radaronline.com/p.*', 25, 'valid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*okmagazine.com/p.*', 25, 'valid');" )
cursor.execute( "INSERT INTO STATUS_PATTERN_MATCHING (pattern, priority, status) VALUES ('.*9news.com.au/national.*', 25, 'valid');" )
"""
def main(name): def main(name):
print('Hello, %s!' % name) print('Hello, %s!' % name)

View File

@@ -99,6 +99,7 @@ class UrlsDuplicate(models.Model):
class UrlsSourceSearch(models.Model): class UrlsSourceSearch(models.Model):
id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True) # The composite primary key (id_url, id_source, id_search) found, that is not supported. The first column is selected. id_url = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url', primary_key=True) # The composite primary key (id_url, id_source, id_search) found, that is not supported. The first column is selected.
#id_url = models.ForeignKey(Urls, models.DO_NOTHING, db_column='id_url')
id_source = models.ForeignKey(Source, models.DO_NOTHING, db_column='id_source') id_source = models.ForeignKey(Source, models.DO_NOTHING, db_column='id_source')
id_search = models.ForeignKey(Search, models.DO_NOTHING, db_column='id_search') id_search = models.ForeignKey(Search, models.DO_NOTHING, db_column='id_search')

View File

@@ -331,6 +331,12 @@ input[type="checkbox"] {
</label><br> </label><br>
{% endfor %} {% endfor %}
<!-- Minimum Sources Count Box -->
<h3>Min #Sources</h3>
<div>
<input type="number" id="minSourceCount" name="min_sources" value="{{ selected_min_sources }}" min="1" style="width: 60px; text-align: center;">
</div>
<!-- Filter by language --> <!-- Filter by language -->
<h3>Language</h3> <h3>Language</h3>
<button type="button" class="toggle-all-btn" data-toggle="language">Toggle All</button><br> <button type="button" class="toggle-all-btn" data-toggle="language">Toggle All</button><br>
@@ -538,6 +544,10 @@ input[type="checkbox"] {
} }
}); });
// Min number of sources
//const minSearchCount = document.getElementById('minSourceCount').value;
//params.set('min_search_count', minSearchCount);
// Submit the form after updating all sections // Submit the form after updating all sections
document.getElementById("filterForm").submit(); document.getElementById("filterForm").submit();
} }
@@ -566,6 +576,9 @@ input[type="checkbox"] {
updateFormParameters(); updateFormParameters();
}); });
}); });
document.getElementById('minSourceCount').addEventListener('change', function() {
updateFormParameters();
});
document.getElementById('perPageSelect').addEventListener('change', function() { document.getElementById('perPageSelect').addEventListener('change', function() {
updateFormParameters(); updateFormParameters();
}); });

View File

@@ -198,7 +198,7 @@
</tr> </tr>
<tr> <tr>
<th>Status</th> <th>Status</th>
<td>{{ url_item.status }}</td> <td>{{ url_item.status }} {% if url_canonical != None %}<a href="/urls/{{ url_canonical.id }}" target="_blank">[{{ url_canonical.id }}]</a>{% endif %} </td>
</tr> </tr>
<tr> <tr>
<th>URL host</th> <th>URL host</th>

View File

@@ -6,6 +6,8 @@ from django.contrib.auth.decorators import login_required
import ollama import ollama
from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate from .models import Urls, Source, Search, UrlContent, UrlsSourceSearch, UrlsDuplicate
import os import os
from .src.logger import get_logger
logger = get_logger()
#################################################################################################### ####################################################################################################
def trigger_task(request, task): def trigger_task(request, task):
@@ -94,13 +96,11 @@ def url_detail_view(request, id):
url_item = get_object_or_404(Urls, id=id) url_item = get_object_or_404(Urls, id=id)
url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct()) url_sources = list(Source.objects.filter(urlssourcesearch__id_url=url_item).distinct())
url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct()) url_searches = list(Search.objects.filter(urlssourcesearch__id_url=url_item).distinct())
# url_source_search = UrlsSourceSearch.objects.filter(id_url=url_item)
if (url_item.status == Urls.STATUS_ENUM.DUPLICATE):
url_duplicate = UrlsDuplicate.objects.get(id_url_duplicated=url_item) url_canonical = UrlsDuplicate.objects.get(id_url_duplicated=url_item).id_url_canonical
#id_url_canonical = models.OneToOneField(Urls, models.DO_NOTHING, db_column='id_url_canonical', primary_key=True) # The composite primary key (id_url_canonical, id_url_duplicated) found, that is not supported. The first column is selected. else:
#id_url_duplicated = models.ForeignKey(Urls, models.DO_NOTHING, db_column='id_url_duplicated', related_name='urlsduplicate_id_url_duplicated_set') url_canonical = None
url_duplicate.id_url_duplicated
try: try:
url_content = UrlContent.objects.get(pk=id) url_content = UrlContent.objects.get(pk=id)
@@ -117,6 +117,7 @@ def url_detail_view(request, id):
'models': ollama.get_models(), 'models': ollama.get_models(),
'prompt': ollama.get_prompt(), 'prompt': ollama.get_prompt(),
'url_content': url_content, 'url_content': url_content,
'url_canonical': url_canonical,
} }
return render(request, 'url_detail.html', context) return render(request, 'url_detail.html', context)
@@ -232,6 +233,7 @@ def filtered_urls(request):
selected_source = request.GET.getlist('source', ["null"]) selected_source = request.GET.getlist('source', ["null"])
selected_language = request.GET.getlist('language', ["null"]) selected_language = request.GET.getlist('language', ["null"])
selected_valid_contents = request.GET.getlist('valid_content', ["null"]) selected_valid_contents = request.GET.getlist('valid_content', ["null"])
selected_min_sources = int(request.GET.get('min_sources', 1))
selected_days = request.GET.get("days", 30) selected_days = request.GET.get("days", 30)
per_page = request.GET.get('per_page', 100) # Default is X URLs per page per_page = request.GET.get('per_page', 100) # Default is X URLs per page
page_number = request.GET.get('page') # Get the current page number page_number = request.GET.get('page') # Get the current page number
@@ -298,6 +300,9 @@ def filtered_urls(request):
# Update query # Update query
query &= (subquery) query &= (subquery)
if (selected_min_sources > 1):
query &= Q(pk__in=UrlsSourceSearch.objects.values('id_url').annotate(search_count=Count('id_source', distinct=True)).filter(search_count__gte=selected_min_sources).values('id_url'))
# Run query # Run query
urls = Urls.objects.filter(query).distinct() # .order_by('-ts_fetch') urls = Urls.objects.filter(query).distinct() # .order_by('-ts_fetch')
@@ -333,6 +338,7 @@ def filtered_urls(request):
'selected_source': selected_source, 'selected_source': selected_source,
'selected_language': selected_language, 'selected_language': selected_language,
'selected_valid_contents': selected_valid_contents, 'selected_valid_contents': selected_valid_contents,
"selected_min_sources": selected_min_sources,
"selected_days": selected_days, "selected_days": selected_days,
# Map # Map
"sources_map": sources_map, "sources_map": sources_map,

12
app_urls/script_initialize.sh Executable file
View File

@@ -0,0 +1,12 @@
#!/bin/bash
# Container bootstrap: prepares the database and Django state before serving.
# Initialization is skipped only when INITIALIZE_DB is exactly the lowercase
# string "false"; any other value (including unset/empty) runs every step below.
if [ "${INITIALIZE_DB}" != false ]; then
    echo "Initializating database"
    # Project helper invoked with both the table-creation and data-seeding flags
    python db.py --initialize_tables --initialize_data
    # Build migrations for the fetcher app; migrate runs afterwards whether or
    # not makemigrations succeeded (the two commands are not chained with &&)
    python manage.py makemigrations fetcher
    python manage.py migrate --fake-initial
    # Non-interactive superuser creation
    # (presumably driven by the DJANGO_SUPERUSER_* environment variables - confirm)
    python manage.py createsuperuser --noinput
    python manage.py collectstatic --no-input
    # Project-specific "import" management command fed with scheduled_tasks.json
    python manage.py import --filename scheduled_tasks.json
else
    echo "Initialization not required"
fi

7
app_urls/script_run.sh Executable file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
# Serving script: launches gunicorn in the background and the RQ worker
# (queues: high, default, low) in the foreground of the same shell.
#
# DJANGO_DEBUG equal to "true" or "True" selects dev mode, which adds
# --reload and --log-level debug to gunicorn; any other value runs the plain
# configuration.
#
# The two tests must be joined with "||" (logical OR). A single "|" builds a
# pipeline whose exit status is that of the LAST test only, so the lowercase
# "true" spelling would be silently ignored.
if [ "${DJANGO_DEBUG}" = true ] || [ "${DJANGO_DEBUG}" = "True" ]; then
    gunicorn core.wsgi:application --reload --log-level debug --bind 0.0.0.0:8000 --timeout 300 & python manage.py rqworker high default low
else
    gunicorn core.wsgi:application --bind 0.0.0.0:8000 --timeout 300 & python manage.py rqworker high default low
fi

View File

@@ -7,7 +7,7 @@ services:
build: build:
context: ./app_selenium context: ./app_selenium
container_name: fetcher_app_selenium container_name: fetcher_app_selenium
# restart: unless-stopped restart: unless-stopped
shm_size: 512mb shm_size: 512mb
environment: environment:
- SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE:-4} - SELENIUM_SLEEP_PER_PAGE=${SELENIUM_SLEEP_PER_PAGE:-4}
@@ -28,17 +28,18 @@ services:
build: build:
context: ./app_urls context: ./app_urls
container_name: fetcher_app_urls container_name: fetcher_app_urls
# restart: unless-stopped restart: unless-stopped
environment: environment:
# Initialization # Initialization
- INITIALIZE_DB=${INITIALIZE_DB:-true} - INITIALIZE_DB=${INITIALIZE_DB:-false} # Related to DB persistence
- DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME:-matitos} - DJANGO_SUPERUSER_USERNAME=${DJANGO_SUPERUSER_USERNAME:-matitos}
- DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD:-matitos} - DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD:-matitos}
- DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL:-matitos@matitos.org} - DJANGO_SUPERUSER_EMAIL=${DJANGO_SUPERUSER_EMAIL:-matitos@matitos.org}
# Django # Django
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS:-*} # host1,host2
- DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:-abc123456789qwerty} - DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:-abc123456789qwerty}
- DJANGO_DEBUG=${DJANGO_DEBUG:-False} - DJANGO_DEBUG=${DJANGO_DEBUG:-False}
- DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS:-*} # host1,host2 - PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-/opt/logs}
# Database # Database
- DB_NAME=${DB_NAME:-matitos} - DB_NAME=${DB_NAME:-matitos}
- DB_USER=${DB_USER:-supermatitos} - DB_USER=${DB_USER:-supermatitos}
@@ -49,8 +50,6 @@ services:
- REDIS_PORT=${REDIS_PORT:-6379} - REDIS_PORT=${REDIS_PORT:-6379}
# Job timeout: 30 min # Job timeout: 30 min
- JOB_DEFAULT_TIMEOUT=${RQ_DEFAULT_TIMEOUT:-1800} - JOB_DEFAULT_TIMEOUT=${RQ_DEFAULT_TIMEOUT:-1800}
# Logs path
- PATH_LOGS_DIRECTORY=${PATH_LOGS_DIRECTORY:-logs}
# Fetcher # Fetcher
- FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP-2} - FETCHER_GNEWS_DECODE_SLEEP=${FETCHER_GNEWS_DECODE_SLEEP-2}
- FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5} - FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP=${FETCHER_GOOGLE_GENERAL_PAGE_ITER_SLEEP:-5}
@@ -60,8 +59,8 @@ services:
- SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT:-http://fetcher_app_selenium:80} - SELENIUM_ENDPOINT=${SELENIUM_ENDPOINT:-http://fetcher_app_selenium:80}
- ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA:-https://ollamamodel.matitos.org} - ENDPOINT_OLLAMA=${ENDPOINT_OLLAMA:-https://ollamamodel.matitos.org}
######################## ########################
#volumes: # Dev mode volumes: # Dev mode
# - ./app_urls:/opt/app - ./app_urls:/opt/app
######################## ########################
ports: ports:
- 8000:8000 - 8000:8000
@@ -100,12 +99,12 @@ services:
ports: ports:
- 6379 #:6379 - 6379 #:6379
fetcher_dozzle: #fetcher_dozzle:
container_name: fetcher_dozzle # container_name: fetcher_dozzle
image: amir20/dozzle:latest # image: amir20/dozzle:latest
volumes: # volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro # - /var/run/docker.sock:/var/run/docker.sock:ro
ports: # ports:
- 8888:8080 # - 8888:8080
environment: # environment:
- DOZZLE_FILTER="name=fetcher_" # - DOZZLE_FILTER="name=fetcher_"