Files
hotels/check_progress.sh
Фёдор 0cf3297290 Проект аудита отелей: основные скрипты и документация
- Краулеры: smart_crawler.py, regional_crawler.py
- Аудит: audit_orel_to_excel.py, audit_chukotka_to_excel.py
- РКН проверка: check_rkn_registry.py, recheck_unclear_rkn.py
- Отчёты: create_orel_horizontal_report.py
- Обработка: process_all_hotels_embeddings.py
- Документация: README.md, DB_SCHEMA_REFERENCE.md
2025-10-16 10:52:09 +03:00

51 lines
1.6 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
#
# Status report for the background hotel-audit jobs: lists the running
# crawler / embedding processes and the most recent lines of their logs.
#
# Log files are located with relative globs, so run this script from the
# directory that holds the *.log files.
#
# Deliberately no `set -e`: every section is best-effort, and an empty
# section (no process running, no log yet) must not abort the report.

# Extended regex matched against the full command line of each process.
readonly JOB_PATTERN='smart_crawler|process_all_hotels_embeddings'

#######################################
# List running jobs whose command line matches JOB_PATTERN.
# Uses pgrep instead of `ps | grep | grep -v grep`: pgrep never reports
# itself, matches only the command line (not user name or other ps columns)
# and does not depend on the column layout of `ps aux`.
# Globals:   JOB_PATTERN (read)
# Outputs:   one line per process: PID followed by the first three words
#            of its command line; nothing when no process matches
#######################################
list_jobs() {
  # -a prints the full command line after the PID, -f matches against it.
  pgrep -af "$JOB_PATTERN" | awk '{print " PID: "$1" - "$2" "$3" "$4}'
}

#######################################
# Show up to 3 matching lines taken from the last 5 lines of each log file.
# Arguments: $1  - extended regex selecting the lines of interest
#            $2… - log files; a glob that matched nothing arrives here as
#                  a literal pattern and is skipped silently
# Outputs:   the selected log lines on stdout
#######################################
show_recent_log_lines() {
  local pattern=$1
  shift
  # stderr is discarded on purpose: a missing log just means the job has
  # not produced one yet.
  tail -n 5 "$@" 2>/dev/null | grep -E -- "$pattern" | tail -n 3
}

echo "📊 СТАТУС ФОНОВЫХ ПРОЦЕССОВ"
echo "============================================================"
echo ""

# Check processes
echo "🔍 Активные процессы:"
list_jobs
echo ""

echo "📝 Последние логи краулера:"
show_recent_log_lines "INFO|ERROR" smart_crawler_output_*.log
echo ""

echo "📝 Последние логи чанкинизации:"
show_recent_log_lines "INFO|ERROR|отелей|chunks" embeddings_processing_*.log
echo ""
echo "📈 Статистика из БД:"
# Database statistics, computed by an inline Python program (requires
# python3 with psycopg2). The heredoc delimiter is quoted, so the shell
# expands nothing inside it; Python reads its settings from the environment.
#
# Optional overrides (each falls back to the legacy built-in value):
#   HOTELS_DB_HOST, HOTELS_DB_PORT, HOTELS_DB_NAME, HOTELS_DB_USER,
#   HOTELS_DB_PASSWORD (plain text, not URL-encoded),
#   HOTELS_DB_CONNECT_TIMEOUT (seconds, default 10)
python3 << 'PYEOF'
import os
import sys
from contextlib import closing
from urllib.parse import unquote

import psycopg2
from psycopg2.extras import RealDictCursor

# SECURITY(review): the fallback password below is a credential committed to
# the repository. It is kept only so existing invocations keep working;
# rotate it, supply HOTELS_DB_PASSWORD instead, and delete this fallback.
password = os.environ.get('HOTELS_DB_PASSWORD')
if password is None:
    password = unquote('2~~9_%5EkVsU%3F2%5CS')

DB_CONFIG = {
    'host': os.environ.get('HOTELS_DB_HOST', '147.45.189.234'),
    'port': os.environ.get('HOTELS_DB_PORT', 5432),
    'database': os.environ.get('HOTELS_DB_NAME', 'default_db'),
    'user': os.environ.get('HOTELS_DB_USER', 'gen_user'),
    'password': password,
    # Bound the connection attempt so an unreachable host cannot hang the
    # status report.
    'connect_timeout': int(os.environ.get('HOTELS_DB_CONNECT_TIMEOUT', '10')),
}


def fetch_count(cur, sql):
    """Run a single-row query and return its `count` column."""
    cur.execute(sql)
    return cur.fetchone()['count']


try:
    # closing() guarantees cursor and connection are released even when a
    # query raises; nothing is written, so no commit is needed.
    with closing(psycopg2.connect(**DB_CONFIG, cursor_factory=RealDictCursor)) as conn, \
            closing(conn.cursor()) as cur:
        crawled = fetch_count(
            cur,
            "SELECT COUNT(DISTINCT hotel_id) as count FROM hotel_website_raw")
        chunked = fetch_count(
            cur,
            "SELECT COUNT(DISTINCT metadata->>'hotel_id') as count FROM hotel_website_chunks WHERE metadata->>'hotel_id' IS NOT NULL")
        total_chunks = fetch_count(
            cur,
            "SELECT COUNT(*) as count FROM hotel_website_chunks")
except psycopg2.Error as exc:
    # One readable line instead of a traceback; non-zero exit is preserved.
    print(f" ❌ Ошибка БД: {exc}", file=sys.stderr)
    sys.exit(1)

print(f" 🕷️ Краулинг: {crawled:,} отелей")
print(f" 📦 Chunks: {chunked:,} отелей ({total_chunks:,} chunks)")
PYEOF