Files
hotels/check_progress.sh

51 lines
1.6 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
#
# Status report for the background hotel jobs: lists the running crawler /
# embedding processes and prints the newest lines of their log files.
# Log files are looked up relative to the current working directory.

# Extended regex matched against full command lines to find the jobs.
readonly JOB_PATTERN='smart_crawler|process_all_hotels_embeddings'

#######################################
# Lists running background jobs.
# Uses pgrep instead of `ps aux | grep ... | grep -v grep`: pgrep never
# reports itself, and jobs whose command line merely contains the word
# "grep" are no longer filtered out by accident.
# Globals:   JOB_PATTERN (read)
# Outputs:   one line per match: " PID: <pid> - <first three argv words>";
#            nothing when no job is running
#######################################
list_active_jobs() {
  pgrep -af "${JOB_PATTERN}" \
    | awk '{print " PID: "$1" - "$2" "$3" "$4}'
}

#######################################
# Prints the newest log lines that match a filter.
# Takes the last 5 lines of every given file (in argument order), keeps the
# lines matching the regex, and prints the final 3 of what remains.
# Arguments: $1   - extended regex selecting interesting lines
#            $2.. - log files (an unmatched glob pattern is tolerated)
# Outputs:   up to 3 lines on stdout; nothing if no file or line matches
#######################################
recent_log_lines() {
  local filter=$1
  shift
  # `-n 5` rather than the legacy `-5`: GNU tail accepts the legacy spelling
  # only with a single file operand, so a glob expanding to several logs made
  # tail fail, and the error was hidden by the redirect below.
  # Missing logs are expected before a job's first run, hence 2>/dev/null.
  tail -n 5 -- "$@" 2>/dev/null | grep -E -- "${filter}" | tail -n 3
}

echo "📊 СТАТУС ФОНОВЫХ ПРОЦЕССОВ"
echo "============================================================"
echo ""
# Check for running processes
echo "🔍 Активные процессы:"
list_active_jobs
echo ""
echo "📝 Последние логи краулера:"
recent_log_lines 'INFO|ERROR' smart_crawler_output_*.log
echo ""
echo "📝 Последние логи чанкинизации:"
recent_log_lines 'INFO|ERROR|отелей|chunks' embeddings_processing_*.log
echo ""
echo "📈 Статистика из БД:"
# The heredoc delimiter is quoted, so the shell performs no expansion inside
# the Python program; configuration is read from the environment by Python.
python3 << 'PYEOF'
"""Print crawl / chunking progress counters read from PostgreSQL."""
import os
from contextlib import closing
from urllib.parse import unquote

import psycopg2
from psycopg2.extras import RealDictCursor

# Connection settings. Each value can be overridden with the standard libpq
# environment variables (PGHOST, PGPORT, PGDATABASE, PGUSER, PGPASSWORD);
# the literals are the previous hard-coded values, kept as defaults so the
# script behaves as before when nothing is set.
# NOTE(review): a password committed to the repository should be rotated and
# this fallback removed once PGPASSWORD is provisioned where the script runs.
DB_CONFIG = {
    'host': os.environ.get('PGHOST') or '147.45.189.234',
    'port': int(os.environ.get('PGPORT') or 5432),
    'database': os.environ.get('PGDATABASE') or 'default_db',
    'user': os.environ.get('PGUSER') or 'gen_user',
    'password': os.environ.get('PGPASSWORD') or unquote('2~~9_%5EkVsU%3F2%5CS'),
    # Fail after 10 s instead of hanging the report on an unreachable host.
    'connect_timeout': 10,
}


def fetch_count(cur, sql):
    """Run a query that returns a single row with a `count` column."""
    cur.execute(sql)
    return cur.fetchone()['count']


# closing() guarantees the connection is released even if a query raises;
# psycopg2's own `with conn:` only ends the transaction, it does not close.
with closing(psycopg2.connect(**DB_CONFIG, cursor_factory=RealDictCursor)) as conn:
    with conn.cursor() as cur:
        crawled = fetch_count(
            cur,
            "SELECT COUNT(DISTINCT hotel_id) as count FROM hotel_website_raw",
        )
        chunked = fetch_count(
            cur,
            "SELECT COUNT(DISTINCT metadata->>'hotel_id') as count "
            "FROM hotel_website_chunks WHERE metadata->>'hotel_id' IS NOT NULL",
        )
        total_chunks = fetch_count(
            cur,
            "SELECT COUNT(*) as count FROM hotel_website_chunks",
        )

print(f" 🕷️ Краулинг: {crawled:,} отелей")
print(f" 📦 Chunks: {chunked:,} отелей ({total_chunks:,} chunks)")
PYEOF