✨ Major Features: - Complete RAG system for hotel website analysis - Hybrid audit with BGE-M3 embeddings + Natasha NER - Universal horizontal Excel reports with dashboards - Multi-region processing (SPb, Orel, Chukotka, Kamchatka) 📊 Completed Regions: - Орловская область: 100% (36/36) - Чукотский АО: 100% (4/4) - г. Санкт-Петербург: 93% (893/960) - Камчатский край: 87% (89/102) 🔧 Infrastructure: - PostgreSQL with pgvector extension - BGE-M3 embeddings API - Browserless for web scraping - N8N workflows for automation - S3/Nextcloud file storage 📝 Documentation: - Complete DB schemas - API documentation - Setup guides - Status reports
45 lines
1.3 KiB
Python
45 lines
1.3 KiB
Python
#!/usr/bin/env python3
"""Print a quick status report for the mass crawler.

The report has two parts:

1. Every running process whose ``ps aux`` row mentions ``mass_crawler.py``
   (PID, CPU %, RAM % and the full command line).
2. The most recently modified ``mass_crawler_*.log`` file in the working
   directory: its name, modification time, size and its last lines.
"""
import glob
import os
import subprocess
from collections import deque
from datetime import datetime
from typing import List, Optional, Tuple

# Directory the script switches into before looking for log files.
WORK_DIR = '/root/engine/public_oversight/hotels'
# Substring that identifies a crawler process in a `ps aux` row.
CRAWLER_SCRIPT = 'mass_crawler.py'
# Glob pattern (relative to WORK_DIR) matching the crawler log files.
LOG_PATTERN = 'mass_crawler_*.log'
# How many trailing log lines are shown in the report.
TAIL_LINES = 10


def find_crawler_processes(ps_output: str) -> List[Tuple[str, str, str, str]]:
    """Extract crawler processes from the text printed by ``ps aux``.

    Args:
        ps_output: Raw stdout of ``ps aux``.

    Returns:
        One ``(pid, cpu, ram, command)`` tuple per matching row, in the
        order the rows appear. ``command`` is everything from the 11th
        whitespace-separated field onward, re-joined with single spaces.
    """
    found = []
    for row in ps_output.split('\n'):
        # Skip unrelated rows and any `grep mass_crawler.py` helper process.
        if CRAWLER_SCRIPT not in row or 'grep' in row:
            continue
        fields = row.split()
        # A row too short to carry PID/CPU/RAM columns cannot be reported.
        if len(fields) < 4:
            continue
        found.append((fields[1], fields[2], fields[3], ' '.join(fields[10:])))
    return found


def find_latest_log(pattern: str = LOG_PATTERN) -> Optional[str]:
    """Return the most recently modified file matching *pattern*, or None."""
    candidates = glob.glob(pattern)
    if not candidates:
        return None
    return max(candidates, key=os.path.getmtime)


def tail_log(path: str, count: int = TAIL_LINES) -> Tuple[int, List[str]]:
    """Count the lines of a log file and collect its last *count* lines.

    The file is streamed through a bounded deque, so only *count* lines are
    held in memory at a time. It is decoded as UTF-8 and undecodable bytes
    are replaced rather than raising, so a damaged log still gets reported.

    Args:
        path: Log file to read.
        count: Maximum number of trailing lines to keep.

    Returns:
        ``(total_lines, last_lines)``; the lines keep their line endings.
    """
    total = 0
    tail = deque(maxlen=count)
    with open(path, 'r', encoding='utf-8', errors='replace') as handle:
        for raw in handle:
            total += 1
            tail.append(raw)
    return total, list(tail)


def report_processes() -> None:
    """Print PID, CPU, RAM and command line of each running crawler."""
    print("🔍 ПРОЦЕССЫ КРАУЛЕРА:")
    try:
        result = subprocess.run(
            ['ps', 'aux'], capture_output=True, text=True, timeout=5
        )
    except (subprocess.TimeoutExpired, OSError) as err:
        # A failed process listing must not prevent the log report below.
        print(f"⚠️ Не удалось получить список процессов: {err}")
        return

    for pid, cpu, ram, command in find_crawler_processes(result.stdout):
        print(f"\n✅ PID: {pid}")
        print(f" CPU: {cpu}%")
        print(f" RAM: {ram}%")
        print(f" Команда: {command}")


def report_latest_log() -> None:
    """Print name, mtime, size and the last lines of the newest crawler log."""
    latest = find_latest_log()
    if latest is None:
        return

    mtime = datetime.fromtimestamp(os.path.getmtime(latest)).strftime('%H:%M:%S')
    size = os.path.getsize(latest)

    print(f"\n📄 ПОСЛЕДНИЙ ЛОГ: {latest}")
    print(f" Изменён: {mtime}")
    print(f" Размер: {size:,} байт")

    # Last TAIL_LINES lines; blank ones are skipped in the output.
    total, last_lines = tail_log(latest)
    print(f"\n📋 ПОСЛЕДНИЕ СТРОКИ ({total} всего):")
    for entry in last_lines:
        if entry.strip():
            print(f" {entry.rstrip()}")


def main() -> None:
    """Switch to the crawler directory and print both report sections."""
    os.chdir(WORK_DIR)
    report_processes()
    report_latest_log()


if __name__ == '__main__':
    main()