🚀 Full project sync: Hotels RAG & Audit System
✨ Major Features:
- Complete RAG system for hotel website analysis
- Hybrid audit with BGE-M3 embeddings + Natasha NER
- Universal horizontal Excel reports with dashboards
- Multi-region processing (SPb, Orel, Chukotka, Kamchatka)

📊 Completed Regions:
- Орловская область: 100% (36/36)
- Чукотский АО: 100% (4/4)
- г. Санкт-Петербург: 93% (893/960)
- Камчатский край: 87% (89/102)

🔧 Infrastructure:
- PostgreSQL with pgvector extension
- BGE-M3 embeddings API
- Browserless for web scraping
- N8N workflows for automation
- S3/Nextcloud file storage

📝 Documentation:
- Complete DB schemas
- API documentation
- Setup guides
- Status reports
This commit is contained in:
59
check_spb_status.py
Normal file
59
check_spb_status.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""Report embedding coverage for St. Petersburg hotel websites.

Connects to the hotels PostgreSQL database, counts the hotels of one region
that have a website, counts how many of them already have at least one chunk
with an embedding, and prints a short progress summary.
"""
from contextlib import closing
from typing import Tuple
from urllib.parse import unquote

# Region whose embedding progress is reported; must match
# hotel_main.region_name exactly.
REGION_NAME = 'г. Санкт-Петербург'

# NOTE(security): these credentials are committed in plain text. They are kept
# unchanged so the script behaves as before, but they should be rotated and
# loaded from the environment or a secrets store instead.
DB_PARAMS = {
    'host': '147.45.189.234',
    'port': 5432,
    'database': 'default_db',
    'user': 'gen_user',
    # The password is stored URL-encoded; unquote() restores the raw value.
    'password': unquote('2~~9_%5EkVsU%3F2%5CS'),
}

# Hotels of the region that have a non-empty website address.
HOTELS_WITH_SITE_SQL = """
    SELECT COUNT(DISTINCT id)
    FROM hotel_main
    WHERE region_name = %s
      AND website_address IS NOT NULL
      AND website_address != ''
"""

# Distinct hotels with at least one embedded chunk, and the number of embedded
# chunks. Both aggregates share the same row set, so one statement returns a
# consistent pair and saves a round trip.
EMBEDDING_STATS_SQL = """
    SELECT COUNT(DISTINCT c.metadata->>'hotel_id'), COUNT(*)
    FROM hotel_website_chunks c
    JOIN hotel_main h ON (c.metadata->>'hotel_id') = h.id::text
    WHERE h.region_name = %s
      AND c.embedding IS NOT NULL
"""


def progress_percent(done: int, total: int) -> float:
    """Return ``done`` as a percentage of ``total``.

    Returns 0.0 when ``total`` is zero instead of raising
    ``ZeroDivisionError`` (e.g. when the region has no hotels with a website).
    """
    if not total:
        return 0.0
    return 100 * done / total


def fetch_counts(cur, region: str = REGION_NAME) -> Tuple[int, int, int]:
    """Query the coverage counters for ``region``.

    Args:
        cur: An open DB-API cursor on the hotels database.
        region: Value to match against ``hotel_main.region_name``.

    Returns:
        ``(total_hotels, with_embeddings, total_chunks)``.
    """
    # The region is passed as a bound parameter rather than inlined in the SQL.
    cur.execute(HOTELS_WITH_SITE_SQL, (region,))
    total_hotels = cur.fetchone()[0]

    cur.execute(EMBEDDING_STATS_SQL, (region,))
    with_embeddings, total_chunks = cur.fetchone()

    return total_hotels, with_embeddings, total_chunks


def build_report(total_hotels: int, with_embeddings: int, total_chunks: int) -> str:
    """Format the summary text printed below the report header.

    Args:
        total_hotels: Hotels of the region that have a website.
        with_embeddings: Hotels that have at least one embedded chunk.
        total_chunks: Number of embedded chunks for the region.

    Returns:
        The multi-line report; printing it yields the same output as the
        original sequence of ``print`` calls.
    """
    remaining = total_hotels - with_embeddings
    progress = progress_percent(with_embeddings, total_hotels)

    lines = [
        f"✅ Отелей СПБ с сайтами: {total_hotels}",
        f"🧠 С эмбедингами: {with_embeddings}",
        f"📈 Прогресс: {with_embeddings}/{total_hotels} ({progress:.1f}%)",
        f"⏳ Осталось: {remaining} отелей",
        f"📦 Всего chunks: {total_chunks}",
    ]
    if remaining == 0:
        lines.append("\n🎉 ГОТОВО! Все отели СПБ обработаны!")
    else:
        lines.append(f"\n⚠️ Осталось {remaining} отелей без эмбедингов")
    return "\n".join(lines)


def main() -> None:
    """Connect to the database, gather the counters and print the report."""
    # Imported here so the pure helpers above stay importable (and testable)
    # on machines where the PostgreSQL driver is not installed.
    import psycopg2

    # closing() guarantees the connection and the cursor are released even if
    # a query fails; psycopg2's own ``with conn:`` only ends the transaction
    # and does not close the connection.
    with closing(psycopg2.connect(**DB_PARAMS)) as conn:
        with closing(conn.cursor()) as cur:
            print("\n📊 СТАТУС СПБ ЭМБЕДИНГОВ:\n")
            total_hotels, with_embeddings, total_chunks = fetch_counts(cur)

    print(build_report(total_hotels, with_embeddings, total_chunks))


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user