✨ Major Features:
- Complete RAG system for hotel website analysis
- Hybrid audit with BGE-M3 embeddings + Natasha NER
- Universal horizontal Excel reports with dashboards
- Multi-region processing (SPb, Orel, Chukotka, Kamchatka)

📊 Completed Regions:
- Орловская область: 100% (36/36)
- Чукотский АО: 100% (4/4)
- г. Санкт-Петербург: 93% (893/960)
- Камчатский край: 87% (89/102)

🔧 Infrastructure:
- PostgreSQL with pgvector extension
- BGE-M3 embeddings API
- Browserless for web scraping
- N8N workflows for automation
- S3/Nextcloud file storage

📝 Documentation:
- Complete DB schemas
- API documentation
- Setup guides
- Status reports
60 lines
1.8 KiB
Python
60 lines
1.8 KiB
Python
import psycopg2
|
||
from urllib.parse import unquote
|
||
|
||
# Region whose embedding coverage this script reports on.
REGION_NAME = 'г. Санкт-Петербург'

# Counts hotels of the region that have a non-empty website address.
HOTELS_WITH_SITE_SQL = """
    SELECT COUNT(DISTINCT id)
    FROM hotel_main
    WHERE region_name = %s
      AND website_address IS NOT NULL
      AND website_address != ''
"""

# Returns, in a single pass over the join, the number of distinct hotels that
# have at least one embedded chunk and the total number of embedded chunks.
EMBEDDING_STATS_SQL = """
    SELECT COUNT(DISTINCT c.metadata->>'hotel_id'), COUNT(*)
    FROM hotel_website_chunks c
    JOIN hotel_main h ON (c.metadata->>'hotel_id') = h.id::text
    WHERE h.region_name = %s
      AND c.embedding IS NOT NULL
"""


def progress_percent(done: int, total: int) -> float:
    """Return ``done`` as a percentage of ``total``.

    A ``total`` of zero yields ``0.0`` instead of raising
    ``ZeroDivisionError``, so the report still prints when the region has no
    hotels with a website.
    """
    if total == 0:
        return 0.0
    return 100 * done / total


def build_report(total_hotels: int, embedded_hotels: int, chunk_count: int) -> list:
    """Build the lines of the status report, in print order.

    Args:
        total_hotels: Hotels of the region that have a website.
        embedded_hotels: Hotels that have at least one embedded chunk.
        chunk_count: Total number of embedded chunks.

    Returns:
        A list of strings; each one is meant to be printed on its own.
    """
    # NOTE(review): the embedded-hotel query does not filter on
    # website_address, so this difference could in principle go negative.
    remaining = total_hotels - embedded_hotels
    progress = progress_percent(embedded_hotels, total_hotels)

    lines = [
        f"✅ Отелей СПБ с сайтами: {total_hotels}",
        f"🧠 С эмбедингами: {embedded_hotels}",
        f"📈 Прогресс: {embedded_hotels}/{total_hotels} ({progress:.1f}%)",
        f"⏳ Осталось: {remaining} отелей",
        f"📦 Всего chunks: {chunk_count}",
    ]
    if remaining == 0:
        lines.append("\n🎉 ГОТОВО! Все отели СПБ обработаны!")
    else:
        lines.append(f"\n⚠️ Осталось {remaining} отелей без эмбедингов")
    return lines


def fetch_status(conn) -> tuple:
    """Query the counts needed for the report.

    Args:
        conn: An open psycopg2 connection.

    Returns:
        ``(total_hotels, embedded_hotels, chunk_count)``.
    """
    # The cursor context manager closes the cursor even if a query fails.
    with conn.cursor() as cur:
        cur.execute(HOTELS_WITH_SITE_SQL, (REGION_NAME,))
        total_hotels = cur.fetchone()[0]

        cur.execute(EMBEDDING_STATS_SQL, (REGION_NAME,))
        embedded_hotels, chunk_count = cur.fetchone()
    return total_hotels, embedded_hotels, chunk_count


def main() -> None:
    """Connect to the database and print the embedding status report."""
    # NOTE(review): credentials are hard-coded in the source; they should be
    # rotated and moved to configuration or environment variables.
    conn = psycopg2.connect(
        host='147.45.189.234',
        port=5432,
        database='default_db',
        user='gen_user',
        password=unquote('2~~9_%5EkVsU%3F2%5CS'),
    )
    try:
        print("\n📊 СТАТУС СПБ ЭМБЕДИНГОВ:\n")
        counts = fetch_status(conn)
    finally:
        # Close explicitly: psycopg2's ``with conn`` only ends the
        # transaction, it does not close the connection.
        conn.close()

    for line in build_report(*counts):
        print(line)


if __name__ == "__main__":
    main()
|