Files
hotels/check_spb_status.py
Фёдор 684fada337 🚀 Full project sync: Hotels RAG & Audit System
 Major Features:
- Complete RAG system for hotel website analysis
- Hybrid audit with BGE-M3 embeddings + Natasha NER
- Universal horizontal Excel reports with dashboards
- Multi-region processing (SPb, Orel, Chukotka, Kamchatka)

📊 Completed Regions:
- Орловская область: 100% (36/36)
- Чукотский АО: 100% (4/4)
- г. Санкт-Петербург: 93% (893/960)
- Камчатский край: 87% (89/102)

🔧 Infrastructure:
- PostgreSQL with pgvector extension
- BGE-M3 embeddings API
- Browserless for web scraping
- N8N workflows for automation
- S3/Nextcloud file storage

📝 Documentation:
- Complete DB schemas
- API documentation
- Setup guides
- Status reports
2025-10-27 22:49:42 +03:00

60 lines
1.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import psycopg2
from urllib.parse import unquote
# Status report: how many St. Petersburg hotels that have a website already
# have embedded website chunks stored in `hotel_website_chunks`.

# NOTE(security): the connection credentials are hard-coded in this script.
# Anyone who can read the file can reach the database; move them to
# environment variables / a secrets store and rotate the password.
conn = psycopg2.connect(
    host='147.45.189.234',
    port=5432,
    database='default_db',
    user='gen_user',
    password=unquote('2~~9_%5EkVsU%3F2%5CS'),
)

# Region value shared by all three queries; passed as a bound parameter.
SPB_REGION = 'г. Санкт-Петербург'


def _fetch_count(cursor, sql, params):
    """Execute a single-value COUNT query and return that value."""
    cursor.execute(sql, params)
    return cursor.fetchone()[0]


# try/finally guarantees the connection is released even if a query fails.
try:
    cur = conn.cursor()

    print("\n📊 СТАТУС СПБ ЭМБЕДИНГОВ:\n")

    # Total St. Petersburg hotels that have a non-empty website address.
    total_spb = _fetch_count(
        cur,
        """
        SELECT COUNT(DISTINCT id)
        FROM hotel_main
        WHERE region_name = %s
          AND website_address IS NOT NULL
          AND website_address != ''
        """,
        (SPB_REGION,),
    )

    # Distinct hotels that have at least one chunk with an embedding.
    # NOTE(review): this query does not apply the website filter used above,
    # so in principle it could exceed `total_spb` — confirm against the data.
    with_embeddings = _fetch_count(
        cur,
        """
        SELECT COUNT(DISTINCT c.metadata->>'hotel_id')
        FROM hotel_website_chunks c
        JOIN hotel_main h ON (c.metadata->>'hotel_id') = h.id::text
        WHERE h.region_name = %s
          AND c.embedding IS NOT NULL
        """,
        (SPB_REGION,),
    )

    # Total number of embedded chunks for the region.
    total_chunks = _fetch_count(
        cur,
        """
        SELECT COUNT(*)
        FROM hotel_website_chunks c
        JOIN hotel_main h ON (c.metadata->>'hotel_id') = h.id::text
        WHERE h.region_name = %s
          AND c.embedding IS NOT NULL
        """,
        (SPB_REGION,),
    )

    remaining = total_spb - with_embeddings
    # Guard against ZeroDivisionError when no hotels match the filter.
    progress = 100 * with_embeddings / total_spb if total_spb else 0.0

    print(f"✅ Отелей СПБ с сайтами: {total_spb}")
    print(f"🧠 С эмбедингами: {with_embeddings}")
    print(f"📈 Прогресс: {with_embeddings}/{total_spb} ({progress:.1f}%)")
    print(f"⏳ Осталось: {remaining} отелей")
    print(f"📦 Всего chunks: {total_chunks}")

    if remaining == 0:
        print("\n🎉 ГОТОВО! Все отели СПБ обработаны!")
    else:
        print(f"\n⚠️ Осталось {remaining} отелей без эмбедингов")
finally:
    conn.close()