Files
hotels/check_spb_status.py

60 lines
1.8 KiB
Python
Raw Permalink Normal View History

import psycopg2
from urllib.parse import unquote
# Status report for St. Petersburg hotel website embeddings.
#
# Connects to the PostgreSQL database, counts (1) St. Petersburg hotels that
# have a website, (2) distinct hotels that have at least one embedded chunk,
# and (3) the total number of embedded chunks, then prints a progress summary.

# Total St. Petersburg hotels that have a non-empty website address.
TOTAL_HOTELS_SQL = """
SELECT COUNT(DISTINCT id)
FROM hotel_main
WHERE region_name = 'г. Санкт-Петербург'
AND website_address IS NOT NULL
AND website_address != ''
"""

# Distinct St. Petersburg hotels that have at least one chunk with an embedding.
# NOTE(review): unlike TOTAL_HOTELS_SQL this does not filter on
# website_address, so in principle it could exceed the total — confirm that
# chunks only exist for hotels with a website.
HOTELS_WITH_EMBEDDINGS_SQL = """
SELECT COUNT(DISTINCT c.metadata->>'hotel_id')
FROM hotel_website_chunks c
JOIN hotel_main h ON (c.metadata->>'hotel_id') = h.id::text
WHERE h.region_name = 'г. Санкт-Петербург'
AND c.embedding IS NOT NULL
"""

# Total number of embedded chunks belonging to St. Petersburg hotels.
TOTAL_CHUNKS_SQL = """
SELECT COUNT(*)
FROM hotel_website_chunks c
JOIN hotel_main h ON (c.metadata->>'hotel_id') = h.id::text
WHERE h.region_name = 'г. Санкт-Петербург'
AND c.embedding IS NOT NULL
"""


def _fetch_count(cursor, sql):
    """Execute a single-value COUNT query and return its first column."""
    cursor.execute(sql)
    return cursor.fetchone()[0]


# NOTE(review): credentials are hard-coded in source; consider loading them
# from environment variables or a secrets store instead.
conn = psycopg2.connect(
    host='147.45.189.234',
    port=5432,
    database='default_db',
    user='gen_user',
    password=unquote('2~~9_%5EkVsU%3F2%5CS')
)
try:
    # The cursor context manager closes the cursor; the connection itself is
    # closed in the ``finally`` clause so it is released even if a query fails.
    with conn.cursor() as cur:
        print("\n📊 СТАТУС СПБ ЭМБЕДИНГОВ:\n")
        total_spb = _fetch_count(cur, TOTAL_HOTELS_SQL)
        with_embeddings = _fetch_count(cur, HOTELS_WITH_EMBEDDINGS_SQL)
        total_chunks = _fetch_count(cur, TOTAL_CHUNKS_SQL)
finally:
    conn.close()

remaining = total_spb - with_embeddings
# Guard against division by zero when no hotels match the filter.
progress = 100 * with_embeddings / total_spb if total_spb else 0.0

print(f"✅ Отелей СПБ с сайтами: {total_spb}")
print(f"🧠 С эмбедингами: {with_embeddings}")
print(f"📈 Прогресс: {with_embeddings}/{total_spb} ({progress:.1f}%)")
print(f"⏳ Осталось: {remaining} отелей")
print(f"📦 Всего chunks: {total_chunks}")

if remaining == 0:
    print("\n🎉 ГОТОВО! Все отели СПБ обработаны!")
else:
    print(f"\n⚠️ Осталось {remaining} отелей без эмбедингов")