# File viewer header: 58 lines, 1.7 KiB, Python
import psycopg2
from urllib.parse import unquote

# Diagnostic script: inspects hotels of one region that have no website
# embeddings and reports how many of them have rows in
# hotel_website_processed.

# Region whose hotels are analysed; passed to the queries as a bound parameter.
REGION_NAME = 'г. Санкт-Петербург'

# Number of hotels used in the report headings and in the "missing" figure.
# NOTE(review): this is a hard-coded snapshot, not a value read from the
# database - confirm it is still current before trusting the report.
REMAINING_HOTELS = 67

# Sample of hotels in the region that have a website address but no chunk
# with an embedding.
# NOT EXISTS is used instead of NOT IN on purpose: if any chunk lacks
# metadata->>'hotel_id', the NOT IN sub-select contains a NULL and the
# predicate is never true, so the query would silently return no rows.
HOTELS_WITHOUT_EMBEDDINGS_SQL = """
    SELECT h.id, h.full_name, h.website_address
    FROM hotel_main h
    WHERE h.region_name = %s
      AND h.website_address IS NOT NULL
      AND h.website_address != ''
      AND NOT EXISTS (
          SELECT 1
          FROM hotel_website_chunks c
          WHERE c.embedding IS NOT NULL
            AND (c.metadata->>'hotel_id')::uuid = h.id
      )
    LIMIT 10
"""

# Number of distinct hotels in the region that have rows in
# hotel_website_processed but no chunk with an embedding.
# Unlike the listing query above, this one does not filter on website_address.
PROCESSED_WITHOUT_EMBEDDINGS_SQL = """
    SELECT COUNT(DISTINCT p.hotel_id)
    FROM hotel_website_processed p
    JOIN hotel_main h ON p.hotel_id = h.id
    WHERE h.region_name = %s
      AND NOT EXISTS (
          SELECT 1
          FROM hotel_website_chunks c
          WHERE c.embedding IS NOT NULL
            AND (c.metadata->>'hotel_id')::uuid = h.id
      )
"""

# SECURITY NOTE(review): the host, user and password are committed in source.
# They are left in place so the script keeps working, but they should be moved
# to environment variables / a secret store and the password rotated.
conn = psycopg2.connect(
    host='147.45.189.234',
    port=5432,
    database='default_db',
    user='gen_user',
    # The password is stored percent-encoded; unquote() restores the raw value.
    password=unquote('2~~9_%5EkVsU%3F2%5CS'),
)
try:
    # The cursor context manager closes the cursor; the finally clause below
    # closes the connection even when a query raises.
    with conn.cursor() as cur:
        print(f"\n🔍 АНАЛИЗ ОСТАВШИХСЯ {REMAINING_HOTELS} ОТЕЛЕЙ:\n")

        # Hotels that have a website but no embeddings.
        cur.execute(HOTELS_WITHOUT_EMBEDDINGS_SQL, (REGION_NAME,))

        print("📋 Примеры отелей без эмбедингов:")
        for _hotel_id, full_name, website in cur.fetchall():
            # full_name may be NULL in the database; slicing None would raise.
            print(f" - {(full_name or '')[:50]}: {website}")

        # Do these hotels have data in hotel_website_processed?
        cur.execute(PROCESSED_WITHOUT_EMBEDDINGS_SQL, (REGION_NAME,))
        in_processed = cur.fetchone()[0]

    print(f"\n📊 Из {REMAINING_HOTELS} отелей:")
    print(f" ✅ Есть в hotel_website_processed: {in_processed}")
    print(f" ❌ Нет в hotel_website_processed: {REMAINING_HOTELS - in_processed}")

    if in_processed > 0:
        print("\n✅ Скрипт должен их обработать!")
    else:
        print("\n❌ У этих отелей не спарсились сайты - эмбединги невозможны")
finally:
    conn.close()