Files
hotels/process_kamchatka_only.py

77 lines
2.4 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
"""
Vectorization (embedding generation) for Kamchatka Krai ONLY (7 hotels remaining)
"""
import sys
sys.path.insert(0, '/root/engine/public_oversight/hotels')
from process_all_hotels_embeddings import EmbeddingProcessor
import logging
# Logging setup: INFO-level records are sent both to the
# kamchatka_embeddings.log file and to a default stream handler, formatted as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # The messages this script logs contain Cyrillic text and emoji, so
        # the file encoding is pinned to UTF-8 instead of relying on the
        # locale default, which may be unable to encode them.
        logging.FileHandler('kamchatka_embeddings.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def main():
    """Vectorize the Kamchatka Krai hotels that do not have chunks yet.

    Selects hotels from ``hotel_website_processed`` (joined to ``hotel_main``)
    whose region is 'Камчатский край', whose ``cleaned_text`` is longer than
    50 characters, and for which no row exists in ``hotel_website_chunks``.
    Each selected hotel is passed to ``processor.process_hotel(hotel_id)``;
    a truthy result counts as a success, a falsy one as a failure, and the
    totals are logged at the end.

    Any exception raised while querying or processing is logged together
    with its traceback and is not re-raised. The processor is closed on
    every exit path.

    Returns:
        None.
    """
    logger.info("🚀 Векторизация Камчатского края")
    processor = EmbeddingProcessor()
    try:
        # Fetch Kamchatka hotels without chunks: the LEFT JOIN combined with
        # "c.id IS NULL" keeps only hotels that have no matching chunk row.
        processor.cur.execute("""
SELECT DISTINCT p.hotel_id, m.full_name
FROM hotel_website_processed p
INNER JOIN hotel_main m ON p.hotel_id = m.id
LEFT JOIN hotel_website_chunks c ON p.hotel_id::text = c.metadata->>'hotel_id'
WHERE m.region_name = 'Камчатский край'
AND p.cleaned_text IS NOT NULL
AND LENGTH(p.cleaned_text) > 50
AND c.id IS NULL
ORDER BY m.full_name
""")
        hotels = processor.cur.fetchall()
        total = len(hotels)
        logger.info(f"📊 Найдено отелей Камчатки без chunks: {total}")
        if not hotels:
            # Nothing left to do; the finally clause still closes the processor.
            logger.info("Все отели Камчатки уже обработаны!")
            return

        # Process the hotels one by one and tally the outcomes.
        successful = 0
        failed = 0
        for i, (hotel_id, hotel_name) in enumerate(hotels, 1):
            logger.info(f"\n[{i}/{total}] 🏨 {hotel_name}")
            logger.info(f" ID: {hotel_id}")
            if processor.process_hotel(hotel_id):
                successful += 1
                logger.info(" ✅ Успешно")
            else:
                failed += 1
                logger.error(" ❌ Ошибка")

        logger.info("\n🎉 ЗАВЕРШЕНО!")
        logger.info(f" ✅ Успешно: {successful}")
        logger.info(f" ❌ Ошибок: {failed}")
    except Exception as e:
        # Top-level boundary of the script: logger.exception emits the same
        # ERROR-level message and appends the traceback, so the failure can
        # be diagnosed from the log.
        logger.exception(f"❌ Критическая ошибка: {e}")
    finally:
        processor.close()
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()