Files
hotels/process_kamchatka_only.py
Фёдор 0cf3297290 Проект аудита отелей: основные скрипты и документация
- Краулеры: smart_crawler.py, regional_crawler.py
- Аудит: audit_orel_to_excel.py, audit_chukotka_to_excel.py
- РКН проверка: check_rkn_registry.py, recheck_unclear_rkn.py
- Отчёты: create_orel_horizontal_report.py
- Обработка: process_all_hotels_embeddings.py
- Документация: README.md, DB_SCHEMA_REFERENCE.md
2025-10-16 10:52:09 +03:00

77 lines
2.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Векторизация ТОЛЬКО Камчатского края (осталось 7 отелей)
"""
import sys
sys.path.insert(0, '/root/engine/public_oversight/hotels')
from process_all_hotels_embeddings import EmbeddingProcessor
import logging
# Logging setup: INFO and above go both to a log file and to the console
# (StreamHandler with no argument writes to stderr).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8: the messages contain Cyrillic text and emoji, which
        # the locale's default encoding may be unable to represent.
        logging.FileHandler('kamchatka_embeddings.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)
def main():
    """Vectorize the Kamchatka Krai hotels that have no chunks yet.

    Uses the ``EmbeddingProcessor`` cursor to select hotels whose region is
    'Камчатский край', whose cleaned website text is longer than 50
    characters, and for which no row in ``hotel_website_chunks`` references
    the hotel id. Each selected hotel is passed to
    ``processor.process_hotel``; a truthy result is counted as a success,
    anything else as a failure, and the totals are logged at the end.

    Any exception raised along the way is logged with its traceback and is
    not re-raised. The processor is closed in all cases.
    """
    logger.info("🚀 Векторизация Камчатского края")
    processor = EmbeddingProcessor()

    try:
        # Fetch Kamchatka hotels without chunks. The LEFT JOIN combined with
        # "c.id IS NULL" keeps only hotels that have no matching chunk row;
        # the chunk side stores the hotel id as text inside its metadata.
        processor.cur.execute("""
            SELECT DISTINCT p.hotel_id, m.full_name
            FROM hotel_website_processed p
            INNER JOIN hotel_main m ON p.hotel_id = m.id
            LEFT JOIN hotel_website_chunks c ON p.hotel_id::text = c.metadata->>'hotel_id'
            WHERE m.region_name = 'Камчатский край'
            AND p.cleaned_text IS NOT NULL
            AND LENGTH(p.cleaned_text) > 50
            AND c.id IS NULL
            ORDER BY m.full_name
        """)
        hotels = processor.cur.fetchall()
        total = len(hotels)
        logger.info(f"📊 Найдено отелей Камчатки без chunks: {total}")

        if not hotels:
            logger.info("Все отели Камчатки уже обработаны!")
            return

        # Process each hotel and keep a success/failure tally.
        successful = 0
        failed = 0
        for i, (hotel_id, hotel_name) in enumerate(hotels, 1):
            logger.info(f"\n[{i}/{total}] 🏨 {hotel_name}")
            logger.info(f" ID: {hotel_id}")
            if processor.process_hotel(hotel_id):
                successful += 1
                logger.info(" ✅ Успешно")
            else:
                failed += 1
                logger.error(" ❌ Ошибка")

        logger.info("\n🎉 ЗАВЕРШЕНО!")
        logger.info(f" ✅ Успешно: {successful}")
        logger.info(f" ❌ Ошибок: {failed}")
    except Exception as e:
        # Top-level boundary: logger.exception records the traceback as well
        # as the message, so the failure can be diagnosed from the log file.
        logger.exception(f"❌ Критическая ошибка: {e}")
    finally:
        processor.close()
# Script entry point: run the vectorization only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()