Files
hotels/process_orel_only.py
Фёдор 0cf3297290 Проект аудита отелей: основные скрипты и документация
- Краулеры: smart_crawler.py, regional_crawler.py
- Аудит: audit_orel_to_excel.py, audit_chukotka_to_excel.py
- РКН проверка: check_rkn_registry.py, recheck_unclear_rkn.py
- Отчёты: create_orel_horizontal_report.py
- Обработка: process_all_hotels_embeddings.py
- Документация: README.md, DB_SCHEMA_REFERENCE.md
2025-10-16 10:52:09 +03:00

77 lines
2.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Векторизация ТОЛЬКО Орловской области (осталось 5 отелей)
"""
import sys
sys.path.insert(0, '/root/engine/public_oversight/hotels')
from process_all_hotels_embeddings import EmbeddingProcessor
import logging
# Logging setup: every record goes both to a log file and to the console.
LOG_FILE = 'orel_embeddings.log'

# The messages logged by this script contain Cyrillic text and emoji, so the
# log file is opened as UTF-8 explicitly instead of relying on the locale's
# default encoding (which may be ASCII, e.g. under cron with LANG=C).
file_handler = logging.FileHandler(LOG_FILE, encoding='utf-8')

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        file_handler,
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)
def main():
    """Vectorize the Orel Oblast hotels that do not have chunks yet.

    Selects, through ``processor.cur``, the hotels whose region is
    'Орловская область', whose cleaned website text is longer than 50
    characters and which have no matching row in ``hotel_website_chunks``.
    Each selected hotel is passed to ``processor.process_hotel``; a truthy
    result is counted as a success, anything else as a failure, and the
    totals are logged at the end.

    Any exception raised while querying or processing is logged together
    with its traceback and is not re-raised. The processor is closed in
    every case.
    """
    logger.info("🚀 Векторизация Орловской области")

    processor = EmbeddingProcessor()

    try:
        # Fetch Orel Oblast hotels that have usable text but no chunks.
        # The LEFT JOIN + "c.id IS NULL" pair keeps only hotels without any
        # chunk row whose metadata 'hotel_id' matches the hotel.
        processor.cur.execute("""
            SELECT DISTINCT p.hotel_id, m.full_name
            FROM hotel_website_processed p
            INNER JOIN hotel_main m ON p.hotel_id = m.id
            LEFT JOIN hotel_website_chunks c ON p.hotel_id::text = c.metadata->>'hotel_id'
            WHERE m.region_name = 'Орловская область'
            AND p.cleaned_text IS NOT NULL
            AND LENGTH(p.cleaned_text) > 50
            AND c.id IS NULL
            ORDER BY m.full_name
        """)

        hotels = processor.cur.fetchall()
        logger.info(f"📊 Найдено отелей Орла без chunks: {len(hotels)}")

        if not hotels:
            logger.info("Все отели Орловской области уже обработаны!")
            return

        # Process the hotels one by one, tallying the outcomes.
        successful = 0
        failed = 0

        for i, (hotel_id, hotel_name) in enumerate(hotels, 1):
            logger.info(f"\n[{i}/{len(hotels)}] 🏨 {hotel_name}")
            logger.info(f" ID: {hotel_id}")

            if processor.process_hotel(hotel_id):
                successful += 1
                logger.info(" ✅ Успешно")
            else:
                failed += 1
                logger.error(" ❌ Ошибка")

        logger.info("\n🎉 ЗАВЕРШЕНО!")
        logger.info(f" ✅ Успешно: {successful}")
        logger.info(f" ❌ Ошибок: {failed}")

    except Exception as e:
        # logger.exception logs at ERROR level like before, but also records
        # the traceback, so a critical failure can be diagnosed from the log.
        logger.exception(f"❌ Критическая ошибка: {e}")
    finally:
        # Always release the processor, including on the early return above.
        processor.close()
if __name__ == "__main__":
    # Script entry point: run the vectorization only when this file is
    # executed directly, not when it is imported as a module.
    main()