269 lines
9.2 KiB
Python
269 lines
9.2 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
Повторная обработка неудачных отелей СПб через n8n webhook
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import psycopg2
|
|||
|
|
from psycopg2.extras import RealDictCursor
|
|||
|
|
import requests
|
|||
|
|
import json
|
|||
|
|
import time
|
|||
|
|
from urllib.parse import unquote
|
|||
|
|
|
|||
|
|
# Конфигурация
|
|||
|
|
DB_CONFIG = {
|
|||
|
|
'host': '147.45.189.234',
|
|||
|
|
'port': 5432,
|
|||
|
|
'database': 'default_db',
|
|||
|
|
'user': 'gen_user',
|
|||
|
|
'password': unquote('2~~9_%5EkVsU%3F2%5CS')
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
N8N_WEBHOOK_URL = "https://n8n.clientright.pro/webhook/6be4a7b9-a016-4252-841f-0ebca367914f"
|
|||
|
|
REGION = 'г. Санкт-Петербург'
|
|||
|
|
AUDIT_VERSION = 'v1.0_with_rkn'
|
|||
|
|
|
|||
|
|
def get_failed_hotels():
|
|||
|
|
"""Получить список отелей, которые упали в ошибку"""
|
|||
|
|
conn = psycopg2.connect(**DB_CONFIG, cursor_factory=RealDictCursor)
|
|||
|
|
cur = conn.cursor()
|
|||
|
|
|
|||
|
|
# Все отели с чанками
|
|||
|
|
cur.execute("""
|
|||
|
|
SELECT DISTINCT metadata->>'hotel_id' as hotel_id
|
|||
|
|
FROM hotel_website_chunks
|
|||
|
|
WHERE metadata->>'hotel_id' IN (
|
|||
|
|
SELECT id::text FROM hotel_main WHERE region_name = %s
|
|||
|
|
)
|
|||
|
|
""", (REGION,))
|
|||
|
|
|
|||
|
|
all_hotels = {row['hotel_id'] for row in cur.fetchall()}
|
|||
|
|
|
|||
|
|
# Уже обработанные
|
|||
|
|
cur.execute("""
|
|||
|
|
SELECT hotel_id::text
|
|||
|
|
FROM hotel_audit_results
|
|||
|
|
WHERE region_name = %s AND audit_version = %s
|
|||
|
|
""", (REGION, AUDIT_VERSION))
|
|||
|
|
|
|||
|
|
processed_hotels = {row['hotel_id'] for row in cur.fetchall()}
|
|||
|
|
|
|||
|
|
# Неудачники
|
|||
|
|
failed_hotel_ids = list(all_hotels - processed_hotels)
|
|||
|
|
|
|||
|
|
# Получаем полную информацию о неудачниках
|
|||
|
|
cur.execute("""
|
|||
|
|
SELECT
|
|||
|
|
hm.id,
|
|||
|
|
hm.full_name as hotel_name,
|
|||
|
|
hm.website_address,
|
|||
|
|
hm.region_name,
|
|||
|
|
hm.rkn_registry_number as registry_number
|
|||
|
|
FROM hotel_main hm
|
|||
|
|
WHERE hm.id::text = ANY(%s)
|
|||
|
|
ORDER BY hm.full_name
|
|||
|
|
""", (failed_hotel_ids,))
|
|||
|
|
|
|||
|
|
hotels = cur.fetchall()
|
|||
|
|
|
|||
|
|
cur.close()
|
|||
|
|
conn.close()
|
|||
|
|
|
|||
|
|
return hotels
|
|||
|
|
|
|||
|
|
def get_hotel_chunks(hotel_id):
|
|||
|
|
"""Получить чанки для отеля"""
|
|||
|
|
conn = psycopg2.connect(**DB_CONFIG, cursor_factory=RealDictCursor)
|
|||
|
|
cur = conn.cursor()
|
|||
|
|
|
|||
|
|
cur.execute("""
|
|||
|
|
SELECT text as chunk_text, metadata
|
|||
|
|
FROM hotel_website_chunks
|
|||
|
|
WHERE metadata->>'hotel_id' = %s
|
|||
|
|
ORDER BY (metadata->>'page_number')::int, (metadata->>'chunk_index')::int
|
|||
|
|
""", (str(hotel_id),))
|
|||
|
|
|
|||
|
|
chunks = cur.fetchall()
|
|||
|
|
|
|||
|
|
cur.close()
|
|||
|
|
conn.close()
|
|||
|
|
|
|||
|
|
return chunks
|
|||
|
|
|
|||
|
|
def audit_hotel_via_webhook(hotel, chunks):
|
|||
|
|
"""Отправить отель на аудит через n8n webhook"""
|
|||
|
|
payload = {
|
|||
|
|
"hotel_id": str(hotel['id']),
|
|||
|
|
"hotel_name": hotel['hotel_name'],
|
|||
|
|
"website": hotel['website_address'] or "",
|
|||
|
|
"region_name": hotel['region_name'],
|
|||
|
|
"registry_number": hotel['registry_number'] or "",
|
|||
|
|
"chunks": [
|
|||
|
|
{
|
|||
|
|
"text": chunk['chunk_text'],
|
|||
|
|
"metadata": chunk['metadata']
|
|||
|
|
}
|
|||
|
|
for chunk in chunks
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
response = requests.post(
|
|||
|
|
N8N_WEBHOOK_URL,
|
|||
|
|
json=payload,
|
|||
|
|
timeout=180 # 3 минуты таймаут (для больших отелей)
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
if response.status_code == 200:
|
|||
|
|
try:
|
|||
|
|
result = response.json()
|
|||
|
|
# n8n может вернуть массив или объект
|
|||
|
|
if isinstance(result, list) and len(result) > 0:
|
|||
|
|
result = result[0]
|
|||
|
|
# Проверяем наличие нужных полей
|
|||
|
|
if not isinstance(result, dict):
|
|||
|
|
return False, f"Response is not a dict: {type(result)}, content: {str(result)[:200]}"
|
|||
|
|
|
|||
|
|
# Преобразуем структуру n8n в структуру для БД
|
|||
|
|
if 'found' in result and 'total_criteria' in result:
|
|||
|
|
# Новый формат от n8n
|
|||
|
|
result['total_score'] = result.get('found', 0)
|
|||
|
|
result['max_score'] = result.get('total_criteria', 17)
|
|||
|
|
result['score_percentage'] = result.get('compliance_percentage', 0.0)
|
|||
|
|
# Преобразуем criteria в criteria_results
|
|||
|
|
if 'criteria' in result:
|
|||
|
|
result['criteria_results'] = result['criteria']
|
|||
|
|
|
|||
|
|
if 'total_score' not in result:
|
|||
|
|
return False, f"Missing required fields in response: {json.dumps(result, ensure_ascii=False)[:200]}"
|
|||
|
|
return True, result
|
|||
|
|
except json.JSONDecodeError:
|
|||
|
|
return False, f"Invalid JSON response: {response.text[:100]}"
|
|||
|
|
else:
|
|||
|
|
return False, f"HTTP {response.status_code}: {response.text[:100]}"
|
|||
|
|
|
|||
|
|
except requests.exceptions.Timeout:
|
|||
|
|
return False, "Timeout after 180 seconds"
|
|||
|
|
except Exception as e:
|
|||
|
|
return False, str(e)
|
|||
|
|
|
|||
|
|
def save_audit_result(hotel, audit_result):
|
|||
|
|
"""Сохранить результат аудита"""
|
|||
|
|
conn = psycopg2.connect(**DB_CONFIG)
|
|||
|
|
cur = conn.cursor()
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
cur.execute("""
|
|||
|
|
INSERT INTO hotel_audit_results (
|
|||
|
|
hotel_id,
|
|||
|
|
hotel_name,
|
|||
|
|
website,
|
|||
|
|
region_name,
|
|||
|
|
total_score,
|
|||
|
|
max_score,
|
|||
|
|
score_percentage,
|
|||
|
|
criteria_results,
|
|||
|
|
audit_version
|
|||
|
|
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
|
|||
|
|
ON CONFLICT (hotel_id, audit_version)
|
|||
|
|
DO UPDATE SET
|
|||
|
|
total_score = EXCLUDED.total_score,
|
|||
|
|
max_score = EXCLUDED.max_score,
|
|||
|
|
score_percentage = EXCLUDED.score_percentage,
|
|||
|
|
criteria_results = EXCLUDED.criteria_results
|
|||
|
|
""", (
|
|||
|
|
hotel['id'],
|
|||
|
|
hotel['hotel_name'],
|
|||
|
|
hotel['website_address'],
|
|||
|
|
hotel['region_name'],
|
|||
|
|
audit_result['total_score'],
|
|||
|
|
audit_result['max_score'],
|
|||
|
|
audit_result['score_percentage'],
|
|||
|
|
json.dumps(audit_result['criteria_results'], ensure_ascii=False),
|
|||
|
|
AUDIT_VERSION
|
|||
|
|
))
|
|||
|
|
|
|||
|
|
conn.commit()
|
|||
|
|
return True
|
|||
|
|
except Exception as e:
|
|||
|
|
conn.rollback()
|
|||
|
|
print(f" ❌ Ошибка сохранения: {e}")
|
|||
|
|
return False
|
|||
|
|
finally:
|
|||
|
|
cur.close()
|
|||
|
|
conn.close()
|
|||
|
|
|
|||
|
|
def main():
|
|||
|
|
print("🔄 ПОВТОРНАЯ ОБРАБОТКА НЕУДАЧНЫХ ОТЕЛЕЙ СПб")
|
|||
|
|
print("=" * 60)
|
|||
|
|
|
|||
|
|
# Получаем неудачников
|
|||
|
|
failed_hotels = get_failed_hotels()
|
|||
|
|
total = len(failed_hotels)
|
|||
|
|
|
|||
|
|
print(f"📊 Найдено неудачных отелей: {total}")
|
|||
|
|
print(f"🚀 Начинаем обработку через n8n webhook...")
|
|||
|
|
print()
|
|||
|
|
|
|||
|
|
success_count = 0
|
|||
|
|
error_count = 0
|
|||
|
|
start_time = time.time()
|
|||
|
|
|
|||
|
|
for idx, hotel in enumerate(failed_hotels, 1):
|
|||
|
|
print(f"[{idx}/{total}] {hotel['hotel_name']}")
|
|||
|
|
print(f" 🔗 {hotel['website_address'] or 'Нет сайта'}")
|
|||
|
|
|
|||
|
|
# Получаем чанки
|
|||
|
|
chunks = get_hotel_chunks(hotel['id'])
|
|||
|
|
print(f" 📦 Chunks: {len(chunks)}")
|
|||
|
|
|
|||
|
|
if not chunks:
|
|||
|
|
print(f" ⚠️ Нет чанков, пропускаем")
|
|||
|
|
error_count += 1
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
# Отправляем на аудит
|
|||
|
|
print(f" 🔍 Аудит: {hotel['hotel_name']}...")
|
|||
|
|
success, result = audit_hotel_via_webhook(hotel, chunks)
|
|||
|
|
|
|||
|
|
if success:
|
|||
|
|
# Сохраняем результат
|
|||
|
|
if save_audit_result(hotel, result):
|
|||
|
|
score = result['score_percentage']
|
|||
|
|
print(f" ✅ Успех! Балл: {score:.1f}%")
|
|||
|
|
success_count += 1
|
|||
|
|
else:
|
|||
|
|
print(f" ❌ Ошибка сохранения")
|
|||
|
|
error_count += 1
|
|||
|
|
else:
|
|||
|
|
print(f" ❌ Ошибка: {result}")
|
|||
|
|
error_count += 1
|
|||
|
|
|
|||
|
|
# Прогресс
|
|||
|
|
if idx % 10 == 0:
|
|||
|
|
elapsed = time.time() - start_time
|
|||
|
|
speed = idx / elapsed
|
|||
|
|
eta = (total - idx) / speed if speed > 0 else 0
|
|||
|
|
print(f"\n 📊 Прогресс: {idx}/{total} ({idx/total*100:.1f}%)")
|
|||
|
|
print(f" ⏱️ Скорость: {speed:.2f} отелей/сек")
|
|||
|
|
print(f" 🎯 ETA: {eta/60:.0f} минут\n")
|
|||
|
|
|
|||
|
|
print()
|
|||
|
|
|
|||
|
|
# Небольшая задержка между запросами
|
|||
|
|
time.sleep(0.5)
|
|||
|
|
|
|||
|
|
# Итоги
|
|||
|
|
print("\n" + "=" * 60)
|
|||
|
|
print("📊 ИТОГО:")
|
|||
|
|
print(f" ✅ Успешно: {success_count}")
|
|||
|
|
print(f" ❌ Ошибок: {error_count}")
|
|||
|
|
print(f" 📝 Всего отелей обработано: {success_count}")
|
|||
|
|
print(f" ⏱️ Время работы: {(time.time() - start_time)/60:.1f} минут")
|
|||
|
|
print()
|
|||
|
|
print("🎉 Повторная обработка завершена!")
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
|
|||
|
|
main()
|
|||
|
|
|