✨ Major Features: - Complete RAG system for hotel website analysis - Hybrid audit with BGE-M3 embeddings + Natasha NER - Universal horizontal Excel reports with dashboards - Multi-region processing (SPb, Orel, Chukotka, Kamchatka) 📊 Completed Regions: - Орловская область: 100% (36/36) - Чукотский АО: 100% (4/4) - г. Санкт-Петербург: 93% (893/960) - Камчатский край: 87% (89/102) 🔧 Infrastructure: - PostgreSQL with pgvector extension - BGE-M3 embeddings API - Browserless for web scraping - N8N workflows for automation - S3/Nextcloud file storage 📝 Documentation: - Complete DB schemas - API documentation - Setup guides - Status reports
161 lines
5.7 KiB
Python
161 lines
5.7 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
ИСПРАВЛЕННЫЙ CSV генератор
|
||
"""
|
||
|
||
import csv
import json
import os
from datetime import datetime
from urllib.parse import quote, unquote

import psycopg2
|
||
|
||
# Database connection settings, passed as keyword arguments to
# psycopg2.connect(). Each value can be overridden through an environment
# variable so that credentials do not have to live in the source tree; the
# literals below are kept only as backward-compatible defaults.
# NOTE(review): the default password is committed to version control and
# should be rotated, after which the default can be dropped.
DB_CONFIG = {
    'host': os.environ.get('DB_HOST', '147.45.189.234'),
    # int() raises ValueError at import time if DB_PORT is not numeric.
    'port': int(os.environ.get('DB_PORT', 5432)),
    'database': os.environ.get('DB_NAME', 'default_db'),
    'user': os.environ.get('DB_USER', 'gen_user'),
    # The default is stored percent-encoded and decoded here; a value taken
    # from DB_PASSWORD is used verbatim.
    'password': os.environ.get('DB_PASSWORD', unquote('2~~9_%5EkVsU%3F2%5CS')),
}
|
||
|
||
def get_audit_results_from_db(region_name='Чукотский автономный округ'):
    """Fetch audit results joined with hotel data for one region.

    Args:
        region_name: Value compared against ``hotel_main.region_name``.
            Defaults to the region this script originally hard-coded.

    Returns:
        A list of dicts, ordered by ``hotel_main.created_at`` descending,
        with the keys ``hotel_id``, ``full_name``, ``website_address``,
        ``rkn_registry_status``, ``rkn_registry_number``,
        ``rkn_registry_date``, ``score_percentage``, ``criteria_results``
        and ``created_at``. An empty list is returned both when no rows
        match and when any error occurs; errors are printed, not raised.
    """
    # Keys of the returned dicts, in the same order as the SELECT list.
    columns = (
        'hotel_id',
        'full_name',
        'website_address',
        'rkn_registry_status',
        'rkn_registry_number',
        'rkn_registry_date',
        'score_percentage',
        'criteria_results',
        'created_at',
    )

    conn = None
    try:
        conn = psycopg2.connect(**DB_CONFIG)
        # The cursor context manager closes the cursor even if the query fails.
        with conn.cursor() as cursor:
            # The region is passed as a bound parameter, never interpolated
            # into the SQL text.
            cursor.execute(
                """
                SELECT
                    ar.hotel_id,
                    hm.full_name,
                    hm.website_address,
                    hm.rkn_registry_status,
                    hm.rkn_registry_number,
                    hm.rkn_registry_date,
                    ar.score_percentage,
                    ar.criteria_results,
                    hm.created_at
                FROM hotel_audit_results ar
                JOIN hotel_main hm ON ar.hotel_id = hm.id
                WHERE hm.region_name = %s
                ORDER BY hm.created_at DESC
                """,
                (region_name,),
            )
            return [dict(zip(columns, row)) for row in cursor.fetchall()]
    except Exception as e:
        # Best-effort contract relied on by main(): report and return no data.
        print(f"❌ Ошибка получения данных: {e}")
        return []
    finally:
        # Release the connection on both the success and the error path.
        if conn is not None:
            conn.close()
|
||
|
||
def create_csv_report(results):
    """Write audit results to a timestamped CSV file in the working directory.

    Args:
        results: Iterable of dicts. The keys ``hotel_id``, ``full_name``,
            ``website_address``, ``score_percentage`` and ``created_at`` are
            required; ``rkn_registry_status``, ``rkn_registry_number`` and
            ``rkn_registry_date`` are optional and may be ``None``.

    Returns:
        The name of the file written, ``audit_fixed_<YYYYmmdd_HHMMSS>.csv``.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"audit_fixed_{timestamp}.csv"

    # Header row: basic hotel fields followed by the RKN registry columns.
    headers = [
        'ID отеля', 'Название отеля', 'Сайт', 'Балл (%)', 'Дата аудита',
        'РКН_Реестр', 'РКН_Номер_Дата', 'РКН_Ссылка',
    ]

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(headers)

        for result in results:
            # `or ''` normalizes a stored None: dict.get() only applies its
            # default when the key is missing, so a None value would otherwise
            # be written to the file as the text "None".
            rkn_status = result.get('rkn_registry_status') or ''
            rkn_number = result.get('rkn_registry_number') or ''
            rkn_date = result.get('rkn_registry_date') or ''

            rkn_in_registry = (
                "ДА" if rkn_status and rkn_status.lower() == 'found' else "НЕТ"
            )
            # Number and date share one cell, one per line; absent parts are
            # skipped and "-" marks a cell with neither.
            rkn_info_text = "\n".join(
                str(part) for part in (rkn_number, rkn_date) if part
            ) or "-"
            # Percent-encode the number so spaces and non-ASCII characters
            # cannot break the query string.
            if rkn_number:
                encoded_number = quote(str(rkn_number), safe='')
                rkn_url = (
                    "https://rkn.gov.ru/mass-communications/reestr/search/"
                    f"?q={encoded_number}"
                )
            else:
                rkn_url = "-"

            writer.writerow([
                result['hotel_id'],
                result['full_name'],
                result['website_address'] or '-',
                result['score_percentage'],
                # Keep only the date part of the timestamp's string form.
                str(result['created_at'])[:10],
                rkn_in_registry,
                rkn_info_text,
                rkn_url,
            ])

    return filename
|
||
|
||
def main():
    """Fetch audit results, print diagnostics for the first hotel, write the CSV.

    Prints a banner, loads the results, and returns early with a message when
    there are none. Otherwise it dumps the first record's raw fields, reports
    how its ``criteria_results`` value is stored, and writes the CSV report.
    Any exception is printed together with its traceback and is not re-raised.
    """
    print("🚀 ГЕНЕРАЦИЯ ПРОСТОГО CSV")
    print("=" * 30)

    try:
        results = get_audit_results_from_db()

        if not results:
            print("❌ Нет данных для отчёта")
            return

        print(f"✅ Получено результатов: {len(results)}")

        # Dump the raw fields of the first hotel. `results` is known to be
        # non-empty here because of the early return above.
        first = results[0]
        print("\n🔍 СЫРЫЕ ДАННЫЕ ПЕРВОГО ОТЕЛЯ:")
        print(f"ID: {first['hotel_id']}")
        print(f"Название: {first['full_name']}")
        print(f"Сайт: {first['website_address']}")
        print(f"РКН статус: {first['rkn_registry_status']}")
        print(f"РКН номер: {first['rkn_registry_number']}")
        print(f"РКН дата: {first['rkn_registry_date']}")
        print(f"Балл: {first['score_percentage']}")

        # Report how the criteria are stored: a JSON string or another type.
        criteria_results = first['criteria_results']
        print(f"\n📊 КРИТЕРИИ (тип: {type(criteria_results)}):")
        if not isinstance(criteria_results, str):
            print(f" Не строка: {criteria_results}")
        else:
            try:
                parsed = json.loads(criteria_results)
            except ValueError:
                # json.JSONDecodeError is a ValueError subclass; nothing else
                # is swallowed here.
                parsed = None

            if isinstance(parsed, (list, dict)):
                print(f" JSON парсится: {len(parsed)} элементов")
                # Only a list can be indexed by position; a JSON object
                # would raise KeyError on [0].
                if isinstance(parsed, list) and parsed and isinstance(parsed[0], dict):
                    print(f" Первый критерий: {parsed[0]}")
            else:
                # Invalid JSON, or a JSON scalar that cannot hold criteria.
                print(f" Не JSON: {criteria_results[:100]}...")

        # Write the CSV report.
        filename = create_csv_report(results)

        print(f"\n✅ CSV файл сохранён: {filename}")
        print(f"📊 Обработано отелей: {len(results)}")

    except Exception as e:
        # Top-level boundary of the script: report the failure with its
        # traceback instead of letting it propagate.
        print(f"❌ Ошибка: {e}")
        import traceback
        traceback.print_exc()
|
||
|
||
# Run the report generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||
|