# ✨ Major Features:
# - Complete RAG system for hotel website analysis
# - Hybrid audit with BGE-M3 embeddings + Natasha NER
# - Universal horizontal Excel reports with dashboards
# - Multi-region processing (SPb, Orel, Chukotka, Kamchatka)
#
# 📊 Completed Regions:
# - Орловская область: 100% (36/36)
# - Чукотский АО: 100% (4/4)
# - г. Санкт-Петербург: 93% (893/960)
# - Камчатский край: 87% (89/102)
#
# 🔧 Infrastructure:
# - PostgreSQL with pgvector extension
# - BGE-M3 embeddings API
# - Browserless for web scraping
# - N8N workflows for automation
# - S3/Nextcloud file storage
#
# 📝 Documentation:
# - Complete DB schemas
# - API documentation
# - Setup guides
# - Status reports
74 lines
2.3 KiB
Python
74 lines
2.3 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Тестовый скрипт для проверки обработки данных
|
||
"""
|
||
|
||
import psycopg2
|
||
import json
|
||
from urllib.parse import unquote
|
||
|
||
# Database connection settings; unpacked as keyword arguments into
# psycopg2.connect() by test_data_processing().
# NOTE(review): the host and credentials are hard-coded in source. Consider
# loading them from environment variables or a secrets store, and rotating
# this password since it has been committed.
DB_CONFIG = {
    'host': '147.45.189.234',
    'port': 5432,
    'database': 'default_db',
    'user': 'gen_user',
    # Stored percent-encoded; unquote() decodes it once at import time.
    'password': unquote('2~~9_%5EkVsU%3F2%5CS'),
}
|
||
|
||
def _pick_url(criterion):
    """Return the first entry of ``approval_urls``, or '-' when there is none."""
    urls = criterion.get('approval_urls')
    return urls[0] if urls else '-'


def _build_comment(criterion):
    """Build the comment text for one criterion, capped at 100 characters.

    Preference order when the criterion is marked as found: its own
    ``quote``, then the first entry of ``approval_quotes`` (the ``quote``
    field when that entry is a dict, otherwise its string form), then the
    generic "Найдено". A criterion that is not found, or has no ``found``
    key at all, yields "Не найдено".
    """
    comment = "Не найдено"
    # .get() rather than ['found']: the criterion may be an empty dict when
    # the row has no entry for it.
    if criterion.get('found'):
        if criterion.get('quote'):
            comment = criterion['quote']
        elif criterion.get('approval_quotes'):
            first_quote = criterion['approval_quotes'][0]
            if isinstance(first_quote, dict):
                # Fall back when the nested quote is missing, None or empty.
                comment = first_quote.get('quote') or 'Найдено'
            else:
                comment = first_quote
        else:
            comment = "Найдено"

    # Coerce so that len() and slicing below cannot fail on non-string values.
    comment = str(comment)
    return comment[:100] + "..." if len(comment) > 100 else comment


def test_data_processing():
    """Fetch one audit row and print how criterion 2 maps to report fields.

    Reads ``criteria_results`` from the first row of ``hotel_audit_results``,
    prints the raw value's type and length, then prints the ``found``,
    ``approval_urls`` and ``quote`` fields of ``criterion_02`` followed by
    the derived URL and comment. Prints nothing when the table has no rows.

    Raises:
        json.JSONDecodeError: if the column holds a string that is not
            valid JSON.
    """
    conn = psycopg2.connect(**DB_CONFIG)
    try:
        cur = conn.cursor()
        try:
            cur.execute('SELECT criteria_results FROM hotel_audit_results LIMIT 1')
            row = cur.fetchone()
        finally:
            cur.close()
    finally:
        # Release the connection even if the query raised.
        conn.close()

    if not row:
        return

    criteria = row[0]
    print('🔍 Исходные данные из БД:')
    print(f' Тип: {type(criteria)}')
    # A NULL column comes back as None, which has no len().
    print(f' Длина: {len(criteria) if criteria is not None else 0}')

    # The value may arrive already decoded or as a JSON string depending on
    # the column type — TODO confirm against the table schema.
    if isinstance(criteria, str):
        criteria = json.loads(criteria)
    if not isinstance(criteria, dict):
        criteria = {}

    # Inspect criterion 2; a missing or null entry is treated as empty.
    criterion_02 = criteria.get('criterion_02') or {}
    quote_preview = str(criterion_02.get('quote') or '')[:50]
    print('\n📋 Критерий 2 (Адрес):')
    print(f' found: {criterion_02.get("found")}')
    print(f' approval_urls: {criterion_02.get("approval_urls")}')
    print(f' quote: {quote_preview}...')

    # Run the same field derivation and show the results.
    print('\n🔧 Тестируем обработку:')
    url = _pick_url(criterion_02)
    print(f' URL: {url}')

    comment = _build_comment(criterion_02)
    print(f' Комментарий: {comment[:50]}...')
|
||
|
||
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    test_data_processing()
|
||
|
||
|