150 lines
5.8 KiB
Python
150 lines
5.8 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
Export a website availability report to Excel.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import os
from datetime import datetime
from urllib.parse import unquote

import pandas as pd
import psycopg2
from psycopg2.extras import RealDictCursor
|
|||
|
|
|
|||
|
|
# Database connection settings.
#
# Every value can be overridden through a HOTEL_DB_* environment variable so
# that deployments do not have to edit this file. The literals below are only
# fallbacks that keep existing invocations working.
# NOTE(review): the fallback password is still committed to the source tree;
# rotate it and drop the default once all callers set HOTEL_DB_PASSWORD.
DB_CONFIG = {
    'host': os.environ.get("HOTEL_DB_HOST", "147.45.189.234"),
    'port': int(os.environ.get("HOTEL_DB_PORT", "5432")),
    'database': os.environ.get("HOTEL_DB_NAME", "default_db"),
    'user': os.environ.get("HOTEL_DB_USER", "gen_user"),
    # The fallback is stored URL-encoded and decoded here; a value supplied
    # through the environment is used verbatim (no URL-decoding).
    'password': os.environ.get(
        "HOTEL_DB_PASSWORD", unquote("2~~9_%5EkVsU%3F2%5CS")
    ),
}
|
|||
|
|
|
|||
|
|
# Characters replaced with "_" when a region name is embedded in a file name
# (the space matches the original behaviour; the rest are path separators or
# characters that common file systems reject).
_UNSAFE_FILENAME_CHARS = ' /\\:*?"<>|'


def _fetch_frame(cur, query, params):
    """Execute *query* on *cur* and return the rows as a DataFrame.

    Column names are taken from ``cur.description`` so an empty result still
    produces a frame that has every expected column.
    """
    cur.execute(query, params)
    columns = [col[0] for col in cur.description]
    return pd.DataFrame(cur.fetchall(), columns=columns)


def _default_output_name(region_name):
    """Build the timestamped default ``.xlsx`` file name for *region_name*."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    if region_name:
        safe_region = "".join(
            "_" if ch in _UNSAFE_FILENAME_CHARS else ch for ch in region_name
        )
        region_suffix = f"_{safe_region}"
    else:
        region_suffix = "_все_регионы"
    return f"website_status_report{region_suffix}_{timestamp}.xlsx"


def _autosize_columns(worksheet, max_width=50):
    """Set each column width to its longest cell text plus padding, capped."""
    for column in worksheet.columns:
        longest = max(
            (len(str(cell.value)) for cell in column if cell.value is not None),
            default=0,
        )
        column_letter = column[0].column_letter
        worksheet.column_dimensions[column_letter].width = min(
            longest + 2, max_width
        )


def export_website_status_report(region_name=None, output_file=None):
    """Export the hotel website availability report to an Excel workbook.

    Reads ``hotel_main`` using the connection settings in ``DB_CONFIG`` and
    writes three sheets: per-region statistics, the detailed row list, and
    the subset of rows whose site exists but is not reachable.

    Args:
        region_name: Optional substring matched case-insensitively (ILIKE)
            against ``region_name``; when falsy, all regions are exported.
        output_file: Target ``.xlsx`` path; when falsy, a timestamped name is
            generated in the current directory.

    Returns:
        The path of the written workbook.

    Raises:
        psycopg2.Error: If connecting or querying fails. The connection is
            closed in that case as well.
    """
    # Build the optional region filter; the value is always passed as a
    # bound parameter, never interpolated into the SQL text.
    where_clause = ""
    params = []
    if region_name:
        where_clause = "WHERE region_name ILIKE %s"
        params = [f'%{region_name}%']

    query = f'''
        SELECT
            region_name as "Регион",
            full_name as "Название отеля",
            website_address as "Адрес сайта",
            CASE website_status
                WHEN 'accessible' THEN '✅ Доступен'
                WHEN 'no_website' THEN '❌ Отсутствует'
                WHEN 'timeout' THEN '⏱️ Таймаут'
                WHEN 'connection_refused' THEN '🚫 Соединение отклонено'
                WHEN 'dns_error' THEN '🔍 DNS ошибка'
                WHEN 'ssl_error' THEN '🔒 SSL ошибка'
                WHEN 'http_error' THEN '⚠️ HTTP ошибка'
                WHEN 'invalid_url' THEN '❓ Неверный URL'
                ELSE '⏳ Не проверено'
            END as "Статус сайта",
            CASE
                WHEN website_status = 'accessible' THEN 'Да'
                WHEN website_status = 'no_website' THEN 'Нет'
                ELSE 'Есть, но недоступен'
            END as "Наличие сайта",
            phone as "Телефон",
            email as "Email",
            owner_full_name as "Владелец",
            owner_inn as "ИНН"
        FROM hotel_main
        {where_clause}
        ORDER BY region_name, website_status, full_name
    '''

    # The "%%" in the last alias is a literal percent sign: the query is
    # executed with parameters, so psycopg2 would otherwise treat a single
    # "%" as the start of a placeholder.
    stats_query = f'''
        SELECT
            region_name as "Регион",
            COUNT(*) as "Всего отелей",
            COUNT(CASE WHEN website_address IS NOT NULL AND website_address != '' AND website_address != '-' THEN 1 END) as "С указанным сайтом",
            COUNT(CASE WHEN website_status = 'accessible' THEN 1 END) as "Сайт доступен",
            COUNT(CASE WHEN website_status IN ('timeout', 'connection_refused', 'dns_error', 'ssl_error', 'http_error', 'invalid_url') THEN 1 END) as "Сайт недоступен",
            COUNT(CASE WHEN website_status = 'no_website' THEN 1 END) as "Сайт отсутствует",
            ROUND(COUNT(CASE WHEN website_status = 'accessible' THEN 1 END) * 100.0 / NULLIF(COUNT(CASE WHEN website_address IS NOT NULL AND website_address != '' AND website_address != '-' THEN 1 END), 0), 2) as "%% доступности"
        FROM hotel_main
        {where_clause}
        GROUP BY region_name
        ORDER BY region_name
    '''

    conn = psycopg2.connect(**DB_CONFIG, cursor_factory=RealDictCursor)
    try:
        # The cursor context manager closes the cursor; the connection is
        # closed explicitly because ``with conn`` would only end the
        # transaction.
        with conn.cursor() as cur:
            df = _fetch_frame(cur, query, params)
            stats_df = _fetch_frame(cur, stats_query, params)
    finally:
        conn.close()

    if not output_file:
        output_file = _default_output_name(region_name)

    # NOTE(review): rows that were never checked fall into the ELSE branch of
    # "Наличие сайта" and therefore also land on this sheet; confirm that is
    # intended.
    problematic_df = df[df['Наличие сайта'] == 'Есть, но недоступен'].copy()

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        stats_df.to_excel(writer, sheet_name='Статистика', index=False)
        df.to_excel(writer, sheet_name='Детальные данные', index=False)
        problematic_df.to_excel(
            writer, sheet_name='Недоступные сайты', index=False
        )

        for worksheet in writer.sheets.values():
            _autosize_columns(worksheet)

    print(f"✅ Отчет сохранен: {output_file}")
    print(f"📊 Всего записей: {len(df)}")
    print(f"🔴 Недоступных сайтов: {len(problematic_df)}")

    return output_file
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    import sys

    # Positional CLI arguments: [region substring] [output file path].
    cli_args = sys.argv[1:]
    region = cli_args[0] if cli_args else None
    output = cli_args[1] if len(cli_args) > 1 else None

    # Announce the scope of the report before the export starts.
    banner = (
        f"📍 Генерация отчета для региона: {region}"
        if region
        else "📍 Генерация отчета для всех регионов"
    )
    print(banner)

    export_website_status_report(region, output)
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|