130 lines
4.4 KiB
Python
130 lines
4.4 KiB
Python
|
|
#!/usr/bin/env python3
"""
Estimate how long the hotel-website crawler still has to run.

Reads progress counters from the ``hotel_main`` and
``hotel_website_processed`` tables and prints a report (in Russian) with the
processing speed over the last hour, the last 24 hours and the whole crawl,
plus a projected completion time for both the current and the average speed.
"""
from contextlib import closing
from datetime import datetime, timedelta
from typing import List, Optional, Tuple
from urllib.parse import unquote

DATE_FORMAT = '%Y-%m-%d %H:%M'


def estimate_completion(remaining, rate, now: datetime) -> Tuple[Optional[float], Optional[datetime]]:
    """Project how long *remaining* hotels take at *rate* hotels per hour.

    Args:
        remaining: Number of hotels still to be processed.
        rate: Processing speed in hotels per hour.
        now: Moment the projection starts from.

    Returns:
        ``(hours_left, eta)``. Both are ``None`` when *rate* is not positive
        (no projection is possible). Only ``eta`` is ``None`` when the
        projected date is outside the range ``datetime`` can represent.
    """
    if rate <= 0:
        return None, None
    hours_left = remaining / rate
    try:
        eta = now + timedelta(hours=hours_left)
    except OverflowError:
        # A near-zero rate can push the date past datetime.max.
        eta = None
    return hours_left, eta


def build_report(total, processed, per_hour, per_day,
                 first_date, last_date, avg_pages, now: datetime) -> List[str]:
    """Build the report as a list of strings, one per ``print`` call.

    Args:
        total: Hotels that have a website address.
        processed: Distinct hotels present in the processed table.
        per_hour: Distinct hotels processed during the last hour.
        per_day: Distinct hotels processed during the last 24 hours.
        first_date: Earliest ``processed_at`` value, or ``None`` if no rows.
        last_date: Latest ``processed_at`` value, or ``None`` if no rows.
        avg_pages: Average number of rows per hotel, or ``None`` if no rows.
        now: Moment the completion estimates start from.

    Returns:
        The report lines in output order; section headers carry a leading
        newline so that printing them one by one reproduces the layout.
    """
    # Clamp at zero: more processed hotels than counted ones would otherwise
    # yield a negative amount of work and a completion date in the past.
    remaining = max(total - processed, 0)
    # Guard against an empty hotel_main (no division by zero).
    percent_done = processed / total * 100 if total else 0.0

    elapsed_hours = None
    if first_date is not None and last_date is not None:
        elapsed_hours = (last_date - first_date).total_seconds() / 3600

    # Average speed over the whole crawl; falls back to the last-hour speed
    # when the time span is missing or empty.
    if elapsed_hours is not None and elapsed_hours > 0:
        avg_per_hour = processed / elapsed_hours
    else:
        avg_per_hour = per_hour

    # The two estimates are independent: a stalled last hour must not wipe
    # out the estimate based on the long-term average.
    hours_current, eta_current = estimate_completion(remaining, per_hour, now)
    hours_avg, eta_avg = estimate_completion(remaining, avg_per_hour, now)

    lines = [
        "📊 ОЦЕНКА ВРЕМЕНИ РАБОТЫ КРАУЛЕРА",
        "=" * 60,
        "\n🏨 ВСЕГО:",
        f" Отелей с сайтами: {total:,}",
        f" Обработано: {processed:,} ({percent_done:.1f}%)",
        f" Осталось: {remaining:,}",
        "\n⚡ СКОРОСТЬ:",
        f" За последний час: {per_hour} отелей/час",
        f" За последние 24ч: {per_day} отелей/день",
    ]
    if avg_per_hour > 0:
        lines.append(f" Средняя с начала: {avg_per_hour:.1f} отелей/час")

    if elapsed_hours is not None:
        lines += [
            "\n📅 ПЕРИОД:",
            f" Начало: {first_date.strftime(DATE_FORMAT)}",
            f" Сейчас: {last_date.strftime(DATE_FORMAT)}",
            f" Прошло: {elapsed_hours:.1f} часов ({elapsed_hours / 24:.1f} дней)",
        ]

    lines.append(f"\n⏱️ ОЦЕНКА ВРЕМЕНИ (по текущей скорости {per_hour} отелей/час):")
    if hours_current is None:
        lines.append(" Недостаточно данных")
    else:
        lines.append(f" Осталось времени: {hours_current:.1f} часов ({hours_current / 24:.1f} дней)")
        if eta_current is not None:
            lines.append(f" Завершение: {eta_current.strftime(DATE_FORMAT)}")

    if hours_avg is not None:
        lines.append(f"\n⏱️ ОЦЕНКА ВРЕМЕНИ (по средней скорости {avg_per_hour:.1f} отелей/час):")
        lines.append(f" Осталось времени: {hours_avg:.1f} часов ({hours_avg / 24:.1f} дней)")
        if eta_avg is not None:
            lines.append(f" Завершение: {eta_avg.strftime(DATE_FORMAT)}")

    # AVG() over an empty table is NULL; show a placeholder instead of "None".
    pages_text = avg_pages if avg_pages is not None else 'н/д'
    lines += [
        "\n📄 СТАТИСТИКА СТРАНИЦ:",
        f" Среднее страниц на отель: {pages_text}",
    ]
    return lines


def _fetch_scalar(cur, query: str):
    """Execute *query* on *cur* and return the first column of the first row."""
    cur.execute(query)
    return cur.fetchone()[0]


def main() -> None:
    """Collect the crawler statistics from PostgreSQL and print the report."""
    # Imported here so the pure helpers above stay importable on machines
    # that do not have the database driver installed.
    import psycopg2

    # NOTE(review): the connection credentials are hard-coded in the source.
    # They should be rotated and loaded from the environment or a secret store.
    connection = psycopg2.connect(
        host='147.45.189.234',
        database='default_db',
        user='gen_user',
        password=unquote('2~~9_%5EkVsU%3F2%5CS')
    )

    # closing() guarantees the cursor and the connection are released even
    # when one of the queries fails.
    with closing(connection) as conn, closing(conn.cursor()) as cur:
        # Overall statistics
        total_hotels_with_sites = _fetch_scalar(
            cur, 'SELECT COUNT(*) FROM hotel_main WHERE website_address IS NOT NULL')
        processed_hotels = _fetch_scalar(
            cur, 'SELECT COUNT(DISTINCT hotel_id) FROM hotel_website_processed')

        # Statistics for the last 24 hours
        hotels_per_day = _fetch_scalar(cur, """
            SELECT COUNT(DISTINCT hotel_id)
            FROM hotel_website_processed
            WHERE processed_at > NOW() - INTERVAL '24 hours'
        """)

        # Statistics for the last hour
        hotels_per_hour = _fetch_scalar(cur, """
            SELECT COUNT(DISTINCT hotel_id)
            FROM hotel_website_processed
            WHERE processed_at > NOW() - INTERVAL '1 hour'
        """)

        # Time of the first and the last crawl
        cur.execute("""
            SELECT MIN(processed_at), MAX(processed_at)
            FROM hotel_website_processed
        """)
        first_date, last_date = cur.fetchone()

        # Average number of pages per hotel
        avg_pages = _fetch_scalar(cur, """
            SELECT AVG(page_count)::numeric(10,1)
            FROM (
                SELECT hotel_id, COUNT(*) as page_count
                FROM hotel_website_processed
                GROUP BY hotel_id
            ) sub
        """)

    report = build_report(
        total_hotels_with_sites,
        processed_hotels,
        hotels_per_hour,
        hotels_per_day,
        first_date,
        last_date,
        avg_pages,
        datetime.now(),
    )
    for line in report:
        print(line)


if __name__ == "__main__":
    main()
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|