113 lines
3.2 KiB
Python
113 lines
3.2 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
Тестовый клиент для Universal Parser API
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import json
import os

import requests
|
|||
|
|
|
|||
|
|
# Configuration
# Both values can be overridden through the environment so the client can be
# pointed at another deployment without editing this file. The fallbacks are
# the values that used to be hard-coded here, so existing runs are unchanged.
# NOTE(review): the fallback key is committed in source; if it is a real
# credential, consider rotating it and supplying it only via PARSER_API_KEY.
API_URL = os.environ.get("PARSER_API_URL", "http://localhost:8003")
API_KEY = os.environ.get("PARSER_API_KEY", "parser_2025_secret_key_a8f3d9c1b4e7")
|
|||
|
|
|
|||
|
|
def test_parse(url: str, extract_links: bool = False):
    """Send one page to the API's ``/parse`` endpoint and print a report.

    The request asks the service to parse ``url`` with JavaScript enabled,
    a 5 second wait and no screenshot, and is given 60 seconds to complete.

    Args:
        url: Address of the page the service should parse.
        extract_links: Forwarded as the ``extract_links`` request flag; when
            true and the reply contains links, the first ten are listed.

    Returns:
        None. The outcome is written to stdout; failures are printed
        rather than raised.
    """
    banner = "═" * 80
    print(banner)
    print(f"🔍 ТЕСТИРУЕМ ПАРСИНГ: {url}")
    print(banner)
    print()

    headers = {
        "X-API-Key": API_KEY,
        "Content-Type": "application/json",
    }
    payload = {
        "url": url,
        "wait_seconds": 5,
        "extract_links": extract_links,
        "screenshot": False,
        "javascript_enabled": True,
    }

    try:
        print("📤 Отправляем запрос...")
        response = requests.post(
            f"{API_URL}/parse",
            headers=headers,
            json=payload,
            timeout=60,
        )

        if response.status_code == 200:
            _print_parse_report(response.json(), extract_links)
        else:
            print(f"❌ HTTP {response.status_code}")
            print(response.text)
    except Exception as e:
        # Deliberately broad: this is a manual smoke test, and a failure for
        # one URL must not abort the calls that follow it in the script.
        print(f"❌ Ошибка: {e}")

    print()
    print(banner)


def _print_parse_report(data: dict, extract_links: bool) -> None:
    """Print the decoded body of a successful ``/parse`` HTTP response."""
    print("✅ Успех!")
    print()
    print("📊 РЕЗУЛЬТАТЫ:")
    print(f" Status Code: {data.get('status_code')}")
    print(f" Title: {data.get('title')}")

    # The thousands separator is only valid for numbers. A missing or null
    # length must not raise here, otherwise the API's own error message
    # further down would never be shown.
    text_length = data.get('text_length')
    if isinstance(text_length, (int, float)):
        text_length = f"{text_length:,}"
    print(f" Текст: {text_length} символов")
    print(f" Время: {data.get('parsing_time')}с")
    print()

    if not data.get('success'):
        print(f"❌ Ошибка: {data.get('error')}")
        return

    print("📄 ПРЕВЬЮ КОНТЕНТА:")
    print("-" * 80)
    print((data.get('text') or "")[:1000])
    print("-" * 80)

    links = data.get('links')
    if extract_links and links:
        print()
        print(f"🔗 Найдено ссылок: {len(links)}")
        for i, link in enumerate(links[:10], 1):
            print(f" {i}. {link}")
        if len(links) > 10:
            print(f" ... и ещё {len(links) - 10}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_health():
    """Query the API's ``/health`` endpoint and print whether it is up.

    On HTTP 200 the ``status`` and ``version`` fields of the JSON reply are
    printed. Any other status, a network failure, or a reply that is not
    valid JSON is reported as the API being unavailable.

    Returns:
        None. The outcome is written to stdout.
    """
    print("🏥 Проверка здоровья API...")

    try:
        # Without an explicit timeout requests waits indefinitely for a
        # server that accepts the connection but never answers.
        response = requests.get(f"{API_URL}/health", timeout=10)
    except requests.RequestException as e:
        # Connection refused, DNS failure, timeout, etc.
        print(f"❌ API недоступен: {e}")
        return

    if response.status_code != 200:
        print("❌ API недоступен")
        return

    try:
        data = response.json()
    except ValueError as e:
        # The body was not JSON (requests' JSON errors derive from ValueError).
        print(f"❌ API недоступен: {e}")
        return

    print(f"✅ API работает: {data.get('status')}")
    print(f" Версия: {data.get('version')}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Step 1: make sure the service answers at all.
    test_health()

    # Steps 2 and 3: a court site (protected) without link extraction, then
    # an ordinary site with link extraction. Each run is preceded by a blank
    # line to separate it from the previous output.
    scenarios = (
        (
            "https://mos-sud.ru/312/cases/civil/details/7b8a110a-162d-4493-88b0-e505523c9935?uid=77MS0312-01-2025-002929-35&formType=fullForm",
            False,
        ),
        ("https://example.com", True),
    )
    for target, want_links in scenarios:
        print()
        test_parse(target, extract_links=want_links)
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|