🚀 Full project sync: Hotels RAG & Audit System
✨ Major Features:
- Complete RAG system for hotel website analysis
- Hybrid audit with BGE-M3 embeddings + Natasha NER
- Universal horizontal Excel reports with dashboards
- Multi-region processing (SPb, Orel, Chukotka, Kamchatka)

📊 Completed Regions:
- Орловская область: 100% (36/36)
- Чукотский АО: 100% (4/4)
- г. Санкт-Петербург: 93% (893/960)
- Камчатский край: 87% (89/102)

🔧 Infrastructure:
- PostgreSQL with pgvector extension
- BGE-M3 embeddings API
- Browserless for web scraping
- N8N workflows for automation
- S3/Nextcloud file storage

📝 Documentation:
- Complete DB schemas
- API documentation
- Setup guides
- Status reports
This commit is contained in:
240
test_mos_sud_auto.py
Executable file
240
test_mos_sud_auto.py
Executable file
@@ -0,0 +1,240 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Автоматическое тестирование всех методов обхода
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from playwright.async_api import async_playwright
|
||||
import random
|
||||
|
||||
# Target page requested by every probe in this script: a civil-case
# "details" page on mos-sud.ru, addressed by case GUID and uid.
URL = "https://mos-sud.ru/312/cases/civil/details/7b8a110a-162d-4493-88b0-e505523c9935?uid=77MS0312-01-2025-002929-35&formType=fullForm"

# User-Agent strings handed to browser contexts by the probes.
# Index 0 is a Chrome 120 UA, index 1 a Firefox 121 UA (both Windows 10 x64).
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
]
|
||||
|
||||
async def test_method_1_headless_false() -> bool:
    """Method 1: load the target page in a visible (headed) Chromium.

    Launches Chromium with ``headless=False`` and the
    ``--disable-blink-features=AutomationControlled`` switch, creates a
    context with ``USER_AGENTS[0]``, a 1920x1080 viewport and the ``ru-RU``
    locale, installs an init script that makes ``navigator.webdriver`` read
    as ``undefined``, then navigates to ``URL`` and reads the body text.

    Returns:
        True when the HTTP status is 200 and the body text is longer than
        100 characters; False otherwise, including when any exception is
        raised (the exception is printed, not propagated).
    """
    print("═"*80)
    print("🧪 МЕТОД 1: ВИДИМЫЙ БРАУЗЕР (headless=False)")
    print("═"*80)

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=False,
                args=['--disable-blink-features=AutomationControlled']
            )
            try:
                context = await browser.new_context(
                    user_agent=USER_AGENTS[0],
                    viewport={'width': 1920, 'height': 1080},
                    locale='ru-RU'
                )

                page = await context.new_page()
                await page.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

                response = await page.goto(URL, wait_until='domcontentloaded', timeout=30000)
                # Fixed pause after navigation before reading the body
                # (presumably to let client-side rendering finish).
                await asyncio.sleep(5)

                text = await page.inner_text('body')
                # goto() may legitimately return None; report that as
                # "no status" instead of failing with AttributeError.
                status = response.status if response is not None else None
            finally:
                # Close the browser on failures too (timeouts, selector
                # errors), not only on the success path.
                await browser.close()

            print(f" Статус: {status}")
            print(f" Текст: {len(text)} символов")
            print(f" Превью: {text[:100]}")

            if status == 200 and len(text) > 100:
                print(" ✅ УСПЕХ!")
                return True
            else:
                print(f" ❌ Не сработало (статус {status})")
                return False

    except Exception as e:
        # Probe boundary: any failure counts as "method did not work".
        print(f" ❌ Ошибка: {e}")
        return False
|
||||
|
||||
async def test_method_2_firefox() -> bool:
    """Method 2: load the target page in a visible (headed) Firefox.

    Launches Firefox with ``headless=False``, creates a context with
    ``USER_AGENTS[1]`` and the ``ru-RU`` locale, navigates to ``URL``
    waiting for ``networkidle`` (30 s timeout) and reads the body text.

    Returns:
        True when the HTTP status is 200 and the body text is longer than
        100 characters; False otherwise, including when any exception is
        raised (the exception is printed, not propagated).
    """
    print("═"*80)
    print("🦊 МЕТОД 2: FIREFOX")
    print("═"*80)

    try:
        async with async_playwright() as p:
            browser = await p.firefox.launch(headless=False)
            try:
                context = await browser.new_context(
                    user_agent=USER_AGENTS[1],
                    locale='ru-RU'
                )

                page = await context.new_page()
                response = await page.goto(URL, wait_until='networkidle', timeout=30000)
                # Fixed pause after navigation before reading the body.
                await asyncio.sleep(5)

                text = await page.inner_text('body')
                # goto() may legitimately return None; report that as
                # "no status" instead of failing with AttributeError.
                status = response.status if response is not None else None
            finally:
                # Close the browser on failures too, not only on success.
                await browser.close()

            print(f" Статус: {status}")
            print(f" Текст: {len(text)} символов")
            print(f" Превью: {text[:100]}")

            if status == 200 and len(text) > 100:
                print(" ✅ УСПЕХ!")
                return True
            else:
                print(f" ❌ Не сработало (статус {status})")
                return False

    except Exception as e:
        # Probe boundary: any failure counts as "method did not work".
        print(f" ❌ Ошибка: {e}")
        return False
|
||||
|
||||
async def test_method_3_slow_mo() -> bool:
    """Method 3: load the target page with slowed-down browser operations.

    Launches a visible Chromium with ``slow_mo=1000`` (Playwright delays
    each operation by that many milliseconds), creates a context with
    ``USER_AGENTS[0]``, navigates to ``URL`` waiting for the ``load`` event
    (60 s timeout), pauses 10 s and reads the body text.

    Returns:
        True when the HTTP status is 200 and the body text is longer than
        100 characters; False otherwise, including when any exception is
        raised (the exception is printed, not propagated).
    """
    print("═"*80)
    print("🐌 МЕТОД 3: МЕДЛЕННОЕ ВЫПОЛНЕНИЕ (slow_mo)")
    print("═"*80)

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=False,
                slow_mo=1000
            )
            try:
                context = await browser.new_context(user_agent=USER_AGENTS[0])
                page = await context.new_page()

                response = await page.goto(URL, wait_until='load', timeout=60000)
                # Longer fixed pause than the other probes before reading
                # the body.
                await asyncio.sleep(10)

                text = await page.inner_text('body')
                # goto() may legitimately return None; report that as
                # "no status" instead of failing with AttributeError.
                status = response.status if response is not None else None
            finally:
                # Close the browser on failures too, not only on success.
                await browser.close()

            print(f" Статус: {status}")
            print(f" Текст: {len(text)} символов")
            print(f" Превью: {text[:100]}")

            if status == 200 and len(text) > 100:
                print(" ✅ УСПЕХ!")
                return True
            else:
                print(f" ❌ Не сработало (статус {status})")
                return False

    except Exception as e:
        # Probe boundary: any failure counts as "method did not work".
        print(f" ❌ Ошибка: {e}")
        return False
|
||||
|
||||
async def test_method_4_step_by_step() -> bool:
    """Method 4: visit the site's home page first, then the target page.

    Launches a visible Chromium, creates a context with ``USER_AGENTS[0]``,
    opens ``https://mos-sud.ru/`` and waits for ``networkidle``, pauses 3 s,
    then navigates the same page to ``URL`` (``networkidle``, 30 s timeout),
    pauses 5 s and reads the body text.

    Returns:
        True when the HTTP status of the second navigation is 200 and the
        body text is longer than 100 characters; False otherwise, including
        when any exception is raised (the exception is printed, not
        propagated).
    """
    print("═"*80)
    print("🪜 МЕТОД 4: ПОШАГОВАЯ ЗАГРУЗКА")
    print("═"*80)

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=False)
            try:
                context = await browser.new_context(user_agent=USER_AGENTS[0])
                page = await context.new_page()

                # Step 1: home page
                print(" 📍 Загружаем главную...")
                await page.goto('https://mos-sud.ru/', wait_until='networkidle')
                await asyncio.sleep(3)

                # Step 2: target page, reusing the same page/context
                print(" 📍 Переходим на целевую...")
                response = await page.goto(URL, wait_until='networkidle', timeout=30000)
                await asyncio.sleep(5)

                text = await page.inner_text('body')
                # goto() may legitimately return None; report that as
                # "no status" instead of failing with AttributeError.
                status = response.status if response is not None else None
            finally:
                # Close the browser on failures too, not only on success.
                await browser.close()

            print(f" Статус: {status}")
            print(f" Текст: {len(text)} символов")
            print(f" Превью: {text[:100]}")

            if status == 200 and len(text) > 100:
                print(" ✅ УСПЕХ!")
                return True
            else:
                print(f" ❌ Не сработало (статус {status})")
                return False

    except Exception as e:
        # Probe boundary: any failure counts as "method did not work".
        print(f" ❌ Ошибка: {e}")
        return False
|
||||
|
||||
async def main():
    """Run all four probes one after another and print a summary.

    Each probe coroutine is awaited in order with a 2-second pause after
    it; its boolean result is stored under a human-readable label. After
    the loop a per-method results table is printed, and if no probe
    succeeded an additional block of follow-up suggestions is printed.
    """
    banner = "🥷"*40
    rule = "═"*80

    # Header
    print(banner)
    print()
    print(" АВТОМАТИЧЕСКОЕ ТЕСТИРОВАНИЕ ОБХОДА ЗАЩИТЫ")
    print()
    print(banner)
    print()
    print("Цель: mos-sud.ru")
    print()

    probes = (
        ("Видимый браузер", test_method_1_headless_false),
        ("Firefox", test_method_2_firefox),
        ("Медленное выполнение", test_method_3_slow_mo),
        ("Пошаговая загрузка", test_method_4_step_by_step),
    )

    # Probes run strictly sequentially; insertion order of the dict is the
    # order used for the summary table below.
    outcomes = {}
    for label, probe in probes:
        print()
        outcomes[label] = await probe()
        print()
        await asyncio.sleep(2)

    # Summary
    print(rule)
    print("📊 ИТОГОВЫЕ РЕЗУЛЬТАТЫ")
    print(rule)
    print()

    for label, ok in outcomes.items():
        if ok:
            verdict = "✅ РАБОТАЕТ"
        else:
            verdict = "❌ НЕ РАБОТАЕТ"
        print(f" {label:30s} {verdict}")

    print()
    print(rule)

    # At least one probe worked: nothing more to report.
    if any(outcomes.values()):
        return

    advice = (
        "",
        "💡 ВСЕ МЕТОДЫ НЕ СРАБОТАЛИ",
        "",
        "Судебный сайт имеет ОЧЕНЬ сильную защиту.",
        "",
        "Для обхода нужны:",
        " 1. 🌐 Residential прокси (домашние IP)",
        " 2. 🔐 VPN из России",
        " 3. 📧 Официальный API доступ",
        " 4. 🍪 Реальные cookies из браузера",
        "",
        rule,
    )
    for line in advice:
        print(line)
|
||||
|
||||
# Script entry point: run the full probe sequence on a fresh asyncio event
# loop only when the file is executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user