303 lines
11 KiB
Python
303 lines
11 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
Testing with headless=True and maximum masking
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import asyncio
|
|||
|
|
from playwright.async_api import async_playwright
|
|||
|
|
from playwright_stealth import Stealth
|
|||
|
|
import random
|
|||
|
|
|
|||
|
|
# Target case-details page that the test methods in this file navigate to.
URL = "https://mos-sud.ru/312/cases/civil/details/7b8a110a-162d-4493-88b0-e505523c9935?uid=77MS0312-01-2025-002929-35&formType=fullForm"
|
|||
|
|
|
|||
|
|
async def test_method_1_stealth_advanced():
    """Method 1: headless Chromium with masking options plus playwright-stealth.

    Launches headless Chromium, opens a context with a desktop Chrome user
    agent, ru-RU locale, Europe/Moscow timezone, a Moscow geolocation and
    extra HTTP headers, applies ``Stealth`` and an additional init script to
    the page, then navigates to ``URL`` and reads the text of ``<body>``.

    Returns:
        tuple: ``(True, text)`` when the response status is 200 and the body
        text is longer than 100 characters; ``(False, text)`` when the page
        was read but that check failed (including when the navigation
        produced no response object); ``(False, None)`` when any exception
        was raised along the way.
    """
    print("═"*80)
    print("🥷 МЕТОД 1: МАКСИМАЛЬНАЯ МАСКИРОВКА + STEALTH")
    print("═"*80)

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=True,
                args=[
                    '--disable-blink-features=AutomationControlled',
                    '--disable-dev-shm-usage',
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-web-security',
                    '--disable-features=site-per-process',
                    '--window-size=1920,1080',
                    # NOTE(review): this UA is shorter than the context-level
                    # user_agent below; presumably the context value is the
                    # one pages actually send - confirm.
                    '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                ]
            )

            # Close the browser on every path, including when navigation or
            # text extraction raises.
            try:
                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                    viewport={'width': 1920, 'height': 1080},
                    locale='ru-RU',
                    timezone_id='Europe/Moscow',
                    geolocation={'latitude': 55.7558, 'longitude': 37.6173},
                    permissions=['geolocation'],
                    extra_http_headers={
                        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                        'Accept-Language': 'ru-RU,ru;q=0.9',
                        'Accept-Encoding': 'gzip, deflate, br',
                        'DNT': '1',
                        'Connection': 'keep-alive',
                        'Upgrade-Insecure-Requests': '1'
                    }
                )

                page = await context.new_page()

                # Apply Stealth to the page.
                stealth = Stealth()
                await stealth.apply_stealth_async(page)

                # Additional init script overriding a few navigator/window
                # properties.
                await page.add_init_script("""
                    Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
                    Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
                    Object.defineProperty(navigator, 'languages', {get: () => ['ru-RU', 'ru']});
                    window.chrome = {runtime: {}, loadTimes: function() {}, csi: function() {}};
                """)

                print(" 🌐 Загружаем страницу...")
                response = await page.goto(URL, wait_until='domcontentloaded', timeout=30000)
                # Fixed pause after navigation; presumably gives client-side
                # scripts time to finish - confirm.
                await asyncio.sleep(7)

                text = await page.inner_text('body')
                # page.goto() may return None; treat that as "no status"
                # instead of raising AttributeError and losing the text.
                status = response.status if response is not None else None
            finally:
                await browser.close()

        print(f" 📊 Статус: {status}")
        print(f" 📝 Текст: {len(text)} символов")
        print(f" 📄 Превью: {text[:150]}")

        if status == 200 and len(text) > 100:
            print(" ✅ УСПЕХ!")
            return True, text

        print(" ❌ Не сработало")
        return False, text

    except Exception as e:
        # Best-effort probe: report a truncated error and signal failure.
        print(f" ❌ Ошибка: {str(e)[:100]}")
        return False, None
|
|||
|
|
|
|||
|
|
async def test_method_2_firefox_headless():
    """Method 2: headless Firefox.

    Launches headless Firefox, opens a context with a Firefox 121 Windows
    user agent, ru-RU locale and Europe/Moscow timezone, navigates to ``URL``
    waiting for network idle, then reads the text of ``<body>``.

    Returns:
        tuple: ``(True, text)`` when the response status is 200 and the body
        text is longer than 100 characters; ``(False, text)`` when the page
        was read but that check failed (including when the navigation
        produced no response object); ``(False, None)`` when any exception
        was raised along the way.
    """
    print("═"*80)
    print("🦊 МЕТОД 2: FIREFOX HEADLESS")
    print("═"*80)

    try:
        async with async_playwright() as p:
            browser = await p.firefox.launch(headless=True)

            # Close the browser on every path, including when navigation or
            # text extraction raises.
            try:
                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
                    locale='ru-RU',
                    timezone_id='Europe/Moscow'
                )

                page = await context.new_page()

                print(" 🌐 Загружаем через Firefox...")
                response = await page.goto(URL, wait_until='networkidle', timeout=30000)
                # Fixed pause after navigation; presumably gives client-side
                # scripts time to finish - confirm.
                await asyncio.sleep(5)

                text = await page.inner_text('body')
                # page.goto() may return None; treat that as "no status"
                # instead of raising AttributeError and losing the text.
                status = response.status if response is not None else None
            finally:
                await browser.close()

        print(f" 📊 Статус: {status}")
        print(f" 📝 Текст: {len(text)} символов")
        print(f" 📄 Превью: {text[:150]}")

        if status == 200 and len(text) > 100:
            print(" ✅ УСПЕХ!")
            return True, text

        print(" ❌ Не сработало")
        return False, text

    except Exception as e:
        # Best-effort probe: report a truncated error and signal failure.
        print(f" ❌ Ошибка: {str(e)[:100]}")
        return False, None
|
|||
|
|
|
|||
|
|
async def test_method_3_two_step():
    """Method 3: two-step load (site root first, then the target page).

    Launches headless Chromium, opens the site's home page in a fresh
    context, pauses, then navigates the same page to ``URL`` and reads the
    text of ``<body>``.

    Returns:
        tuple: ``(True, text)`` when the target response status is 200 and
        the body text is longer than 100 characters; ``(False, text)`` when
        the page was read but that check failed (including when the
        navigation produced no response object); ``(False, None)`` when any
        exception was raised along the way.
    """
    print("═"*80)
    print("🪜 МЕТОД 3: ДВУХШАГОВАЯ ЗАГРУЗКА")
    print("═"*80)

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=True,
                args=['--disable-blink-features=AutomationControlled']
            )

            # Close the browser on every path, including when either
            # navigation or the text extraction raises.
            try:
                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                )

                page = await context.new_page()

                # Step 1: home page. Presumably done so the target request
                # happens within an already-established session - confirm.
                print(" 📍 Шаг 1: Главная страница...")
                await page.goto('https://mos-sud.ru/', wait_until='networkidle', timeout=30000)
                await asyncio.sleep(3)

                # Step 2: the actual target page, in the same page/context.
                print(" 📍 Шаг 2: Целевая страница...")
                response = await page.goto(URL, wait_until='networkidle', timeout=30000)
                await asyncio.sleep(7)

                text = await page.inner_text('body')
                # page.goto() may return None; treat that as "no status"
                # instead of raising AttributeError and losing the text.
                status = response.status if response is not None else None
            finally:
                await browser.close()

        print(f" 📊 Статус: {status}")
        print(f" 📝 Текст: {len(text)} символов")
        print(f" 📄 Превью: {text[:150]}")

        if status == 200 and len(text) > 100:
            print(" ✅ УСПЕХ!")
            return True, text

        print(" ❌ Не сработало")
        return False, text

    except Exception as e:
        # Best-effort probe: report a truncated error and signal failure.
        print(f" ❌ Ошибка: {str(e)[:100]}")
        return False, None
|
|||
|
|
|
|||
|
|
async def test_method_4_webkit():
    """Method 4: headless WebKit (Safari engine).

    Launches headless WebKit, opens a context with a macOS user agent and
    ru-RU locale, navigates to ``URL`` waiting for DOMContentLoaded, then
    reads the text of ``<body>``.

    Returns:
        tuple: ``(True, text)`` when the response status is 200 and the body
        text is longer than 100 characters; ``(False, text)`` when the page
        was read but that check failed (including when the navigation
        produced no response object); ``(False, None)`` when any exception
        was raised along the way.
    """
    print("═"*80)
    print("🌐 МЕТОД 4: WEBKIT (Safari)")
    print("═"*80)

    try:
        async with async_playwright() as p:
            browser = await p.webkit.launch(headless=True)

            # Close the browser on every path, including when navigation or
            # text extraction raises.
            try:
                context = await browser.new_context(
                    user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15',
                    locale='ru-RU'
                )

                page = await context.new_page()

                print(" 🌐 Загружаем через WebKit...")
                response = await page.goto(URL, wait_until='domcontentloaded', timeout=30000)
                # Fixed pause after navigation; presumably gives client-side
                # scripts time to finish - confirm.
                await asyncio.sleep(5)

                text = await page.inner_text('body')
                # page.goto() may return None; treat that as "no status"
                # instead of raising AttributeError and losing the text.
                status = response.status if response is not None else None
            finally:
                await browser.close()

        print(f" 📊 Статус: {status}")
        print(f" 📝 Текст: {len(text)} символов")
        print(f" 📄 Превью: {text[:150]}")

        if status == 200 and len(text) > 100:
            print(" ✅ УСПЕХ!")
            return True, text

        print(" ❌ Не сработало")
        return False, text

    except Exception as e:
        # Best-effort probe: report a truncated error and signal failure.
        print(f" ❌ Ошибка: {str(e)[:100]}")
        return False, None
|
|||
|
|
|
|||
|
|
async def main():
    """Run each test method in sequence and print a summary report.

    Every method is awaited one after another with a two-second pause after
    each. The per-method ``(success, text)`` results are collected by display
    name, a pass/fail table is printed, and then either the first 1000
    characters of each successful method's text or a fixed list of fallback
    suggestions is printed.
    """
    banner = "🥷"*40
    rule = "═"*80

    print(banner)
    print()
    print(" ТЕСТИРОВАНИЕ ОБХОДА ЗАЩИТЫ (HEADLESS MODE)")
    print()
    print(banner)
    print()

    # Display name -> coroutine function, in execution order.
    strategies = (
        ("Stealth + Маскировка", test_method_1_stealth_advanced),
        ("Firefox", test_method_2_firefox_headless),
        ("Двухшаговая загрузка", test_method_3_two_step),
        ("WebKit (Safari)", test_method_4_webkit),
    )

    results = {}
    for name, run_method in strategies:
        print()
        success, text = await run_method()
        results[name] = dict(success=success, text=text)
        print()
        await asyncio.sleep(2)

    # Summary table.
    print(rule)
    print("📊 ИТОГОВЫЕ РЕЗУЛЬТАТЫ")
    print(rule)
    print()

    for name, outcome in results.items():
        status = "❌ НЕ РАБОТАЕТ"
        if outcome['success']:
            status = "✅ РАБОТАЕТ"
        print(f" {name:30s} {status}")

    print()
    print(rule)
    print()

    winners = [(name, outcome) for name, outcome in results.items() if outcome['success']]

    if not winners:
        # Nothing succeeded: print the fixed fallback suggestions, one entry
        # per output line (an empty string yields a blank line).
        advice = (
            "💡 ВСЕ МЕТОДЫ ВЕРНУЛИ 403",
            "",
            "Сайт mos-sud.ru имеет ОЧЕНЬ сильную защиту WAF.",
            "",
            "🔐 ОСТАВШИЕСЯ ВАРИАНТЫ:",
            "",
            " 1. 🌐 Residential прокси ($50-200/мес)",
            " - Выглядят как домашние пользователи",
            " - Обходят 99% защит",
            "",
            " 2. 🔐 VPN через российский сервер",
            " - Меняет IP на российский",
            " - Может помочь с геоблокировкой",
            "",
            " 3. 🍪 Экспорт cookies из реального браузера",
            " - Открыть сайт вручную",
            " - Экспортировать cookies",
            " - Использовать в парсере",
            "",
            " 4. 📧 Официальный API доступ",
            " - Запросить у суда API ключ",
            " - Для исследовательских целей",
            "",
        )
        for line in advice:
            print(line)
    else:
        # At least one method worked: show a preview of what each returned.
        print("🎉 НАЙДЕН РАБОЧИЙ МЕТОД!")
        for name, outcome in winners:
            print(f"\n✅ {name} - УСПЕШНО!")
            print(f"\nКОНТЕНТ:\n{'-'*80}")
            print(outcome['text'][:1000])
            print('-'*80)

    print(rule)
|
|||
|
|
|
|||
|
|
# Script entry point: run the async main() coroutine when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|