362 lines
13 KiB
Python
362 lines
13 KiB
Python
|
|
#!/usr/bin/env python3
"""
Advanced stealth parser.

Browser-automation strategies for loading court-site pages that are
protected by anti-bot checks.
"""
|
|||
|
|
|
|||
|
|
import asyncio
|
|||
|
|
from playwright.async_api import async_playwright
|
|||
|
|
import random
|
|||
|
|
import time
|
|||
|
|
|
|||
|
|
class AdvancedStealthParser:
    """Collection of alternative Playwright strategies for loading one page.

    Every ``parse_*`` coroutine is a static method that takes the target URL,
    prints its progress to stdout and returns the visible text of ``<body>``
    on success or ``None`` when anything goes wrong.  ``parse_with_cookies``
    is the exception: it only prints instructions and always returns ``None``.
    """

    # Desktop User-Agent strings; one is picked at random per browser context.
    USER_AGENTS = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
    ]

    # Landing page used by parse_step_by_step when the target URL has no
    # usable http(s) origin.
    DEFAULT_HOME_URL = "https://mos-sud.ru/"

    # How many leading characters of the page text are echoed to the console.
    PREVIEW_CHARS = 500

    @staticmethod
    def _print_banner(title: str) -> None:
        """Print the framed section header that opens every method."""
        print("═" * 80)
        print(title)
        print("═" * 80)
        print()

    @staticmethod
    def _print_preview(text: str) -> None:
        """Print the length of *text* followed by a short preview of it."""
        print(f"✅ Получено {len(text)} символов")
        print()
        print("ПРЕВЬЮ:")
        print("-" * 80)
        print(text[:AdvancedStealthParser.PREVIEW_CHARS])
        print("-" * 80)

    @staticmethod
    def _home_page_url(url: str) -> str:
        """Return the root page (``scheme://host/``) of the site serving *url*.

        Falls back to ``DEFAULT_HOME_URL`` when *url* cannot be parsed or is
        not an absolute http(s) URL.
        """
        # Function-scope import keeps the file's top-level import block as is.
        from urllib.parse import urlsplit

        try:
            parts = urlsplit(url)
        except ValueError:
            # urlsplit rejects some malformed inputs (e.g. a broken IPv6 host).
            return AdvancedStealthParser.DEFAULT_HOME_URL

        if parts.scheme in ("http", "https") and parts.netloc:
            return f"{parts.scheme}://{parts.netloc}/"
        return AdvancedStealthParser.DEFAULT_HOME_URL

    @staticmethod
    async def parse_with_human_behavior(url: str):
        """Method 1: load *url* in Chromium, then scroll, move the mouse and wait.

        Returns the body text, or ``None`` if any step fails.
        """
        AdvancedStealthParser._print_banner("🧑 МЕТОД 1: ИМИТАЦИЯ ЧЕЛОВЕКА")

        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=False,  # headed window, not headless
                args=[
                    '--disable-blink-features=AutomationControlled',
                    '--disable-dev-shm-usage',
                    # NOTE(review): this switches off the same-origin policy
                    # for every page opened in this browser - confirm that it
                    # is really required.
                    '--disable-web-security',
                ],
            )

            # Everything after launch lives inside the try block so that a
            # failure while creating the context or page is reported like any
            # other error and the browser is still closed.
            try:
                context = await browser.new_context(
                    user_agent=random.choice(AdvancedStealthParser.USER_AGENTS),
                    viewport={'width': 1920, 'height': 1080},
                    locale='ru-RU',
                    timezone_id='Europe/Moscow',
                    geolocation={'latitude': 55.7558, 'longitude': 37.6173},  # Moscow
                    permissions=['geolocation'],
                )
                page = await context.new_page()

                # Override automation-related browser properties before any
                # page script runs.
                await page.add_init_script("""
                    Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
                    Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3]});
                    window.chrome = {runtime: {}};
                """)

                print("🌐 Загружаем страницу...")
                await page.goto(url, wait_until='domcontentloaded', timeout=30000)
                # The "status" label actually shows the page title.
                print(f"📊 Статус: {await page.title()}")

                print("🖱️ Имитируем действия человека...")

                # 1. Scroll down in two steps with a random pause after each.
                for offset in (300, 600):
                    await page.evaluate(f"window.scrollTo(0, {offset})")
                    await asyncio.sleep(random.uniform(1, 2))

                # 2. Move the mouse to a random point.
                await page.mouse.move(random.randint(100, 500), random.randint(100, 500))
                await asyncio.sleep(0.5)

                # 3. Give the page extra time before reading it.
                await asyncio.sleep(5)

                text = await page.inner_text('body')
                AdvancedStealthParser._print_preview(text)
                return text

            except Exception as e:
                # Top-level boundary of a diagnostic script: report and
                # signal failure with None instead of propagating.
                print(f"❌ Ошибка: {e}")
                return None
            finally:
                await browser.close()

    @staticmethod
    async def parse_with_firefox(url: str):
        """Method 2: load *url* in Firefox and return the body text.

        Returns ``None`` if any step fails.
        """
        AdvancedStealthParser._print_banner("🦊 МЕТОД 2: FIREFOX")

        async with async_playwright() as p:
            browser = await p.firefox.launch(headless=False)

            try:
                context = await browser.new_context(
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
                    viewport={'width': 1920, 'height': 1080},
                    locale='ru-RU',
                )
                page = await context.new_page()

                print("🌐 Загружаем через Firefox...")
                await page.goto(url, wait_until='networkidle', timeout=30000)
                await asyncio.sleep(5)

                text = await page.inner_text('body')
                AdvancedStealthParser._print_preview(text)
                return text

            except Exception as e:
                print(f"❌ Ошибка: {e}")
                return None
            finally:
                await browser.close()

    @staticmethod
    async def parse_with_cookies(url: str):
        """Method 3: print instructions for reusing cookies from a real browser.

        No browser is started and *url* is not used; the coroutine only
        prints a how-to with a code sample and returns ``None``.
        """
        AdvancedStealthParser._print_banner("🍪 МЕТОД 3: РЕАЛЬНЫЕ COOKIES")

        print("💡 Для этого метода нужно:")
        print(" 1. Открыть сайт в обычном браузере")
        print(" 2. Экспортировать cookies")
        print(" 3. Передать их в парсер")
        print()

        # Sample cookie structure shown to the user.
        print("Пример кода:")
        print("-" * 80)
        print("""
cookies = [
    {
        'name': 'session',
        'value': 'abc123...',
        'domain': '.mos-sud.ru',
        'path': '/'
    }
]

context = await browser.new_context()
await context.add_cookies(cookies)
""")
        print("-" * 80)

    @staticmethod
    async def parse_step_by_step(url: str):
        """Method 4: open the site's home page first, then navigate to *url*.

        The home page is derived from the origin of *url* (see
        ``_home_page_url``).  Returns the body text of the target page, or
        ``None`` if any step fails.
        """
        AdvancedStealthParser._print_banner("🪜 МЕТОД 4: ПОШАГОВАЯ ЗАГРУЗКА")

        home_url = AdvancedStealthParser._home_page_url(url)

        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=False,
                args=['--disable-blink-features=AutomationControlled'],
            )

            try:
                context = await browser.new_context(
                    user_agent=random.choice(AdvancedStealthParser.USER_AGENTS),
                    viewport={'width': 1920, 'height': 1080},
                    locale='ru-RU',
                )
                page = await context.new_page()

                # Step 1: home page.
                print("📍 Шаг 1: Загружаем главную страницу...")
                await page.goto(home_url, wait_until='networkidle')
                await asyncio.sleep(3)
                print("✅ Главная загружена")

                # Step 2: target page, in the same tab.
                print("📍 Шаг 2: Переходим на целевую страницу...")
                await page.goto(url, wait_until='networkidle', timeout=30000)
                await asyncio.sleep(5)

                text = await page.inner_text('body')
                AdvancedStealthParser._print_preview(text)
                return text

            except Exception as e:
                print(f"❌ Ошибка: {e}")
                return None
            finally:
                await browser.close()

    @staticmethod
    async def parse_with_delays(url: str):
        """Method 5: load *url* with ``slow_mo`` and long pauses between steps.

        Returns the body text, or ``None`` if any step fails.
        """
        AdvancedStealthParser._print_banner("⏰ МЕТОД 5: МЕДЛЕННАЯ ЗАГРУЗКА")

        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=False,
                slow_mo=500,  # slows down every Playwright operation
            )

            try:
                context = await browser.new_context(
                    user_agent=random.choice(AdvancedStealthParser.USER_AGENTS),
                )
                page = await context.new_page()

                print("🐌 Загружаем ОЧЕНЬ медленно (как человек)...")
                await page.goto(url, wait_until='load', timeout=60000)

                print("⏳ Ждём 10 секунд...")
                await asyncio.sleep(10)

                # Scroll to 300, 600 and 900 px with a pause after each step.
                for step in range(1, 4):
                    await page.evaluate(f"window.scrollTo(0, {step * 300})")
                    await asyncio.sleep(2)

                print("⏳ Ждём ещё 5 секунд...")
                await asyncio.sleep(5)

                text = await page.inner_text('body')
                AdvancedStealthParser._print_preview(text)
                return text

            except Exception as e:
                print(f"❌ Ошибка: {e}")
                return None
            finally:
                await browser.close()
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _classify_result(result):
    """Turn the text returned by a parse method into a human-readable verdict.

    A result counts as a success when it is non-empty and longer than 100
    characters; it is upgraded to "data found" when it contains the marker
    ``77MS0312`` or the word ``дело`` (case-insensitive).
    """
    if not result or len(result) <= 100:
        return "❌ Не удалось"
    if "77MS0312" in result or "дело" in result.lower():
        return "🎯 УСПЕХ (нашли данные!)"
    return "✅ УСПЕХ"


async def test_all_methods(url: str):
    """Run every browser-based parse method against *url* and print a summary.

    The run is interactive: it waits for Enter before starting and between
    methods.  Each method's outcome (success, data found, failure, or the
    error text) is collected and printed as a table at the end, followed by
    a list of further suggestions.
    """
    print("🥷" * 40)
    print()
    print(" ПРОДВИНУТЫЕ МЕТОДЫ ОБХОДА ЗАЩИТЫ")
    print()
    print("🥷" * 40)
    print()
    print(f"Цель: {url}")
    print()
    # input() blocks the event loop; nothing else is scheduled at this point.
    input("⏸️ Нажми Enter чтобы начать тестирование...")
    print()

    methods = [
        ("Имитация человека", AdvancedStealthParser.parse_with_human_behavior),
        ("Firefox", AdvancedStealthParser.parse_with_firefox),
        ("Пошаговая загрузка", AdvancedStealthParser.parse_step_by_step),
        ("Медленная загрузка", AdvancedStealthParser.parse_with_delays),
    ]

    results = {}
    last_index = len(methods) - 1

    for index, (name, method) in enumerate(methods):
        print()
        print("=" * 80)
        print(f"ТЕСТИРУЕМ: {name}")
        print("=" * 80)
        print()

        try:
            results[name] = _classify_result(await method(url))
        except Exception as e:
            # One failing method must not abort the remaining ones.
            results[name] = f"❌ Ошибка: {e}"

        print()
        # Ask to continue only when there actually is a next method.
        if index < last_index:
            input("⏸️ Нажми Enter для следующего метода...")

    # Summary table.
    print()
    print("=" * 80)
    print("📊 ИТОГОВЫЕ РЕЗУЛЬТАТЫ")
    print("=" * 80)
    print()

    for name, result in results.items():
        print(f"{name:30s} {result}")

    print()
    print("=" * 80)
    print()
    print("💡 ДОПОЛНИТЕЛЬНЫЕ МЕТОДЫ:")
    print()
    print("🍪 Cookies: Экспортируй cookies из реального браузера")
    print("🌐 Прокси: Используй residential прокси")
    print("🔐 VPN: Подключись через российский VPN")
    print("📧 API: Запроси официальный доступ к API суда")
    print()
    print("=" * 80)
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Default target: one civil-case details page, split across lines for
    # readability (the pieces concatenate to the exact original URL).
    target_url = (
        "https://mos-sud.ru/312/cases/civil/details/"
        "7b8a110a-162d-4493-88b0-e505523c9935"
        "?uid=77MS0312-01-2025-002929-35&formType=fullForm"
    )

    # Run the interactive comparison of all methods.
    asyncio.run(test_all_methods(target_url))
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|