2025-10-24 20:27:10 +03:00
|
|
|
|
"""
|
2025-10-24 21:24:00 +03:00
|
|
|
|
Upload API Routes - Загрузка файлов с OCR и S3
|
2025-10-24 20:27:10 +03:00
|
|
|
|
"""
|
|
|
|
|
|
from fastapi import APIRouter, UploadFile, File, HTTPException
|
|
|
|
|
|
from typing import List
|
|
|
|
|
|
import httpx
|
|
|
|
|
|
import uuid
|
|
|
|
|
|
import os
|
|
|
|
|
|
from ..config import settings
|
2025-10-24 21:24:00 +03:00
|
|
|
|
from ..services.s3_service import s3_service
|
2025-10-24 21:58:34 +03:00
|
|
|
|
from ..services.ocr_service import ocr_service
|
|
|
|
|
|
from ..services.redis_service import redis_service
|
|
|
|
|
|
from ..services.rabbitmq_service import rabbitmq_service
|
2025-10-24 20:27:10 +03:00
|
|
|
|
import logging
|
2025-10-24 21:58:34 +03:00
|
|
|
|
import json
|
2025-10-24 20:27:10 +03:00
|
|
|
|
|
|
|
|
|
|
# Router that groups every upload endpoint under a common URL prefix.
router = APIRouter(
    prefix="/api/v1/upload",
    tags=["Upload"],
)

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Local directory where policy/passport scans are stored before OCR.
# It is created at import time so request handlers can write to it directly.
UPLOAD_DIR = "/tmp/erv_uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/policy")
async def upload_policy(file: UploadFile = File(...)):
    """
    Upload a policy scan, store it on local disk and run it through OCR.

    The file is written to ``UPLOAD_DIR`` under a freshly generated UUID and
    its bytes are posted to ``{settings.ocr_api_url}/process``. OCR is
    best-effort: any OCR failure (transport error, non-200 status, unexpected
    payload) is logged and the upload is still reported as successful, just
    without recognized text.

    Args:
        file: Uploaded policy scan.

    Returns:
        dict with:
        - success: True once the file has been stored
        - file_id: ID of the stored file
        - ocr_text: recognized text ("" when OCR failed)
        - extracted_data: policy number, series and dates (parsing is not
          implemented yet, so the values are None; {} when OCR failed)
        - message: present only when OCR failed

    Raises:
        HTTPException: 500 if the file could not be read or stored.
    """
    try:
        # Generate a unique ID for the stored file.
        file_id = str(uuid.uuid4())

        # The client-supplied filename may be missing and is untrusted: only
        # a purely alphanumeric suffix is accepted as the extension, so it can
        # never contain path separators. Anything else falls back to "jpg".
        original_name = file.filename or ""
        _, dot, suffix = original_name.rpartition(".")
        file_ext = suffix if dot and suffix.isalnum() else "jpg"
        file_path = f"{UPLOAD_DIR}/{file_id}.{file_ext}"

        # Read the upload first so a failed read does not leave an empty file.
        content = await file.read()
        with open(file_path, "wb") as f:
            f.write(content)

        logger.info(f"📄 File saved: {file_path}")

        # Send the document to the OCR service (best-effort).
        ocr_text = ""
        ocr_ok = False
        try:
            async with httpx.AsyncClient(timeout=60.0) as client:
                response = await client.post(
                    f"{settings.ocr_api_url}/process",
                    files={"file": (file.filename, content, file.content_type)},
                )

            if response.status_code == 200:
                ocr_result = response.json()
                ocr_text = ocr_result.get("text", "")
                ocr_ok = True
                logger.info("✅ OCR completed for policy")
            else:
                logger.error(f"OCR error: {response.status_code}")
        except Exception as ocr_error:
            logger.error(f"OCR processing error: {ocr_error}")

        if not ocr_ok:
            # Report the upload as successful, just without OCR data.
            return {
                "success": True,
                "file_id": file_id,
                "ocr_text": "",
                "extracted_data": {},
                "message": "Файл загружен, но OCR не удалось выполнить"
            }

        # TODO: extract the policy number, series and dates from the OCR text
        # (regex or AI-based parsing).
        return {
            "success": True,
            "file_id": file_id,
            "ocr_text": ocr_text,
            "extracted_data": {
                "policy_number": None,  # TODO: parsing
                "policy_series": None,
                "start_date": None,
                "end_date": None
            }
        }

    except Exception as e:
        logger.error(f"File upload error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/passport")
async def upload_passport(file: UploadFile = File(...)):
    """
    Upload a passport scan, store it on local disk and run it through OCR.

    The file is written to ``UPLOAD_DIR`` under a freshly generated UUID and
    its bytes are posted to ``{settings.ocr_api_url}/process``. OCR is
    best-effort: any OCR failure (transport error, non-200 status, unexpected
    payload) is logged and the upload is still reported as successful, just
    without recognized text.

    Args:
        file: Uploaded passport scan.

    Returns:
        dict with:
        - success: True once the file has been stored
        - file_id: ID of the stored file
        - ocr_text: recognized text ("" when OCR failed)
        - extracted_data: full name, birth date, passport series/number
          (parsing is not implemented yet, so the values are None; {} when
          OCR failed)
        - message: present only when OCR failed

    Raises:
        HTTPException: 500 if the file could not be read or stored.
    """
    try:
        file_id = str(uuid.uuid4())

        # The client-supplied filename may be missing and is untrusted: only
        # a purely alphanumeric suffix is accepted as the extension, so it can
        # never contain path separators. Anything else falls back to "jpg".
        original_name = file.filename or ""
        _, dot, suffix = original_name.rpartition(".")
        file_ext = suffix if dot and suffix.isalnum() else "jpg"
        file_path = f"{UPLOAD_DIR}/{file_id}.{file_ext}"

        # Read the upload first so a failed read does not leave an empty file.
        content = await file.read()
        with open(file_path, "wb") as f:
            f.write(content)

        logger.info(f"📄 Passport saved: {file_path}")

        # OCR processing (best-effort).
        ocr_text = ""
        ocr_ok = False
        try:
            async with httpx.AsyncClient(timeout=60.0) as client:
                response = await client.post(
                    f"{settings.ocr_api_url}/process",
                    files={"file": (file.filename, content, file.content_type)},
                )

            if response.status_code == 200:
                ocr_result = response.json()
                ocr_text = ocr_result.get("text", "")
                ocr_ok = True
                logger.info("✅ OCR completed for passport")
            else:
                logger.error(f"OCR error: {response.status_code}")
        except Exception as ocr_error:
            logger.error(f"OCR error: {ocr_error}")

        if not ocr_ok:
            # Report the upload as successful, just without OCR data.
            return {
                "success": True,
                "file_id": file_id,
                "ocr_text": "",
                "extracted_data": {},
                "message": "Файл загружен, но OCR не удалось"
            }

        # TODO: extract the full name via regex or AI.
        return {
            "success": True,
            "file_id": file_id,
            "ocr_text": ocr_text,
            "extracted_data": {
                "full_name": None,  # TODO: parsing
                "birth_date": None,
                "passport_series": None,
                "passport_number": None
            }
        }

    except Exception as e:
        logger.error(f"Passport upload error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
2025-10-24 21:24:00 +03:00
|
|
|
|
|
|
|
|
|
|
async def _run_ocr_pipeline(file_id, file_url, file, folder, content):
    """
    Queue, run and cache OCR for one uploaded file; every step is best-effort.

    The three steps are isolated from each other so that a failure in one
    (e.g. RabbitMQ being unavailable) does not prevent the others from running.

    Args:
        file_id: ID assigned to the uploaded file.
        file_url: URL returned by the S3 upload.
        file: The original upload (its filename and content type are used).
        folder: S3 folder the file was stored in.
        content: Raw bytes of the file.

    Returns:
        Whatever ``ocr_service.process_document`` returned, or None if the
        inline OCR call raised.
    """
    ocr_task = {
        "file_id": file_id,
        "file_url": file_url,
        "filename": file.filename,
        "folder": folder,
        "content_type": file.content_type
    }

    # Step 1: publish the task to the OCR queue (background processing).
    try:
        await rabbitmq_service.publish(
            queue_name="erv_ocr_processing",
            message=json.dumps(ocr_task)
        )
        logger.info(f"📤 OCR task queued: {file_id} - {file.filename}")
    except Exception as queue_error:
        logger.error(f"⚠️ Queue error (non-critical): {queue_error}")

    # Step 2: also run OCR right away for a fast result; the queue is a backup.
    try:
        ocr_result = await ocr_service.process_document(content, file.filename)
    except Exception as ocr_error:
        logger.error(f"⚠️ OCR error (non-critical): {ocr_error}")
        return None

    # Step 3: cache the result in Redis for one hour.
    try:
        await redis_service.set(
            f"ocr_result:{file_id}",
            json.dumps(ocr_result, ensure_ascii=False),
            expire=3600
        )
        logger.info(f"💾 OCR result cached in Redis: {file_id}")
    except Exception as cache_error:
        logger.error(f"⚠️ Redis cache error (non-critical): {cache_error}")

    # Diagnostics only; guarded because the result shape is not guaranteed here.
    if isinstance(ocr_result, dict):
        logger.info(f"📊 Document type: {ocr_result.get('document_type')}")
        logger.info(f"✅ Valid: {ocr_result.get('is_valid')}, Confidence: {ocr_result.get('confidence')}")
        if ocr_result.get('document_type') == 'garbage':
            logger.warning(f"🗑️ GARBAGE uploaded: {file.filename} (but user doesn't know)")

    return ocr_result


@router.post("/files")
async def upload_files(files: List[UploadFile] = File(...), folder: str = "claims"):
    """
    Generic multi-file upload to S3 with best-effort OCR.

    Each file is validated (size, content type), uploaded to S3 and then
    passed through the OCR pipeline. A failure of one file never aborts the
    batch: it is reported as an entry with ``success: False`` in the result.

    Args:
        files: Files to upload (at most 10 files, 15MB each).
        folder: S3 folder (claims, policies, documents, passports, tickets).
            Any other value silently falls back to "claims".

    Returns:
        dict with:
        - success: True
        - uploaded_count: number of files uploaded successfully
        - total_count: number of files received
        - files: per-file result dicts (URL, file_id, size, content_type and
          ocr_result on success; error text on failure)

    Raises:
        HTTPException: 400 if more than 10 files are sent; 500 on an
            unexpected batch-level error.
    """
    # Guard: limit the number of files per request.
    if len(files) > 10:
        raise HTTPException(status_code=400, detail="Максимум 10 файлов за раз")

    # Guard: restrict the target folder to a known set.
    allowed_folders = ('claims', 'policies', 'documents', 'passports', 'tickets')
    if folder not in allowed_folders:
        folder = 'claims'

    # Loop-invariant limits.
    max_file_size = 15 * 1024 * 1024  # 15MB
    allowed_types = ('image/', 'application/pdf')

    try:
        uploaded_files = []

        for file in files:
            try:
                # Guard: reject an oversized file before loading it into
                # memory when the framework already knows its size.
                known_size = getattr(file, "size", None)
                if known_size is not None and known_size > max_file_size:
                    uploaded_files.append({
                        "success": False,
                        "filename": file.filename,
                        "error": f"Файл больше 15MB ({known_size / 1024 / 1024:.1f}MB)"
                    })
                    continue

                content = await file.read()

                # Guard: file size check on the actual bytes.
                if len(content) > max_file_size:
                    uploaded_files.append({
                        "success": False,
                        "filename": file.filename,
                        "error": f"Файл больше 15MB ({len(content) / 1024 / 1024:.1f}MB)"
                    })
                    continue

                # Guard: content type validation (a missing type is let through).
                if file.content_type and not file.content_type.startswith(allowed_types):
                    uploaded_files.append({
                        "success": False,
                        "filename": file.filename,
                        "error": f"Недопустимый тип файла: {file.content_type}"
                    })
                    continue

                # Upload to S3.
                file_url = await s3_service.upload_file(
                    file_content=content,
                    filename=file.filename,
                    content_type=file.content_type or 'application/octet-stream',
                    folder=folder
                )

                if not file_url:
                    uploaded_files.append({
                        "success": False,
                        "filename": file.filename,
                        "error": "S3 upload failed"
                    })
                    continue

                file_id = str(uuid.uuid4())
                ocr_result = await _run_ocr_pipeline(
                    file_id, file_url, file, folder, content
                )

                uploaded_files.append({
                    "success": True,
                    "filename": file.filename,
                    "url": file_url,
                    "file_id": file_id,
                    "size": len(content),
                    "content_type": file.content_type,
                    "ocr_result": ocr_result
                })

            except Exception as file_error:
                logger.error(f"Error uploading {file.filename}: {file_error}")
                uploaded_files.append({
                    "success": False,
                    "filename": file.filename,
                    "error": str(file_error)
                })

        return {
            "success": True,
            "uploaded_count": sum(1 for f in uploaded_files if f.get("success")),
            "total_count": len(files),
            "files": uploaded_files
        }

    except Exception as e:
        logger.error(f"Batch upload error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
2025-10-24 21:58:34 +03:00
|
|
|
|
|
|
|
|
|
|
@router.get("/ocr-result/{file_id}")
async def get_ocr_result(file_id: str):
    """
    Look up the cached OCR result for a file in Redis.

    Args:
        file_id: UUID of the file.

    Returns:
        dict with ``found: True`` and the decoded ``ocr_result`` when a value
        is stored under ``ocr_result:{file_id}``; otherwise a dict with
        ``found: False`` and an explanatory message.

    Raises:
        HTTPException: 500 if the lookup or JSON decoding fails.
    """
    try:
        cached = await redis_service.get(f"ocr_result:{file_id}")

        # Nothing stored (yet): report "not found" instead of failing.
        if not cached:
            return {
                "success": True,
                "found": False,
                "message": "OCR результат еще не готов или не найден"
            }

        decoded = json.loads(cached)
        return {
            "success": True,
            "found": True,
            "file_id": file_id,
            "ocr_result": decoded
        }

    except Exception as e:
        logger.error(f"Error getting OCR result: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|