162 lines
5.1 KiB
Python
162 lines
5.1 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
FastAPI сервис для генерации эмбеддингов
|
|||
|
|
Замена Ollama для n8n workflow
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
from fastapi import FastAPI, HTTPException
|
|||
|
|
from pydantic import BaseModel
|
|||
|
|
from typing import List, Union
|
|||
|
|
import uvicorn
|
|||
|
|
from sentence_transformers import SentenceTransformer
|
|||
|
|
import logging
|
|||
|
|
import time
|
|||
|
|
|
|||
|
|
# Configure logging (module-level logger, INFO and above)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|||
|
|
|
|||
|
|
# FastAPI application; title/description/version show up in the OpenAPI docs.
app = FastAPI(
    title="Embedding Service",
    description="Сервис для генерации эмбеддингов через Sentence Transformers",
    version="1.0.0"
)

# Global model instance, loaded once at startup by load_model();
# None until the startup event has completed.
model = None
|
|||
|
|
|
|||
|
|
class EmbeddingRequest(BaseModel):
    """Request payload for embedding generation.

    Accepts either a single string or a list of strings; the /embed
    endpoint normalizes both forms to a list before encoding.
    """
    # Single text or list of texts to embed
    text: Union[str, List[str]]
    # How many texts are encoded per model batch
    batch_size: int = 32
    # Whether to L2-normalize the returned vectors
    normalize: bool = True
|
|||
|
|
|
|||
|
|
class EmbeddingResponse(BaseModel):
    """Response payload carrying the generated embeddings."""
    # One embedding vector per input text, in input order
    embeddings: List[List[float]]
    # Name of the model that produced the embeddings
    model_name: str
    # Wall-clock encoding time in seconds
    processing_time: float
    # Number of input texts that were embedded
    text_count: int
|
|||
|
|
|
|||
|
|
# NOTE(review): @app.on_event is deprecated in newer FastAPI in favor of
# lifespan handlers — consider migrating when upgrading FastAPI.
@app.on_event("startup")
async def load_model():
    """Load the SentenceTransformer model once when the service starts.

    Populates the module-level ``model`` global; re-raises on failure so
    the service does not start with a missing model.
    """
    global model

    logger.info("🔄 Загружаем модель BGE-M3...")
    t0 = time.time()

    try:
        model = SentenceTransformer('BAAI/bge-m3')
        elapsed = time.time() - t0
        logger.info(f"✅ Модель загружена за {elapsed:.2f} сек")
        logger.info(f"📊 Размерность: {model.get_sentence_embedding_dimension()}")
        logger.info(f"📏 Max sequence: {model.max_seq_length}")
    except Exception as e:
        logger.error(f"❌ Ошибка загрузки модели: {e}")
        raise
|
|||
|
|
|
|||
|
|
@app.get("/")
async def root():
    """Liveness info: service status plus model dimension/max-seq once loaded."""
    loaded = model is not None
    info = {
        "status": "running",
        "model": "BAAI/bge-m3",
        "dimension": "loading...",
        "max_sequence": "loading...",
    }
    if loaded:
        info["dimension"] = model.get_sentence_embedding_dimension()
        info["max_sequence"] = model.max_seq_length
    return info
|
|||
|
|
|
|||
|
|
@app.get("/health")
async def health_check():
    """Health check endpoint for n8n; 503 until the model has loaded."""
    # Guard clause: the service is not usable before startup completes.
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    status = {
        "status": "healthy",
        "model_loaded": True,
        "model_name": "BAAI/bge-m3",
        "dimension": model.get_sentence_embedding_dimension(),
        "max_sequence": model.max_seq_length,
    }
    return status
|
|||
|
|
|
|||
|
|
@app.post("/embed", response_model=EmbeddingResponse)
async def generate_embeddings(request: EmbeddingRequest):
    """
    Generate embeddings for the given text(s).

    Supports:
    - a single text: {"text": "Привет мир"}
    - a list of texts: {"text": ["Текст 1", "Текст 2"]}
    - batching for large arrays (request.batch_size)

    Returns an EmbeddingResponse; raises 503 if the model has not loaded
    yet and 500 on any encoding failure.
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    start_time = time.time()

    try:
        # Normalize input to a list of texts
        texts = [request.text] if isinstance(request.text, str) else request.text

        logger.info(f"🔄 Обрабатываем {len(texts)} текстов...")

        # Generate embeddings with batching
        embeddings = model.encode(
            texts,
            batch_size=request.batch_size,
            normalize_embeddings=request.normalize,
            show_progress_bar=True
        )

        processing_time = time.time() - start_time

        # Convert numpy array to a plain list for JSON serialization
        embeddings_list = embeddings.tolist()

        logger.info(f"✅ Обработано за {processing_time:.2f} сек")
        # BUGFIX: guard against an empty input list — embeddings_list[0]
        # previously raised IndexError here, turning a valid empty request
        # into an opaque 500 error.
        if embeddings_list:
            logger.info(f"📊 Размерность эмбеддинга: {len(embeddings_list[0])}")

        return EmbeddingResponse(
            embeddings=embeddings_list,
            model_name="BAAI/bge-m3",
            processing_time=processing_time,
            text_count=len(texts)
        )

    except Exception as e:
        logger.error(f"❌ Ошибка генерации эмбеддингов: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|||
|
|
|
|||
|
|
@app.post("/embed-single")
async def embed_single(text: str):
    """
    Simplified endpoint for a single text (n8n compatibility).

    Delegates to /embed's handler and unwraps the first embedding.
    """
    result = await generate_embeddings(EmbeddingRequest(text=text))

    # Return only the first (and only) embedding
    return {
        "embedding": result.embeddings[0],
        "model": result.model_name,
        "time": result.processing_time,
    }
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Single worker to conserve memory (the model is loaded per process).
    server_opts = {
        "host": "0.0.0.0",
        "port": 8001,
        "reload": False,
        "workers": 1,
    }
    uvicorn.run("embedding_service:app", **server_opts)
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|