- Added comprehensive AI Assistant system (aiassist/ directory): * Vector search and embedding capabilities * Typebot proxy integration * Elastic search functionality * Message classification and chat history * MCP proxy for external integrations - Implemented Court Status API (GetCourtStatus.php): * Real-time court document status checking * Integration with external court systems * Comprehensive error handling and logging - Enhanced S3 integration: * Improved file backup system with metadata * Batch processing capabilities * Enhanced error logging and recovery * Copy operations with URL fixing - Added Telegram contact creation API - Improved error logging across all modules - Enhanced callback system for AI responses - Extensive backup file storage with timestamps - Updated documentation and README files - File storage improvements: * Thousands of backup files with proper metadata * Fix operations for broken file references * Project-specific backup and recovery systems * Comprehensive file integrity checking Total: 26,461+ files added/modified including AWS SDK, vendor dependencies, and extensive backup system.
63 lines
2.6 KiB
Python
63 lines
2.6 KiB
Python
#!/usr/bin/env python3
|
||
import sys
|
||
import json
|
||
from deeppavlov import build_model, configs
|
||
|
||
def group_entities(tokens, labels):
    """Merge BIO-tagged tokens into ``(text, type)`` entity tuples.

    Args:
        tokens: Sequence of token strings.
        labels: Sequence of tags aligned with ``tokens``; ``B-<TYPE>`` opens
            an entity, ``I-<TYPE>`` extends the open entity of the same
            type, anything else closes it.

    Returns:
        A list of ``(entity_text, entity_type)`` tuples in order of
        appearance, where ``entity_text`` is the entity's tokens joined by
        single spaces (e.g. ``("Иванов Иван", "PER")``).
    """
    spans = []
    words = []
    kind = None

    for word, tag in zip(tokens, labels):
        # Continuation of the currently open entity: same type, I- prefix.
        if tag.startswith("I-") and words and tag.split("-")[1] == kind:
            words.append(word)
            continue

        # Every other tag terminates whatever entity was open.
        if words:
            spans.append((" ".join(words), kind))

        if tag.startswith("B-"):
            # Start a new entity; the type is the part after the prefix,
            # e.g. PER or ORG.
            words, kind = [word], tag.split("-")[1]
        else:
            # "O", or an I- tag that does not match the open entity.
            words, kind = [], None

    # Flush an entity that runs up to the end of the sequence.
    if words:
        spans.append((" ".join(words), kind))

    return spans
|
||
|
||
# Lazily-built DeepPavlov NER pipeline, shared by every call in this process.
_NER_MODEL = None


def _get_ner_model():
    """Return the DeepPavlov ``ner_rus_bert`` model, building it on first use."""
    global _NER_MODEL
    if _NER_MODEL is None:
        _NER_MODEL = build_model(configs.ner.ner_rus_bert, download=True)
    return _NER_MODEL


def extract_entities_dp(text):
    """Extract plaintiff, defendant and dispute summary from ``text``.

    The text is tagged with the DeepPavlov Russian BERT NER model and the
    tags are grouped into entities with ``group_entities``.

    Args:
        text: The document text to analyse.

    Returns:
        A dict with three keys:
        ``"истец"`` (plaintiff) - the first entity of type ``PER``;
        ``"ответчик"`` (defendant) - the first entity of type ``ORG``;
        ``"суть_спора"`` (dispute summary) - the first 100 characters of
        ``text``.
        Any value that cannot be determined is ``"Не определено"``.
    """
    undefined = "Не определено"
    summary = text[:100] if text else undefined

    # Nothing to tag: skip loading and running the heavy model entirely.
    if not text or not text.strip():
        return {
            "истец": undefined,
            "ответчик": undefined,
            "суть_спора": summary
        }

    result = _get_ner_model()([text])
    # Expected result format (one inner list per input text):
    # [
    #   [ [ "Иванов", "Иван", "Иванович", "заключил", "договор", "с", "ООО", "Рога", "и", "Копыта" ] ],
    #   [ [ "B-PER", "I-PER", "I-PER", "O", "O", "O", "B-ORG", "I-ORG", "I-ORG", "I-ORG" ] ]
    # ]
    # Fall back to empty batches when the output lacks the tokens/labels pair,
    # so a short result cannot raise IndexError.
    batches = result if result and len(result) > 1 else ([], [])
    tokens = batches[0][0] if batches[0] else []
    labels = batches[1][0] if batches[1] else []
    grouped = group_entities(tokens, labels)

    # The first PER entity is taken as the plaintiff and the first ORG entity
    # as the defendant.
    plaintiff = next((entity for entity, typ in grouped if typ == "PER"), undefined)
    defendant = next((entity for entity, typ in grouped if typ == "ORG"), undefined)

    return {
        "истец": plaintiff,
        "ответчик": defendant,
        "суть_спора": summary
    }
|
||
|
||
if __name__ == '__main__':
    # Command-line entry point: all arguments are joined with spaces into the
    # text to analyse, and the result is printed to stdout as JSON.
    cli_args = sys.argv[1:]

    if not cli_args:
        # No text supplied: emit the placeholder record and exit successfully.
        placeholder = dict.fromkeys(
            ("истец", "ответчик", "суть_спора"), "Не определено"
        )
        print(json.dumps(placeholder, ensure_ascii=False))
        sys.exit(0)

    report = extract_entities_dp(" ".join(cli_args))
    print(json.dumps(report, ensure_ascii=False, indent=2))
|