- Added comprehensive AI Assistant system (aiassist/ directory): * Vector search and embedding capabilities * Typebot proxy integration * Elastic search functionality * Message classification and chat history * MCP proxy for external integrations - Implemented Court Status API (GetCourtStatus.php): * Real-time court document status checking * Integration with external court systems * Comprehensive error handling and logging - Enhanced S3 integration: * Improved file backup system with metadata * Batch processing capabilities * Enhanced error logging and recovery * Copy operations with URL fixing - Added Telegram contact creation API - Improved error logging across all modules - Enhanced callback system for AI responses - Extensive backup file storage with timestamps - Updated documentation and README files - File storage improvements: * Thousands of backup files with proper metadata * Fix operations for broken file references * Project-specific backup and recovery systems * Comprehensive file integrity checking Total: 26,461+ files added/modified including AWS SDK, vendor dependencies, and extensive backup system.
568 lines
22 KiB
PHP
568 lines
22 KiB
PHP
<?php
|
||
// aiassist/search.php
|
||
// Base URL of the ElasticSearch node. A value defined earlier takes precedence;
// otherwise fall back to the local default.
defined('ELASTIC_URL') || define('ELASTIC_URL', 'http://localhost:9200');
|
||
|
||
/**
 * Runs a search request against the given ElasticSearch index.
 *
 * The query is sent as a JSON body via POST to ELASTIC_URL/{index}/_search.
 * Encoding failures, transport failures and non-200 responses are appended to
 * logs/search.log next to this file.
 *
 * @param string $index Index name.
 * @param array  $query Request body.
 * @return array Decoded response body. An empty array is returned when the
 *               request could not be built or sent, or when the response body
 *               does not decode to an array.
 */
function searchIndex($index, $query) {
    $logFile = __DIR__ . "/logs/search.log";
    $es_url = ELASTIC_URL . "/{$index}/_search";

    // json_encode() returns false for unencodable input (e.g. NAN/INF in a vector);
    // do not send a broken body in that case.
    $payload = json_encode($query);
    if ($payload === false) {
        error_log("Ошибка поиска в ElasticSearch (индекс $index): JSON - " . json_last_error_msg() . "\n", 3, $logFile);
        return [];
    }

    $ch = curl_init($es_url);
    if ($ch === false) {
        error_log("Ошибка поиска в ElasticSearch (индекс $index): cURL init failed\n", 3, $logFile);
        return [];
    }

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => ['Content-Type: application/json']
    ]);

    $response = curl_exec($ch);

    // curl_exec() returns false on transport errors (connection refused, DNS, ...).
    if ($response === false) {
        error_log("Ошибка поиска в ElasticSearch (индекс $index): cURL - " . curl_error($ch) . "\n", 3, $logFile);
        curl_close($ch);
        return [];
    }

    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($httpCode !== 200) {
        error_log("Ошибка поиска в ElasticSearch (индекс $index): HTTP $httpCode - " . $response . "\n", 3, $logFile);
    }

    // A non-200 body is still decoded and returned, as before, so callers can inspect it.
    $decoded = json_decode($response, true);

    return is_array($decoded) ? $decoded : [];
}
|
||
|
||
|
||
|
||
/**
 * Runs a refined search over the legal_cases index that combines lexical
 * matching on $text with optional vector similarity clauses.
 *
 * The built query is appended to logs/search.log before it is sent.
 *
 * @param string     $text          Query text for the multi_match / match_phrase clauses.
 * @param array|null $embedding2048 Query vector scored against 'embedding_2048'; skipped when null.
 * @param array|null $embedding1024 Query vector scored against 'embedding_1024'; skipped when null.
 * @param int        $size          Number of hits to request.
 * @return array The '_source' of each hit, or an empty array when the response has no hits list.
 */
function searchRefinedCasesFromBestExample($text, $embedding2048, $embedding1024 = null, $size = 5) {
    // Lexical part: fuzzy match over two fields plus a near-phrase match.
    $fuzzyMatch = [
        "multi_match" => [
            "query" => $text,
            "fields" => ["court_decision", "суть_спора"],
            "fuzziness" => "AUTO",
            "boost" => 1.5
        ]
    ];
    $phraseMatch = [
        "match_phrase" => [
            "court_decision" => [
                "query" => $text,
                "slop" => 2
            ]
        ]
    ];
    $shouldClauses = [$fuzzyMatch, $phraseMatch];

    // Vector part: one script_score clause per supplied embedding, in this order.
    $vectorInputs = [
        ["cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0", $embedding2048, 1.5],
        ["cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0", $embedding1024, 1.0]
    ];
    foreach ($vectorInputs as list($scriptSource, $vector, $boost)) {
        if ($vector === null) {
            continue;
        }
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => $scriptSource,
                    "params" => ["query_vector" => $vector]
                ],
                "boost" => $boost
            ]
        ];
    }

    $filters = [
        ["exists" => ["field" => "court_decision"]],
        ["exists" => ["field" => "law_articles"]],
        ["range" => ["case_year" => ["gte" => 2015]]]
    ];

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $shouldClauses,
                "filter" => $filters
            ]
        ]
    ];

    $logLine = sprintf(
        "[%s] 🔎 Уточнённый поиск: %s\n",
        date("Y-m-d H:i:s"),
        json_encode($query, JSON_UNESCAPED_UNICODE)
    );
    file_put_contents(__DIR__ . "/logs/search.log", $logLine, FILE_APPEND);

    $response = searchIndex("legal_cases", $query);

    if (!isset($response['hits']['hits'])) {
        return [];
    }

    return array_column($response['hits']['hits'], '_source');
}
|
||
|
||
|
||
|
||
|
||
/**
 * Runs a combined lexical and embedding search over the legal_cases index.
 *
 * The vector clause is added only when $queryEmbeddings carries a non-null
 * 'embedding_2048' entry; otherwise the search is purely lexical.
 *
 * @param string $queryText       Query text.
 * @param array  $queryEmbeddings Query vectors keyed by name; only 'embedding_2048' is read.
 * @param int    $size            Number of hits to request.
 * @return array The response as returned by searchIndex() (not reduced to '_source').
 */
function searchCases($queryText, $queryEmbeddings, $size = 5) {
    $shouldClauses = [
        [
            "multi_match" => [
                "query" => $queryText,
                "fields" => ["court_decision", "суть_спора"],
                "fuzziness" => "AUTO"
            ]
        ]
    ];

    // Guard the lookup: a missing key would otherwise raise a warning and put
    // a null query_vector into the script parameters.
    $embedding2048 = $queryEmbeddings["embedding_2048"] ?? null;
    if ($embedding2048 !== null) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => (object)[]],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') * 1.5 + 1.0",
                    "params" => [
                        "query_vector" => $embedding2048
                    ]
                ]
            ]
        ];
    }

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $shouldClauses
            ]
        ]
    ];

    return searchIndex("legal_cases", $query);
}
|
||
|
||
/**
 * Searches the legal_cases index for court decisions similar to the given facts.
 *
 * Reads these keys from $queryParams:
 *  - 'facts_short'    (required) text used for the lexical clauses;
 *  - 'facts_full'     (required) must be non-empty but is not otherwise used here;
 *  - 'category'       (required) matched against 'case_category_text' as a filter;
 *  - 'embedding_2048' (optional) query vector for the 'embedding_2048' field;
 *  - 'embedding_1024' (optional) query vector for the 'embedding_1024' field.
 *
 * The built query and any failure are appended to logs/search.log.
 *
 * @param array $queryParams Query parameters (see above).
 * @param int   $size        Number of hits to request.
 * @return array The '_source' of each hit; an empty array when required input
 *               is missing or nothing was found.
 */
function searchSimilarCases($queryParams, $size = 5) {
    $logFile = __DIR__ . "/logs/search.log";

    // The category feeds a filter clause below, so it is validated together with
    // the facts instead of being sent to ElasticSearch as a null query.
    $category = $queryParams['category'] ?? null;
    if (
        empty($queryParams['facts_short'])
        || empty($queryParams['facts_full'])
        || $category === null
        || $category === ''
    ) {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка: Недостаточно данных для поиска!\n", FILE_APPEND);
        return [];
    }

    $factsShort = $queryParams['facts_short']; // used for the lexical search
    $normalizedEmbedding1024 = $queryParams['embedding_1024'] ?? null;
    $normalizedEmbedding2048 = $queryParams['embedding_2048'] ?? null;

    $shouldClauses = [];

    // Lexical search (multi_match) based on facts_short.
    $shouldClauses[] = [
        "multi_match" => [
            "query" => $factsShort,
            "fields" => ["court_decision", "law_articles"],
            "fuzziness" => "0", // fuzziness is switched off
            "boost" => 1.5
        ]
    ];

    // match_phrase for a near-exact phrase match.
    $shouldClauses[] = [
        "match_phrase" => [
            "court_decision" => [
                "query" => $factsShort,
                "slop" => 2
            ]
        ]
    ];

    // Vector search (script_score) on embedding_2048, when supplied.
    if ($normalizedEmbedding2048 !== null) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0",
                    "params" => [
                        "query_vector" => $normalizedEmbedding2048
                    ]
                ],
                "boost" => 1.5
            ]
        ];
    }

    // Vector search (script_score) on embedding_1024, when supplied.
    if ($normalizedEmbedding1024 !== null) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0",
                    "params" => [
                        "query_vector" => $normalizedEmbedding1024
                    ]
                ],
                "boost" => 1.0
            ]
        ];
    }

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $shouldClauses,
                "filter" => [
                    ["match" => ["case_category_text" => ["query" => $category, "fuzziness" => "AUTO"]]],
                    ["exists" => ["field" => "court_decision"]],
                    ["exists" => ["field" => "law_articles"]],
                    ["range" => ["case_year" => ["gte" => 2010]]]
                ]
            ]
        ]
    ];

    file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Запрос в Elasticsearch: " . json_encode($query, JSON_UNESCAPED_UNICODE) . "\n", FILE_APPEND);

    $response = searchIndex("legal_cases", $query);

    if (empty($response['hits']['hits'])) {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch вернул 0 результатов!\n", FILE_APPEND);
        return [];
    }

    return array_column($response['hits']['hits'], '_source');
}
|
||
|
||
|
||
|
||
|
||
/*
|
||
function searchSimilarCases($queryParams, $size = 5) {
|
||
$logFile = __DIR__ . "/logs/search.log";
|
||
|
||
if (empty($queryParams['facts']) || empty($queryParams['category'])) {
|
||
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка: Недостаточно данных для поиска!\n", FILE_APPEND);
|
||
return [];
|
||
}
|
||
|
||
// Начальные параметры
|
||
$amount = isset($queryParams['amount']) && is_numeric($queryParams['amount']) ? floatval($queryParams['amount']) : null;
|
||
$fuzzinessLevels = ["AUTO", "2", "5"];
|
||
$years = [2010, 2000, null]; // Искать сначала с 2010, потом 2000, потом без ограничений
|
||
$category = $queryParams['category'];
|
||
$expandedSearch = false;
|
||
|
||
$fuzzinessIndex = 0;
|
||
$yearIndex = 0;
|
||
|
||
do {
|
||
$fuzziness = $fuzzinessLevels[$fuzzinessIndex] ?? "AUTO";
|
||
$minYear = $years[$yearIndex] ?? null;
|
||
|
||
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Поиск: fuzziness=$fuzziness, minYear=$minYear, category=$category\n", FILE_APPEND);
|
||
|
||
// Создаём поисковые условия
|
||
$shouldClauses = [
|
||
[
|
||
"multi_match" => [
|
||
"query" => $queryParams['facts'],
|
||
"fields" => ["court_decision", "law_articles"],
|
||
"fuzziness" => $fuzziness,
|
||
"boost" => 1.0
|
||
]
|
||
]
|
||
];
|
||
|
||
// 🔍 Добавляем поиск по `embedding_2048`, если есть
|
||
if (isset($queryParams['embedding_2048']) && is_array($queryParams['embedding_2048'])) {
|
||
$shouldClauses[] = [
|
||
"script_score" => [
|
||
"query" => ["match_all" => new stdClass()],
|
||
"script" => [
|
||
"source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0",
|
||
"params" => ["query_vector" => $queryParams['embedding_2048']]
|
||
],
|
||
"boost" => 1.5
|
||
]
|
||
];
|
||
}
|
||
|
||
// 🔍 Добавляем поиск по `embedding_1024`, если есть
|
||
if (isset($queryParams['embedding_1024']) && is_array($queryParams['embedding_1024'])) {
|
||
$shouldClauses[] = [
|
||
"script_score" => [
|
||
"query" => ["match_all" => new stdClass()],
|
||
"script" => [
|
||
"source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0",
|
||
"params" => ["query_vector" => $queryParams['embedding_1024']]
|
||
],
|
||
"boost" => 1.0
|
||
]
|
||
];
|
||
}
|
||
|
||
// 🔍 Основные фильтры
|
||
$filterClauses = [
|
||
["match" => ["case_category_text" => ["query" => $category, "fuzziness" => "AUTO"]]],
|
||
["exists" => ["field" => "court_decision"]],
|
||
["exists" => ["field" => "law_articles"]]
|
||
];
|
||
|
||
if ($minYear !== null) {
|
||
$filterClauses[] = ["range" => ["case_year" => ["gte" => $minYear]]];
|
||
}
|
||
|
||
// 🔍 Фильтрация по сумме, если указана
|
||
if ($amount !== null && $amount > 0) {
|
||
$rangeMultiplier = $expandedSearch ? 2.0 : 1.3;
|
||
$filterClauses[] = [
|
||
"range" => ["requested_amount" => ["gte" => $amount * 0.7, "lte" => $amount * $rangeMultiplier]]
|
||
];
|
||
}
|
||
|
||
$query = [
|
||
"size" => $size,
|
||
"query" => [
|
||
"bool" => [
|
||
"should" => $shouldClauses,
|
||
"filter" => $filterClauses
|
||
]
|
||
]
|
||
];
|
||
|
||
// Выполняем поиск в ElasticSearch
|
||
$response = searchIndex("legal_cases", $query);
|
||
|
||
// Проверяем результат
|
||
if (isset($response['hits']['hits']) && !empty($response['hits']['hits'])) {
|
||
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ✅ Найдено: " . count($response['hits']['hits']) . " документов.\n", FILE_APPEND);
|
||
return array_column($response['hits']['hits'], '_source');
|
||
}
|
||
|
||
// Если результатов нет, пробуем ослабить параметры
|
||
if ($fuzzinessIndex < count($fuzzinessLevels) - 1) {
|
||
$fuzzinessIndex++;
|
||
} elseif ($yearIndex < count($years) - 1) {
|
||
$yearIndex++;
|
||
} elseif (!$expandedSearch) {
|
||
$expandedSearch = true;
|
||
$category = "*"; // Поиск по всем категориям
|
||
} else {
|
||
break;
|
||
}
|
||
|
||
} while (true);
|
||
|
||
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch не нашел результатов даже с ослабленными параметрами!\n", FILE_APPEND);
|
||
return [];
|
||
}
|
||
|
||
*/
|
||
|
||
|
||
/*function searchSimilarCases($queryParams, $size = 10) {
|
||
$logFile = __DIR__ . "/logs/search.log";
|
||
|
||
if (empty($queryParams['facts']) || empty($queryParams['category'])) {
|
||
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка: Недостаточно данных для поиска!\n", FILE_APPEND);
|
||
return [];
|
||
}
|
||
|
||
$amount = isset($queryParams['amount']) && is_numeric($queryParams['amount']) ? floatval($queryParams['amount']) : null;
|
||
$fuzzinessLevel = (mb_strlen($queryParams['facts']) > 100) ? "0" : "AUTO";
|
||
|
||
// Нормализация эмбеддингов
|
||
$normalizedEmbedding2048 = isset($queryParams['embedding_2048']) && is_array($queryParams['embedding_2048'])
|
||
? normalizeEmbedding($queryParams['embedding_2048'])
|
||
: null;
|
||
|
||
$normalizedEmbedding1024 = isset($queryParams['embedding_1024']) && is_array($queryParams['embedding_1024'])
|
||
? normalizeEmbedding($queryParams['embedding_1024'])
|
||
: null;
|
||
//$fuzzinessLevel
|
||
// Формируем запрос в ElasticSearch
|
||
$query = [
|
||
"size" => $size,
|
||
"query" => [
|
||
"bool" => [
|
||
"should" => [
|
||
[
|
||
"multi_match" => [
|
||
"query" => $queryParams['facts'],
|
||
"fields" => ["court_decision", "law_articles"],
|
||
"fuzziness" => $fuzzinessLevel
|
||
]
|
||
]
|
||
],
|
||
"filter" => [
|
||
["match" => ["case_category_text" => ["query" => $queryParams['category'], "fuzziness" => "AUTO"]]],
|
||
["exists" => ["field" => "court_decision"]],
|
||
["exists" => ["field" => "law_articles"]],
|
||
["range" => ["case_year" => ["gte" => 2020]]]
|
||
]
|
||
]
|
||
]
|
||
];
|
||
|
||
// Добавляем векторный поиск
|
||
if ($normalizedEmbedding2048 !== null) {
|
||
$query["query"]["bool"]["should"][] = [
|
||
"script_score" => [
|
||
"query" => ["match_all" => new stdClass()],
|
||
"script" => [
|
||
"source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0",
|
||
"params" => ["query_vector" => $normalizedEmbedding2048]
|
||
]
|
||
]
|
||
];
|
||
}
|
||
|
||
if ($normalizedEmbedding1024 !== null) {
|
||
$query["query"]["bool"]["should"][] = [
|
||
"script_score" => [
|
||
"query" => ["match_all" => new stdClass()],
|
||
"script" => [
|
||
"source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0",
|
||
"params" => ["query_vector" => $normalizedEmbedding1024]
|
||
]
|
||
]
|
||
];
|
||
}
|
||
|
||
if ($amount !== null && $amount > 0) {
|
||
$query["query"]["bool"]["filter"][] = [
|
||
"range" => ["requested_amount" => ["gte" => $amount * 0.7, "lte" => $amount * 1.3]]
|
||
];
|
||
}
|
||
|
||
// Логируем запрос
|
||
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Запрос в Elasticsearch: " . json_encode($query, JSON_UNESCAPED_UNICODE) . "\n", FILE_APPEND);
|
||
|
||
try {
|
||
$response = searchIndex("legal_cases", $query);
|
||
if (!isset($response['hits']['hits']) || empty($response['hits']['hits'])) {
|
||
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch вернул 0 результатов!\n", FILE_APPEND);
|
||
return [];
|
||
}
|
||
|
||
// Формируем выходные данные
|
||
$results = [];
|
||
foreach ($response['hits']['hits'] as $doc) {
|
||
$source = $doc['_source'];
|
||
$results[] = [
|
||
'case_id' => $source['case_id'] ?? 'Неизвестный ID',
|
||
'court' => $source['court'] ?? 'Неизвестный суд',
|
||
'court_decision' => $source['court_decision'] ?? 'Текст решения отсутствует',
|
||
'case_url' => $source['case_url'] ?? 'Нет ссылки'
|
||
];
|
||
}
|
||
|
||
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ✅ Найдено " . count($results) . " документов.\n", FILE_APPEND);
|
||
return $results;
|
||
} catch (Exception $e) {
|
||
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка Elasticsearch: " . $e->getMessage() . "\n", FILE_APPEND);
|
||
return [];
|
||
}
|
||
}
|
||
|
||
*/
|
||
|
||
/**
 * Flattens an ElasticSearch response into a plain list of documents.
 *
 * @param array $esResponse Response from ElasticSearch.
 * @return array The '_source' of every hit in order; an empty array when the
 *               response has no 'hits.hits' entry.
 */
function parseSearchResults($esResponse) {
    // Nothing to collect when the hits list is absent.
    if (!isset($esResponse['hits']['hits'])) {
        return [];
    }

    $sources = [];
    foreach ($esResponse['hits']['hits'] as $document) {
        $sources[] = $document['_source'];
    }

    return $sources;
}
|
||
|
||
/**
 * Searches the legal_chunks index with a lexical clause plus optional vector clauses.
 *
 * A vector clause is added for each of 'embedding_2048' and 'embedding_1024'
 * that is present and non-null in $queryEmbeddings. The built query and an
 * empty-result notice are appended to logs/search.log.
 *
 * @param string $queryText       Query text.
 * @param array  $queryEmbeddings Query vectors keyed by 'embedding_2048' / 'embedding_1024'.
 * @param int    $size            Number of hits to request.
 * @return array The '_source' of each hit, or an empty array when nothing was found.
 */
function searchLegalChunks($queryText, $queryEmbeddings, $size = 5) {
    $logFile = __DIR__ . "/logs/search.log";

    // Lexical search over the text fields.
    $shouldClauses = [
        [
            "multi_match" => [
                "query" => $queryText,
                "fields" => ["court_decision", "text", "case_category_text", "defendant", "plaintiff"],
                "fuzziness" => "AUTO",
                "boost" => 1.5
            ]
        ]
    ];

    // Vector search on embedding_2048. The lookup is guarded so a missing key
    // neither raises a warning nor sends a null query_vector.
    $embedding2048 = $queryEmbeddings["embedding_2048"] ?? null;
    if ($embedding2048 !== null) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0",
                    "params" => ["query_vector" => $embedding2048]
                ],
                "boost" => 1.5
            ]
        ];
    }

    // Vector search on embedding_1024, guarded the same way.
    $embedding1024 = $queryEmbeddings["embedding_1024"] ?? null;
    if ($embedding1024 !== null) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0",
                    "params" => ["query_vector" => $embedding1024]
                ],
                "boost" => 1.0
            ]
        ];
    }

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $shouldClauses,
                "filter" => [
                    ["exists" => ["field" => "court_decision"]],
                    ["exists" => ["field" => "law_articles"]],
                    // Further filters (e.g. by year or category) can be added here.
                    ["range" => ["case_year" => ["gte" => 2015]]]
                ]
            ]
        ]
    ];

    // Log the request.
    file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Запрос в Elasticsearch для legal_chunks: " . json_encode($query, JSON_UNESCAPED_UNICODE) . "\n", FILE_APPEND);

    $response = searchIndex("legal_chunks", $query);

    if (empty($response['hits']['hits'])) {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch не нашел результатов для legal_chunks!\n", FILE_APPEND);
        return [];
    }

    return array_column($response['hits']['hits'], '_source');
}
|
||
|
||
?>
|