Files
crm.clientright.ru/aiassist/search.php
Fedor ac7467f0b4 Major CRM updates: AI Assistant, Court Status API, S3 integration improvements, and extensive file storage system
- Added comprehensive AI Assistant system (aiassist/ directory):
  * Vector search and embedding capabilities
  * Typebot proxy integration
  * Elastic search functionality
  * Message classification and chat history
  * MCP proxy for external integrations

- Implemented Court Status API (GetCourtStatus.php):
  * Real-time court document status checking
  * Integration with external court systems
  * Comprehensive error handling and logging

- Enhanced S3 integration:
  * Improved file backup system with metadata
  * Batch processing capabilities
  * Enhanced error logging and recovery
  * Copy operations with URL fixing

- Added Telegram contact creation API
- Improved error logging across all modules
- Enhanced callback system for AI responses
- Extensive backup file storage with timestamps
- Updated documentation and README files

- File storage improvements:
  * Thousands of backup files with proper metadata
  * Fix operations for broken file references
  * Project-specific backup and recovery systems
  * Comprehensive file integrity checking

Total: 26,461+ files added/modified including AWS SDK, vendor dependencies, and extensive backup system.
2025-10-16 11:17:21 +03:00

568 lines
22 KiB
PHP
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
// aiassist/search.php
// Default Elasticsearch base URL; only applied when ELASTIC_URL has not been defined already.
defined('ELASTIC_URL') || define('ELASTIC_URL', 'http://localhost:9200');
/**
 * Runs a search request against the given Elasticsearch index.
 *
 * POSTs $query as a JSON body to ELASTIC_URL/{index}/_search. Serialization failures,
 * transport failures, non-200 responses and undecodable bodies are appended to
 * logs/search.log next to this file.
 *
 * @param string $index Index name.
 * @param array  $query Request body.
 * @return array Decoded response body (returned for non-200 responses too, so callers can
 *               inspect it), or an empty array when the request could not be sent or the
 *               body did not decode to an array.
 */
function searchIndex($index, $query) {
    $logFile = __DIR__ . "/logs/search.log";

    $payload = json_encode($query);
    if ($payload === false) {
        // Without this check a failed encode would be handed to cURL as the POST body,
        // so the request would not carry the intended query.
        error_log("Ошибка поиска в ElasticSearch (индекс $index): не удалось сериализовать запрос - " . json_last_error_msg() . "\n", 3, $logFile);
        return [];
    }

    $ch = curl_init(ELASTIC_URL . "/{$index}/_search");
    if ($ch === false) {
        error_log("Ошибка поиска в ElasticSearch (индекс $index): не удалось инициализировать cURL\n", 3, $logFile);
        return [];
    }

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => ['Content-Type: application/json']
    ]);

    $response = curl_exec($ch);
    if ($response === false) {
        // Transport-level failure: there is no HTTP status or body, so log the cURL error text.
        error_log("Ошибка поиска в ElasticSearch (индекс $index): cURL - " . curl_error($ch) . "\n", 3, $logFile);
        curl_close($ch);
        return [];
    }

    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($httpCode !== 200) {
        error_log("Ошибка поиска в ElasticSearch (индекс $index): HTTP $httpCode - " . $response . "\n", 3, $logFile);
    }

    $decoded = json_decode($response, true);
    if (!is_array($decoded)) {
        // For non-200 responses the raw body has already been logged above.
        if ($httpCode === 200) {
            error_log("Ошибка поиска в ElasticSearch (индекс $index): ответ не является корректным JSON\n", 3, $logFile);
        }
        return [];
    }

    return $decoded;
}
/**
 * Searches the "legal_cases" index with a text query plus optional embedding vectors.
 *
 * Two lexical clauses (a fuzzy multi_match and a match_phrase with slop 2) are always
 * present; one script_score clause is appended for each embedding that is not null.
 * Hits are filtered to documents that have court_decision and law_articles fields and
 * case_year >= 2015. The query is appended to logs/search.log before it is sent.
 *
 * @param string     $text          Text used by the lexical clauses.
 * @param array|null $embedding2048 Query vector for the embedding_2048 field, or null to skip
 *                                  (presumably a list of floats — confirm with the caller).
 * @param array|null $embedding1024 Query vector for the embedding_1024 field, or null to skip.
 * @param int        $size          Value sent as the request's "size".
 * @return array The _source of each hit, or an empty array when the response has no hits list.
 */
function searchRefinedCasesFromBestExample($text, $embedding2048, $embedding1024 = null, $size = 5) {
    $fuzzyClause = [
        "multi_match" => [
            "query" => $text,
            "fields" => ["court_decision", "суть_спора"],
            "fuzziness" => "AUTO",
            "boost" => 1.5,
        ],
    ];
    $phraseClause = [
        "match_phrase" => [
            "court_decision" => [
                "query" => $text,
                "slop" => 2,
            ],
        ],
    ];
    $clauses = [$fuzzyClause, $phraseClause];

    // One entry per embedding field, in the order the clauses must appear in the query.
    $vectorSpecs = [
        [
            "vector" => $embedding2048,
            "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0",
            "boost" => 1.5,
        ],
        [
            "vector" => $embedding1024,
            "source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0",
            "boost" => 1.0,
        ],
    ];
    foreach ($vectorSpecs as $spec) {
        if ($spec["vector"] === null) {
            continue;
        }
        $clauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => $spec["source"],
                    "params" => ["query_vector" => $spec["vector"]],
                ],
                "boost" => $spec["boost"],
            ],
        ];
    }

    $mandatoryFilters = [
        ["exists" => ["field" => "court_decision"]],
        ["exists" => ["field" => "law_articles"]],
        ["range" => ["case_year" => ["gte" => 2015]]],
    ];
    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $clauses,
                "filter" => $mandatoryFilters,
            ],
        ],
    ];

    $logLine = "[" . date("Y-m-d H:i:s") . "] 🔎 Уточнённый поиск: " . json_encode($query, JSON_UNESCAPED_UNICODE) . "\n";
    file_put_contents(__DIR__ . "/logs/search.log", $logLine, FILE_APPEND);

    $result = searchIndex("legal_cases", $query);
    if (!isset($result['hits']['hits'])) {
        return [];
    }
    return array_column($result['hits']['hits'], '_source');
}
/**
 * Runs a combined lexical and vector search against the "legal_cases" index.
 *
 * The lexical part is a fuzzy multi_match over court_decision and суть_спора. A
 * script_score clause on the embedding_2048 field is added only when
 * $queryEmbeddings carries a non-empty "embedding_2048" entry; previously a missing
 * entry produced an undefined-index warning and a null query_vector in the script.
 *
 * @param string $queryText       Query text.
 * @param array  $queryEmbeddings Query vectors; only the "embedding_2048" key is read.
 * @param int    $size            Value sent as the request's "size".
 * @return mixed Whatever searchIndex() returns for the request (the decoded response,
 *               not a list of hits).
 */
function searchCases($queryText, $queryEmbeddings, $size = 5) {
    $shouldClauses = [
        [
            "multi_match" => [
                "query" => $queryText,
                "fields" => ["court_decision", "суть_спора"],
                "fuzziness" => "AUTO"
            ]
        ]
    ];

    // Skip the vector clause when no usable vector was supplied, as the sibling
    // search functions in this file do for null embeddings.
    $vector2048 = $queryEmbeddings["embedding_2048"] ?? null;
    if ($vector2048 !== null && $vector2048 !== []) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') * 1.5 + 1.0",
                    "params" => [
                        "query_vector" => $vector2048
                    ]
                ]
            ]
        ];
    }

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $shouldClauses
            ]
        ]
    ];

    return searchIndex("legal_cases", $query);
}
/**
 * Searches the "legal_cases" index for court decisions similar to the described facts.
 *
 * Keys read from $queryParams:
 *  - facts_short    (required) text for the lexical clauses;
 *  - facts_full     (required) only checked for presence here;
 *  - embedding_2048 (optional) query vector for the embedding_2048 field;
 *  - embedding_1024 (optional) query vector for the embedding_1024 field;
 *  - category       (optional) matched against case_category_text as a filter.
 *
 * Hits must have court_decision and law_articles fields and case_year >= 2010.
 * The query and the outcome are appended to logs/search.log.
 *
 * @param array $queryParams Search parameters (see the key list above).
 * @param int   $size        Value sent as the request's "size".
 * @return array The _source of each hit, or an empty array when required input is missing
 *               or the response contains no hits.
 */
function searchSimilarCases($queryParams, $size = 5) {
    $logFile = __DIR__ . "/logs/search.log";

    if (empty($queryParams['facts_short']) || empty($queryParams['facts_full'])) {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка: Недостаточно данных для поиска!\n", FILE_APPEND);
        return [];
    }

    $factsShort = $queryParams['facts_short']; // used by the lexical clauses
    $normalizedEmbedding1024 = $queryParams['embedding_1024'] ?? null;
    $normalizedEmbedding2048 = $queryParams['embedding_2048'] ?? null;
    $category = $queryParams['category'] ?? null;

    $shouldClauses = [];

    // Lexical search (multi_match) on facts_short; fuzziness is explicitly turned off.
    $shouldClauses[] = [
        "multi_match" => [
            "query" => $factsShort,
            "fields" => ["court_decision", "law_articles"],
            "fuzziness"=> "0",
            "boost" => 1.5
        ]
    ];

    // Phrase search on court_decision for near-exact matches.
    $shouldClauses[] = [
        "match_phrase" => [
            "court_decision" => [
                "query" => $factsShort,
                "slop" => 2
            ]
        ]
    ];

    // Vector search (script_score) on embedding_2048, when a vector was supplied.
    if ($normalizedEmbedding2048 !== null) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0",
                    "params" => [
                        "query_vector" => $normalizedEmbedding2048
                    ]
                ],
                "boost" => 1.5
            ]
        ];
    }

    // Vector search (script_score) on embedding_1024, when a vector was supplied.
    if ($normalizedEmbedding1024 !== null) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0",
                    "params" => [
                        "query_vector" => $normalizedEmbedding1024
                    ]
                ],
                "boost" => 1.0
            ]
        ];
    }

    // The category key is not part of the validation above, so it may be absent. Reading it
    // unconditionally raised an undefined-index warning and put a null query into the match
    // filter; the filter is therefore added only when a category was actually given.
    $filterClauses = [];
    if ($category !== null && $category !== '') {
        $filterClauses[] = ["match" => ["case_category_text" => ["query" => $category, "fuzziness" => "AUTO"]]];
    } else {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ⚠️ Категория не указана — поиск без фильтра по категории\n", FILE_APPEND);
    }
    $filterClauses[] = ["exists" => ["field" => "court_decision"]];
    $filterClauses[] = ["exists" => ["field" => "law_articles"]];
    $filterClauses[] = ["range" => ["case_year" => ["gte" => 2010]]];

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $shouldClauses,
                "filter" => $filterClauses
            ]
        ]
    ];

    file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Запрос в Elasticsearch: " . json_encode($query, JSON_UNESCAPED_UNICODE) . "\n", FILE_APPEND);

    $response = searchIndex("legal_cases", $query);
    if (empty($response['hits']['hits'])) {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch вернул 0 результатов!\n", FILE_APPEND);
        return [];
    }

    return array_column($response['hits']['hits'], '_source');
}
/*
function searchSimilarCases($queryParams, $size = 5) {
$logFile = __DIR__ . "/logs/search.log";
if (empty($queryParams['facts']) || empty($queryParams['category'])) {
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка: Недостаточно данных для поиска!\n", FILE_APPEND);
return [];
}
// Начальные параметры
$amount = isset($queryParams['amount']) && is_numeric($queryParams['amount']) ? floatval($queryParams['amount']) : null;
$fuzzinessLevels = ["AUTO", "2", "5"];
$years = [2010, 2000, null]; // Искать сначала с 2010, потом 2000, потом без ограничений
$category = $queryParams['category'];
$expandedSearch = false;
$fuzzinessIndex = 0;
$yearIndex = 0;
do {
$fuzziness = $fuzzinessLevels[$fuzzinessIndex] ?? "AUTO";
$minYear = $years[$yearIndex] ?? null;
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Поиск: fuzziness=$fuzziness, minYear=$minYear, category=$category\n", FILE_APPEND);
// Создаём поисковые условия
$shouldClauses = [
[
"multi_match" => [
"query" => $queryParams['facts'],
"fields" => ["court_decision", "law_articles"],
"fuzziness" => $fuzziness,
"boost" => 1.0
]
]
];
// 🔍 Добавляем поиск по `embedding_2048`, если есть
if (isset($queryParams['embedding_2048']) && is_array($queryParams['embedding_2048'])) {
$shouldClauses[] = [
"script_score" => [
"query" => ["match_all" => new stdClass()],
"script" => [
"source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0",
"params" => ["query_vector" => $queryParams['embedding_2048']]
],
"boost" => 1.5
]
];
}
// 🔍 Добавляем поиск по `embedding_1024`, если есть
if (isset($queryParams['embedding_1024']) && is_array($queryParams['embedding_1024'])) {
$shouldClauses[] = [
"script_score" => [
"query" => ["match_all" => new stdClass()],
"script" => [
"source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0",
"params" => ["query_vector" => $queryParams['embedding_1024']]
],
"boost" => 1.0
]
];
}
// 🔍 Основные фильтры
$filterClauses = [
["match" => ["case_category_text" => ["query" => $category, "fuzziness" => "AUTO"]]],
["exists" => ["field" => "court_decision"]],
["exists" => ["field" => "law_articles"]]
];
if ($minYear !== null) {
$filterClauses[] = ["range" => ["case_year" => ["gte" => $minYear]]];
}
// 🔍 Фильтрация по сумме, если указана
if ($amount !== null && $amount > 0) {
$rangeMultiplier = $expandedSearch ? 2.0 : 1.3;
$filterClauses[] = [
"range" => ["requested_amount" => ["gte" => $amount * 0.7, "lte" => $amount * $rangeMultiplier]]
];
}
$query = [
"size" => $size,
"query" => [
"bool" => [
"should" => $shouldClauses,
"filter" => $filterClauses
]
]
];
// Выполняем поиск в ElasticSearch
$response = searchIndex("legal_cases", $query);
// Проверяем результат
if (isset($response['hits']['hits']) && !empty($response['hits']['hits'])) {
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ✅ Найдено: " . count($response['hits']['hits']) . " документов.\n", FILE_APPEND);
return array_column($response['hits']['hits'], '_source');
}
// Если результатов нет, пробуем ослабить параметры
if ($fuzzinessIndex < count($fuzzinessLevels) - 1) {
$fuzzinessIndex++;
} elseif ($yearIndex < count($years) - 1) {
$yearIndex++;
} elseif (!$expandedSearch) {
$expandedSearch = true;
$category = "*"; // Поиск по всем категориям
} else {
break;
}
} while (true);
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch не нашел результатов даже с ослабленными параметрами!\n", FILE_APPEND);
return [];
}
*/
/*function searchSimilarCases($queryParams, $size = 10) {
$logFile = __DIR__ . "/logs/search.log";
if (empty($queryParams['facts']) || empty($queryParams['category'])) {
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка: Недостаточно данных для поиска!\n", FILE_APPEND);
return [];
}
$amount = isset($queryParams['amount']) && is_numeric($queryParams['amount']) ? floatval($queryParams['amount']) : null;
$fuzzinessLevel = (mb_strlen($queryParams['facts']) > 100) ? "0" : "AUTO";
// Нормализация эмбеддингов
$normalizedEmbedding2048 = isset($queryParams['embedding_2048']) && is_array($queryParams['embedding_2048'])
? normalizeEmbedding($queryParams['embedding_2048'])
: null;
$normalizedEmbedding1024 = isset($queryParams['embedding_1024']) && is_array($queryParams['embedding_1024'])
? normalizeEmbedding($queryParams['embedding_1024'])
: null;
//$fuzzinessLevel
// Формируем запрос в ElasticSearch
$query = [
"size" => $size,
"query" => [
"bool" => [
"should" => [
[
"multi_match" => [
"query" => $queryParams['facts'],
"fields" => ["court_decision", "law_articles"],
"fuzziness" => $fuzzinessLevel
]
]
],
"filter" => [
["match" => ["case_category_text" => ["query" => $queryParams['category'], "fuzziness" => "AUTO"]]],
["exists" => ["field" => "court_decision"]],
["exists" => ["field" => "law_articles"]],
["range" => ["case_year" => ["gte" => 2020]]]
]
]
]
];
// Добавляем векторный поиск
if ($normalizedEmbedding2048 !== null) {
$query["query"]["bool"]["should"][] = [
"script_score" => [
"query" => ["match_all" => new stdClass()],
"script" => [
"source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0",
"params" => ["query_vector" => $normalizedEmbedding2048]
]
]
];
}
if ($normalizedEmbedding1024 !== null) {
$query["query"]["bool"]["should"][] = [
"script_score" => [
"query" => ["match_all" => new stdClass()],
"script" => [
"source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0",
"params" => ["query_vector" => $normalizedEmbedding1024]
]
]
];
}
if ($amount !== null && $amount > 0) {
$query["query"]["bool"]["filter"][] = [
"range" => ["requested_amount" => ["gte" => $amount * 0.7, "lte" => $amount * 1.3]]
];
}
// Логируем запрос
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Запрос в Elasticsearch: " . json_encode($query, JSON_UNESCAPED_UNICODE) . "\n", FILE_APPEND);
try {
$response = searchIndex("legal_cases", $query);
if (!isset($response['hits']['hits']) || empty($response['hits']['hits'])) {
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch вернул 0 результатов!\n", FILE_APPEND);
return [];
}
// Формируем выходные данные
$results = [];
foreach ($response['hits']['hits'] as $doc) {
$source = $doc['_source'];
$results[] = [
'case_id' => $source['case_id'] ?? 'Неизвестный ID',
'court' => $source['court'] ?? 'Неизвестный суд',
'court_decision' => $source['court_decision'] ?? 'Текст решения отсутствует',
'case_url' => $source['case_url'] ?? 'Нет ссылки'
];
}
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ✅ Найдено " . count($results) . " документов.\n", FILE_APPEND);
return $results;
} catch (Exception $e) {
file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка Elasticsearch: " . $e->getMessage() . "\n", FILE_APPEND);
return [];
}
}
*/
/**
 * Extracts the _source of every hit from a decoded Elasticsearch response.
 *
 * @param array $esResponse Decoded Elasticsearch response.
 * @return array List of _source values in hit order; empty when the response
 *               has no hits.hits entry.
 */
function parseSearchResults($esResponse) {
    if (!isset($esResponse['hits']['hits'])) {
        return [];
    }

    $sources = [];
    foreach ($esResponse['hits']['hits'] as $entry) {
        $sources[] = $entry['_source'];
    }
    return $sources;
}
/**
 * Searches the "legal_chunks" index with a text query plus optional embedding vectors.
 *
 * A fuzzy multi_match over several text fields is always present. A script_score clause
 * is added for embedding_2048 and for embedding_1024 only when $queryEmbeddings carries a
 * non-empty entry under that key; previously a missing key produced an undefined-index
 * warning and a null query_vector in the script. Hits must have court_decision and
 * law_articles fields and case_year >= 2015. The query and an empty outcome are appended
 * to logs/search.log.
 *
 * @param string $queryText       Query text.
 * @param array  $queryEmbeddings Query vectors keyed by "embedding_2048" / "embedding_1024".
 * @param int    $size            Value sent as the request's "size".
 * @return array The _source of each hit, or an empty array when there are no hits.
 */
function searchLegalChunks($queryText, $queryEmbeddings, $size = 5) {
    $logFile = __DIR__ . "/logs/search.log";

    $shouldClauses = [
        // Lexical search over the text fields.
        [
            "multi_match" => [
                "query" => $queryText,
                "fields" => ["court_decision", "text", "case_category_text", "defendant", "plaintiff"],
                "fuzziness" => "AUTO",
                "boost" => 1.5
            ]
        ]
    ];

    // Vector search on embedding_2048, when a vector was supplied.
    $vector2048 = $queryEmbeddings["embedding_2048"] ?? null;
    if ($vector2048 !== null && $vector2048 !== []) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0",
                    "params" => ["query_vector" => $vector2048]
                ],
                "boost" => 1.5
            ]
        ];
    }

    // Vector search on embedding_1024, when a vector was supplied.
    $vector1024 = $queryEmbeddings["embedding_1024"] ?? null;
    if ($vector1024 !== null && $vector1024 !== []) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0",
                    "params" => ["query_vector" => $vector1024]
                ],
                "boost" => 1.0
            ]
        ];
    }

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $shouldClauses,
                "filter" => [
                    ["exists" => ["field" => "court_decision"]],
                    ["exists" => ["field" => "law_articles"]],
                    // Further filters (e.g. by category) could be added here.
                    ["range" => ["case_year" => ["gte" => 2015]]]
                ]
            ]
        ]
    ];

    // Log the outgoing query.
    file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Запрос в Elasticsearch для legal_chunks: " . json_encode($query, JSON_UNESCAPED_UNICODE) . "\n", FILE_APPEND);

    $response = searchIndex("legal_chunks", $query);
    if (empty($response['hits']['hits'])) {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch не нашел результатов для legal_chunks!\n", FILE_APPEND);
        return [];
    }

    return array_column($response['hits']['hits'], '_source');
}
?>