true, CURLOPT_POST => true, CURLOPT_POSTFIELDS => json_encode($query), CURLOPT_HTTPHEADER => ['Content-Type: application/json'] ]); $response = curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); if ($httpCode !== 200) { error_log("Ошибка поиска в ElasticSearch (индекс $index): HTTP $httpCode - " . $response . "\n", 3, __DIR__ . "/logs/search.log"); } curl_close($ch); return json_decode($response, true); }

/**
 * Runs a refined search on the "legal_cases" index: lexical matching on the
 * supplied text plus optional vector similarity on one or two embeddings.
 *
 * Hits are restricted to documents that have `court_decision` and
 * `law_articles` and whose `case_year` is 2015 or later. The query is appended
 * to logs/search.log before it is sent.
 *
 * @param string     $text          Text for the multi_match and match_phrase clauses.
 * @param array|null $embedding2048 Query vector scored against `embedding_2048`; null skips the clause.
 *                                  (Presumably a list of floats - confirm with callers.)
 * @param array|null $embedding1024 Query vector scored against `embedding_1024`; null skips the clause.
 * @param int        $size          Number of hits to request.
 * @return array `_source` of each hit, or an empty array when the response has no hits.
 */
function searchRefinedCasesFromBestExample($text, $embedding2048, $embedding1024 = null, $size = 5)
{
    $shouldClauses = [
        [
            "multi_match" => [
                "query" => $text,
                "fields" => ["court_decision", "суть_спора"],
                "fuzziness" => "AUTO",
                "boost" => 1.5,
            ],
        ],
        [
            "match_phrase" => [
                "court_decision" => [
                    "query" => $text,
                    "slop" => 2,
                ],
            ],
        ],
    ];

    if ($embedding2048 !== null) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    // "+ 1.0" keeps the score non-negative (cosine similarity is in [-1, 1]).
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0",
                    "params" => ["query_vector" => $embedding2048],
                ],
                "boost" => 1.5,
            ],
        ];
    }

    if ($embedding1024 !== null) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0",
                    "params" => ["query_vector" => $embedding1024],
                ],
                "boost" => 1.0,
            ],
        ];
    }

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $shouldClauses,
                "filter" => [
                    ["exists" => ["field" => "court_decision"]],
                    ["exists" => ["field" => "law_articles"]],
                    ["range" => ["case_year" => ["gte" => 2015]]],
                ],
            ],
        ],
    ];

    file_put_contents(
        __DIR__ . "/logs/search.log",
        "[" . date("Y-m-d H:i:s") . "] 🔎 Уточнённый поиск: " . json_encode($query, JSON_UNESCAPED_UNICODE) . "\n",
        FILE_APPEND
    );

    $response = searchIndex("legal_cases", $query);

    return isset($response['hits']['hits'])
        ? array_column($response['hits']['hits'], '_source')
        : [];
}

/**
 * Runs a combined lexical and vector search on the "legal_cases" index.
 *
 * The lexical clause is always present. The vector clause is added only when
 * `$queryEmbeddings['embedding_2048']` is a non-empty array.
 *
 * @param string $queryText       Query text for the multi_match clause.
 * @param array  $queryEmbeddings Query vectors keyed by name; only `embedding_2048` is read.
 * @param int    $size            Number of hits to request.
 * @return mixed Decoded Elasticsearch response exactly as returned by searchIndex().
 */
function searchCases($queryText, $queryEmbeddings, $size = 5)
{
    $shouldClauses = [
        [
            "multi_match" => [
                "query" => $queryText,
                "fields" => ["court_decision", "суть_спора"],
                "fuzziness" => "AUTO",
            ],
        ],
    ];

    // Reading the key unguarded raised a PHP warning when it was missing and
    // sent `null` as query_vector; skip the clause instead.
    $vector2048 = $queryEmbeddings["embedding_2048"] ?? null;
    if (is_array($vector2048) && $vector2048 !== []) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => (object)[]],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') * 1.5 + 1.0",
                    "params" => [
                        "query_vector" => $vector2048,
                    ],
                ],
            ],
        ];
    }

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $shouldClauses,
            ],
        ],
    ];

    return searchIndex("legal_cases", $query);
}

/**
 * Searches the "legal_cases" index for court decisions similar to the given facts.
 *
 * Combines a multi_match and a match_phrase clause on `facts_short` with
 * optional vector similarity clauses. Hits must have `court_decision` and
 * `law_articles` and a `case_year` of 2010 or later. The query and the
 * "no results" / "not enough data" outcomes are appended to logs/search.log.
 *
 * @param array $queryParams Search parameters. Keys read here:
 *                           - `facts_short` (required): text for the lexical clauses;
 *                           - `facts_full` (required): only checked for presence;
 *                           - `category` (optional): matched against `case_category_text`;
 *                             the filter is omitted when the key is missing or empty;
 *                           - `embedding_1024`, `embedding_2048` (optional): query vectors.
 * @param int   $size        Number of hits to request.
 * @return array `_source` of each hit, or an empty array on missing input or no hits.
 */
function searchSimilarCases($queryParams, $size = 5)
{
    $logFile = __DIR__ . "/logs/search.log";

    if (empty($queryParams['facts_short']) || empty($queryParams['facts_full'])) {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка: Недостаточно данных для поиска!\n", FILE_APPEND);
        return [];
    }

    $factsShort = $queryParams['facts_short']; // used for the lexical clauses
    $normalizedEmbedding1024 = $queryParams['embedding_1024'] ?? null;
    $normalizedEmbedding2048 = $queryParams['embedding_2048'] ?? null;

    $shouldClauses = [
        // Lexical search (multi_match) based on `facts_short`.
        [
            "multi_match" => [
                "query" => $factsShort,
                "fields" => ["court_decision", "law_articles"],
                "fuzziness" => "0", // fuzziness deliberately disabled
                "boost" => 1.5,
            ],
        ],
        // match_phrase for near-exact phrase matching.
        [
            "match_phrase" => [
                "court_decision" => [
                    "query" => $factsShort,
                    "slop" => 2,
                ],
            ],
        ],
    ];

    // Vector search (script_score) on `embedding_2048`.
    if ($normalizedEmbedding2048 !== null) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0",
                    "params" => [
                        "query_vector" => $normalizedEmbedding2048,
                    ],
                ],
                "boost" => 1.5,
            ],
        ];
    }

    // Vector search (script_score) on `embedding_1024`.
    if ($normalizedEmbedding1024 !== null) {
        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0",
                    "params" => [
                        "query_vector" => $normalizedEmbedding1024,
                    ],
                ],
                "boost" => 1.0,
            ],
        ];
    }

    $filterClauses = [];

    // `category` is not part of the required-input check above, so it may be
    // absent. Reading it unguarded raised a PHP warning and put a null match
    // query into the filter; without a category the search is simply not
    // restricted by category.
    $category = $queryParams['category'] ?? null;
    if ($category !== null && $category !== '') {
        $filterClauses[] = ["match" => ["case_category_text" => ["query" => $category, "fuzziness" => "AUTO"]]];
    }

    $filterClauses[] = ["exists" => ["field" => "court_decision"]];
    $filterClauses[] = ["exists" => ["field" => "law_articles"]];
    $filterClauses[] = ["range" => ["case_year" => ["gte" => 2010]]];

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $shouldClauses,
                "filter" => $filterClauses,
            ],
        ],
    ];

    file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Запрос в Elasticsearch: " . json_encode($query, JSON_UNESCAPED_UNICODE) . "\n", FILE_APPEND);

    $response = searchIndex("legal_cases", $query);

    // empty() also covers the "key not set" case.
    if (empty($response['hits']['hits'])) {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch вернул 0 результатов!\n", FILE_APPEND);
        return [];
    }

    return array_column($response['hits']['hits'], '_source');
}

/* function searchSimilarCases($queryParams, $size = 5) { $logFile = __DIR__ . "/logs/search.log"; if (empty($queryParams['facts']) || empty($queryParams['category'])) { file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка: Недостаточно данных для поиска!\n", FILE_APPEND); return []; } // Начальные параметры $amount = isset($queryParams['amount']) && is_numeric($queryParams['amount']) ? floatval($queryParams['amount']) : null; $fuzzinessLevels = ["AUTO", "2", "5"]; $years = [2010, 2000, null]; // Искать сначала с 2010, потом 2000, потом без ограничений $category = $queryParams['category']; $expandedSearch = false; $fuzzinessIndex = 0; $yearIndex = 0; do { $fuzziness = $fuzzinessLevels[$fuzzinessIndex] ?? "AUTO"; $minYear = $years[$yearIndex] ?? null; file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Поиск: fuzziness=$fuzziness, minYear=$minYear, category=$category\n", FILE_APPEND); // Создаём поисковые условия $shouldClauses = [ [ "multi_match" => [ "query" => $queryParams['facts'], "fields" => ["court_decision", "law_articles"], "fuzziness" => $fuzziness, "boost" => 1.0 ] ] ]; // 🔍 Добавляем поиск по `embedding_2048`, если есть if (isset($queryParams['embedding_2048']) && is_array($queryParams['embedding_2048'])) { $shouldClauses[] = [ "script_score" => [ "query" => ["match_all" => new stdClass()], "script" => [ "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0", "params" => ["query_vector" => $queryParams['embedding_2048']] ], "boost" => 1.5 ] ]; } // 🔍 Добавляем поиск по `embedding_1024`, если есть if (isset($queryParams['embedding_1024']) && is_array($queryParams['embedding_1024'])) { $shouldClauses[] = [ "script_score" => [ "query" => ["match_all" => new stdClass()], "script" => [ "source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0", "params" => 
["query_vector" => $queryParams['embedding_1024']] ], "boost" => 1.0 ] ]; } // 🔍 Основные фильтры $filterClauses = [ ["match" => ["case_category_text" => ["query" => $category, "fuzziness" => "AUTO"]]], ["exists" => ["field" => "court_decision"]], ["exists" => ["field" => "law_articles"]] ]; if ($minYear !== null) { $filterClauses[] = ["range" => ["case_year" => ["gte" => $minYear]]]; } // 🔍 Фильтрация по сумме, если указана if ($amount !== null && $amount > 0) { $rangeMultiplier = $expandedSearch ? 2.0 : 1.3; $filterClauses[] = [ "range" => ["requested_amount" => ["gte" => $amount * 0.7, "lte" => $amount * $rangeMultiplier]] ]; } $query = [ "size" => $size, "query" => [ "bool" => [ "should" => $shouldClauses, "filter" => $filterClauses ] ] ]; // Выполняем поиск в ElasticSearch $response = searchIndex("legal_cases", $query); // Проверяем результат if (isset($response['hits']['hits']) && !empty($response['hits']['hits'])) { file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ✅ Найдено: " . count($response['hits']['hits']) . " документов.\n", FILE_APPEND); return array_column($response['hits']['hits'], '_source'); } // Если результатов нет, пробуем ослабить параметры if ($fuzzinessIndex < count($fuzzinessLevels) - 1) { $fuzzinessIndex++; } elseif ($yearIndex < count($years) - 1) { $yearIndex++; } elseif (!$expandedSearch) { $expandedSearch = true; $category = "*"; // Поиск по всем категориям } else { break; } } while (true); file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch не нашел результатов даже с ослабленными параметрами!\n", FILE_APPEND); return []; } */ /*function searchSimilarCases($queryParams, $size = 10) { $logFile = __DIR__ . "/logs/search.log"; if (empty($queryParams['facts']) || empty($queryParams['category'])) { file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка: Недостаточно данных для поиска!\n", FILE_APPEND); return []; } $amount = isset($queryParams['amount']) && is_numeric($queryParams['amount']) ? 
floatval($queryParams['amount']) : null; $fuzzinessLevel = (mb_strlen($queryParams['facts']) > 100) ? "0" : "AUTO"; // Нормализация эмбеддингов $normalizedEmbedding2048 = isset($queryParams['embedding_2048']) && is_array($queryParams['embedding_2048']) ? normalizeEmbedding($queryParams['embedding_2048']) : null; $normalizedEmbedding1024 = isset($queryParams['embedding_1024']) && is_array($queryParams['embedding_1024']) ? normalizeEmbedding($queryParams['embedding_1024']) : null; //$fuzzinessLevel // Формируем запрос в ElasticSearch $query = [ "size" => $size, "query" => [ "bool" => [ "should" => [ [ "multi_match" => [ "query" => $queryParams['facts'], "fields" => ["court_decision", "law_articles"], "fuzziness" => $fuzzinessLevel ] ] ], "filter" => [ ["match" => ["case_category_text" => ["query" => $queryParams['category'], "fuzziness" => "AUTO"]]], ["exists" => ["field" => "court_decision"]], ["exists" => ["field" => "law_articles"]], ["range" => ["case_year" => ["gte" => 2020]]] ] ] ] ]; // Добавляем векторный поиск if ($normalizedEmbedding2048 !== null) { $query["query"]["bool"]["should"][] = [ "script_score" => [ "query" => ["match_all" => new stdClass()], "script" => [ "source" => "cosineSimilarity(params.query_vector, 'embedding_2048') + 1.0", "params" => ["query_vector" => $normalizedEmbedding2048] ] ] ]; } if ($normalizedEmbedding1024 !== null) { $query["query"]["bool"]["should"][] = [ "script_score" => [ "query" => ["match_all" => new stdClass()], "script" => [ "source" => "cosineSimilarity(params.query_vector, 'embedding_1024') + 1.0", "params" => ["query_vector" => $normalizedEmbedding1024] ] ] ]; } if ($amount !== null && $amount > 0) { $query["query"]["bool"]["filter"][] = [ "range" => ["requested_amount" => ["gte" => $amount * 0.7, "lte" => $amount * 1.3]] ]; } // Логируем запрос file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Запрос в Elasticsearch: " . json_encode($query, JSON_UNESCAPED_UNICODE) . 
"\n", FILE_APPEND); try { $response = searchIndex("legal_cases", $query); if (!isset($response['hits']['hits']) || empty($response['hits']['hits'])) { file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch вернул 0 результатов!\n", FILE_APPEND); return []; } // Формируем выходные данные $results = []; foreach ($response['hits']['hits'] as $doc) { $source = $doc['_source']; $results[] = [ 'case_id' => $source['case_id'] ?? 'Неизвестный ID', 'court' => $source['court'] ?? 'Неизвестный суд', 'court_decision' => $source['court_decision'] ?? 'Текст решения отсутствует', 'case_url' => $source['case_url'] ?? 'Нет ссылки' ]; } file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ✅ Найдено " . count($results) . " документов.\n", FILE_APPEND); return $results; } catch (Exception $e) { file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Ошибка Elasticsearch: " . $e->getMessage() . "\n", FILE_APPEND); return []; } } */

/**
 * Converts an Elasticsearch response into a plain list of documents.
 *
 * @param mixed $esResponse Decoded Elasticsearch response (associative array expected).
 * @return array The `_source` of every hit under `hits.hits`; an empty array
 *               when that path is missing or is not an array. Hits that carry
 *               no `_source` key are skipped.
 */
function parseSearchResults($esResponse)
{
    $hits = $esResponse['hits']['hits'] ?? null;

    // A malformed response (e.g. an error payload) must not reach the
    // extraction step, where it would only produce warnings.
    if (!is_array($hits)) {
        return [];
    }

    // array_column() ignores rows without the key, so a hit lacking `_source`
    // no longer yields a warning plus a null entry. This matches how the other
    // search helpers in this file extract their results.
    return array_column($hits, '_source');
}

/**
 * Searches the "legal_chunks" index with a lexical clause plus optional
 * vector similarity clauses.
 *
 * Hits must have `court_decision` and `law_articles` and a `case_year` of 2015
 * or later. The query, and the "no results" outcome, are appended to
 * logs/search.log.
 *
 * @param string $queryText       Query text for the multi_match clause.
 * @param array  $queryEmbeddings Query vectors keyed by `embedding_2048` and/or
 *                                `embedding_1024`; a missing or empty entry
 *                                simply omits the corresponding clause.
 * @param int    $size            Number of hits to request.
 * @return array `_source` of each hit, or an empty array when nothing was found.
 */
function searchLegalChunks($queryText, $queryEmbeddings, $size = 5)
{
    $logFile = __DIR__ . "/logs/search.log";

    $shouldClauses = [
        // Lexical search over the text fields.
        [
            "multi_match" => [
                "query" => $queryText,
                "fields" => ["court_decision", "text", "case_category_text", "defendant", "plaintiff"],
                "fuzziness" => "AUTO",
                "boost" => 1.5,
            ],
        ],
    ];

    // Vector field => boost. The keys were previously read unguarded, which
    // raised a PHP warning and sent `null` as query_vector when one was missing.
    $vectorBoosts = [
        "embedding_2048" => 1.5,
        "embedding_1024" => 1.0,
    ];

    foreach ($vectorBoosts as $field => $boost) {
        $vector = $queryEmbeddings[$field] ?? null;
        if (!is_array($vector) || $vector === []) {
            continue;
        }

        $shouldClauses[] = [
            "script_score" => [
                "query" => ["match_all" => new stdClass()],
                "script" => [
                    "source" => "cosineSimilarity(params.query_vector, '" . $field . "') + 1.0",
                    "params" => ["query_vector" => $vector],
                ],
                "boost" => $boost,
            ],
        ];
    }

    $query = [
        "size" => $size,
        "query" => [
            "bool" => [
                "should" => $shouldClauses,
                "filter" => [
                    ["exists" => ["field" => "court_decision"]],
                    ["exists" => ["field" => "law_articles"]],
                    // Further filters (e.g. by category) can be added here.
                    ["range" => ["case_year" => ["gte" => 2015]]],
                ],
            ],
        ],
    ];

    file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] 🔎 Запрос в Elasticsearch для legal_chunks: " . json_encode($query, JSON_UNESCAPED_UNICODE) . "\n", FILE_APPEND);

    $response = searchIndex("legal_chunks", $query);

    // empty() also covers the "key not set" case.
    if (empty($response['hits']['hits'])) {
        file_put_contents($logFile, "[" . date("Y-m-d H:i:s") . "] ❌ Elasticsearch не нашел результатов для legal_chunks!\n", FILE_APPEND);
        return [];
    }

    return array_column($response['hits']['hits'], '_source');
}
?>