123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122 |
- import { uniq } from 'lodash-es'
- import type { MultipleRetrievalConfig } from './types'
- import type {
- DataSet,
- SelectedDatasetsMode,
- } from '@/models/datasets'
- import {
- DEFAULT_WEIGHTED_SCORE,
- RerankingModeEnum,
- } from '@/models/datasets'
- import { RETRIEVE_METHOD } from '@/types/app'
- import { DATASET_DEFAULT } from '@/config'
/**
 * Validity check for this node type.
 *
 * @returns Always `true`; no validation is performed here.
 */
export const checkNodeValid = () => true
/**
 * Classifies a selection of datasets by indexing technique and search method.
 *
 * Returned flags (all `false` when nothing is selected):
 * - `allHighQuality`: no selected dataset is indexed with `'economy'`.
 * - `allHighQualityVectorSearch` / `allHighQualityFullTextSearch`: no dataset is
 *   `'economy'` and every `'high_quality'` dataset uses the semantic /
 *   full-text search method respectively.
 * - `allEconomic`: no selected dataset is indexed with `'high_quality'`.
 * - `mixtureHighQualityAndEconomic`: neither `allHighQuality` nor `allEconomic` holds.
 * - `inconsistentEmbeddingModel`: `allHighQuality` holds and the datasets name
 *   more than one distinct `embedding_model`.
 *
 * NOTE(review): a dataset whose `indexing_technique` is neither `'economy'` nor
 * `'high_quality'` changes no flag, so `allHighQuality` and `allEconomic` can
 * both be true for such input — confirm whether those datasets can reach here.
 *
 * @param datasets - Selected datasets; `null`/`undefined` is treated as an empty selection.
 * @returns The flags described above.
 */
export const getSelectedDatasetsMode = (datasets: DataSet[]) => {
  // Guard against a missing selection at runtime instead of throwing on `.length`.
  const selected = datasets || []
  const hasDatasets = selected.length > 0

  // With no datasets every flag is false; otherwise start optimistic and
  // let each dataset knock down the flags it contradicts.
  let allHighQuality = hasDatasets
  let allHighQualityVectorSearch = hasDatasets
  let allHighQualityFullTextSearch = hasDatasets
  let allEconomic = hasDatasets
  let inconsistentEmbeddingModel = false

  for (const dataset of selected) {
    if (dataset.indexing_technique === 'economy') {
      allHighQuality = false
      allHighQualityVectorSearch = false
      allHighQualityFullTextSearch = false
    }

    if (dataset.indexing_technique === 'high_quality') {
      allEconomic = false

      if (dataset.retrieval_model_dict.search_method !== RETRIEVE_METHOD.semantic)
        allHighQualityVectorSearch = false

      if (dataset.retrieval_model_dict.search_method !== RETRIEVE_METHOD.fullText)
        allHighQualityFullTextSearch = false
    }
  }

  // A mixture is a non-empty selection that is neither uniformly high quality nor uniformly economic.
  const mixtureHighQualityAndEconomic = hasDatasets && !(allHighQuality || allEconomic)

  // Embedding models are only compared when the selection counts as all high quality.
  if (allHighQuality)
    inconsistentEmbeddingModel = uniq(selected.map(item => item.embedding_model)).length > 1

  return {
    allHighQuality,
    allHighQualityVectorSearch,
    allHighQualityFullTextSearch,
    allEconomic,
    mixtureHighQualityAndEconomic,
    inconsistentEmbeddingModel,
  } as SelectedDatasetsMode
}
/**
 * Derives the multiple-retrieval configuration that fits the selected datasets.
 *
 * Copies `top_k` (falling back to `DATASET_DEFAULT.top_k` when undefined),
 * `score_threshold`, `reranking_mode`, `reranking_model`, `weights` and
 * `reranking_enable` from the given config, then adjusts them according to
 * `getSelectedDatasetsMode(selectedDatasets)`:
 * - economic, mixed, or embedding-inconsistent selections force the
 *   reranking-model mode;
 * - all-high-quality selections with a consistent embedding model default an
 *   unset mode to weighted score, and receive default weights when the mode is
 *   unset or weighted score and no weights were supplied.
 *
 * @param multipleRetrievalConfig - Current config; a falsy value is treated as `{ top_k: DATASET_DEFAULT.top_k }`.
 * @param selectedDatasets - Datasets the retrieval runs against.
 * @returns A new config object; the input config is not mutated.
 */
export const getMultipleRetrievalConfig = (multipleRetrievalConfig: MultipleRetrievalConfig, selectedDatasets: DataSet[]) => {
  const datasetsMode = getSelectedDatasetsMode(selectedDatasets)

  const {
    top_k = DATASET_DEFAULT.top_k,
    score_threshold,
    reranking_mode,
    reranking_model,
    weights,
    reranking_enable,
  } = multipleRetrievalConfig || { top_k: DATASET_DEFAULT.top_k }

  const result = {
    top_k,
    score_threshold,
    reranking_mode,
    reranking_model,
    weights,
    reranking_enable,
  }

  const requiresRerankingModel = datasetsMode.allEconomic
    || datasetsMode.mixtureHighQualityAndEconomic
    || datasetsMode.inconsistentEmbeddingModel
  const weightedScoreEligible = datasetsMode.allHighQuality && !datasetsMode.inconsistentEmbeddingModel

  if (requiresRerankingModel)
    result.reranking_mode = RerankingModeEnum.RerankingModel

  if (weightedScoreEligible) {
    // An unset mode defaults to weighted score for consistent high-quality selections.
    if (reranking_mode === undefined)
      result.reranking_mode = RerankingModeEnum.WeightedScore

    // Decisions are based on the incoming mode, not on the value just written to `result`.
    const modeIsUnsetOrWeighted = reranking_mode === undefined || reranking_mode === RerankingModeEnum.WeightedScore

    if (modeIsUnsetOrWeighted && !weights) {
      // Pick one preset and read both weights from it.
      const preset = datasetsMode.allHighQualityVectorSearch
        ? DEFAULT_WEIGHTED_SCORE.allHighQualityVectorSearch
        : datasetsMode.allHighQualityFullTextSearch
          ? DEFAULT_WEIGHTED_SCORE.allHighQualityFullTextSearch
          : DEFAULT_WEIGHTED_SCORE.other
      // The embedding identity is taken from the first selected dataset.
      const firstDataset = selectedDatasets[0]

      result.weights = {
        vector_setting: {
          vector_weight: preset.semantic,
          embedding_provider_name: firstDataset.embedding_model_provider,
          embedding_model_name: firstDataset.embedding_model,
        },
        keyword_setting: {
          keyword_weight: preset.keyword,
        },
      }
    }
  }

  return result
}
|