import services.dataset_service
from controllers.service_api import api
from controllers.service_api.dataset.error import DatasetNameDuplicateError
from controllers.service_api.wraps import DatasetApiResource
from core.model_runtime.entities.model_entities import ModelType
from core.provider_manager import ProviderManager
from fields.dataset_fields import dataset_detail_fields
from flask import request
from flask_restful import marshal, reqparse
from libs.login import current_user
from services.dataset_service import DatasetService


def _validate_name(name):
    """Validate a dataset name supplied in a request.

    Used as the ``type`` callable of the ``name`` request argument.

    Args:
        name: The raw value of the ``name`` argument.

    Returns:
        The unchanged ``name`` when it is non-empty and at most 40 characters.

    Raises:
        ValueError: If ``name`` is empty/missing or longer than 40 characters.
    """
    # A falsy name already covers the "shorter than 1 character" case.
    if not name or len(name) > 40:
        raise ValueError('Name must be between 1 to 40 characters.')
    return name


class DatasetApi(DatasetApiResource):
    """Resource for listing datasets (GET) and creating a dataset (POST)."""

    def get(self, tenant_id):
        """Return one page of datasets, flagged with embedding availability.

        Query-string parameters:
            page: Page number, defaults to 1.
            limit: Page size, defaults to 20.
            provider: Dataset provider filter, defaults to ``"vendor"``.

        Args:
            tenant_id: Tenant identifier forwarded to
                ``DatasetService.get_datasets``.

        Returns:
            A ``(response, 200)`` tuple where ``response`` holds the keys
            ``data``, ``has_more``, ``limit``, ``total`` and ``page``.
        """
        page = request.args.get('page', default=1, type=int)
        limit = request.args.get('limit', default=20, type=int)
        provider = request.args.get('provider', default="vendor")
        datasets, total = DatasetService.get_datasets(page, limit, provider,
                                                      tenant_id, current_user)

        # Check embedding setting: collect the active text-embedding models.
        # Note the lookup is keyed on the current user's tenant, not on the
        # ``tenant_id`` argument.
        provider_manager = ProviderManager()
        configurations = provider_manager.get_configurations(
            tenant_id=current_user.current_tenant_id
        )
        embedding_models = configurations.get_models(
            model_type=ModelType.TEXT_EMBEDDING,
            only_active=True
        )

        # "<model>:<provider>" identifiers; a set because it is only used
        # for membership tests below.
        model_names = {
            f"{embedding_model.model}:{embedding_model.provider.provider}"
            for embedding_model in embedding_models
        }

        data = marshal(datasets, dataset_detail_fields)
        for item in data:
            if item['indexing_technique'] == 'high_quality':
                item_model = f"{item['embedding_model']}:{item['embedding_model_provider']}"
                item['embedding_available'] = item_model in model_names
            else:
                # Datasets that are not 'high_quality' are always reported
                # as available.
                item['embedding_available'] = True

        response = {
            'data': data,
            # A completely filled page is taken to mean more may follow.
            'has_more': len(datasets) == limit,
            'limit': limit,
            'total': total,
            'page': page
        }
        return response, 200

    def post(self, tenant_id):
        """Create an empty dataset from the request arguments.

        Request arguments:
            name: Required; validated by ``_validate_name``.
            indexing_technique: Optional JSON field, either
                ``'high_quality'`` or ``'economy'``.

        Args:
            tenant_id: Tenant identifier the new dataset is created under.

        Returns:
            A ``(marshalled_dataset, 200)`` tuple.

        Raises:
            DatasetNameDuplicateError: If the service reports a duplicate
                dataset name.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('name', nullable=False, required=True,
                            help='type is required. Name must be between 1 to 40 characters.',
                            type=_validate_name)
        parser.add_argument('indexing_technique', type=str, location='json',
                            choices=('high_quality', 'economy'),
                            help='Invalid indexing technique.')
        args = parser.parse_args()

        try:
            dataset = DatasetService.create_empty_dataset(
                tenant_id=tenant_id,
                name=args['name'],
                indexing_technique=args['indexing_technique'],
                account=current_user
            )
        # NOTE(review): resolving ``services.errors.dataset`` here presumably
        # relies on ``services.dataset_service`` importing that submodule;
        # confirm, or import it explicitly.
        except services.errors.dataset.DatasetNameDuplicateError:
            # Translate the service-layer error into the API-layer error.
            raise DatasetNameDuplicateError()

        return marshal(dataset, dataset_detail_fields), 200


api.add_resource(DatasetApi, '/datasets')