
Fix/upload limit (#2521)

Co-authored-by: jyong <jyong@dify.ai>
Co-authored-by: StyleZhang <jasonapring2015@outlook.com>
Jyong, 1 year ago
parent commit 97fe817186

+ 2 - 0
api/.env.example

@@ -130,3 +130,5 @@ UNSTRUCTURED_API_URL=
 
 SSRF_PROXY_HTTP_URL=
 SSRF_PROXY_HTTPS_URL=
+
+BATCH_UPLOAD_LIMIT=10

+ 3 - 0
api/config.py

@@ -56,6 +56,7 @@ DEFAULTS = {
     'BILLING_ENABLED': 'False',
     'CAN_REPLACE_LOGO': 'False',
     'ETL_TYPE': 'dify',
+    'BATCH_UPLOAD_LIMIT': 20
 }
 
 
@@ -285,6 +286,8 @@ class Config:
         self.BILLING_ENABLED = get_bool_env('BILLING_ENABLED')
         self.CAN_REPLACE_LOGO = get_bool_env('CAN_REPLACE_LOGO')
 
+        self.BATCH_UPLOAD_LIMIT = get_env('BATCH_UPLOAD_LIMIT')
+
 
 class CloudEditionConfig(Config):
 

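The new setting flows from the environment into Flask config. A minimal sketch of how BATCH_UPLOAD_LIMIT is likely resolved, assuming get_env() falls back to the DEFAULTS map when the variable is unset (the pattern the other keys in api/config.py follow); a deployment that copies api/.env.example would then run with 10 instead of the code default of 20:

    import os

    # Sketch only: mirrors the DEFAULTS entry added above.
    DEFAULTS = {'BATCH_UPLOAD_LIMIT': 20}

    def get_env(key):
        # Assumption: an environment value (e.g. BATCH_UPLOAD_LIMIT=10 from api/.env.example)
        # wins; otherwise the DEFAULTS entry is used.
        return os.environ.get(key, DEFAULTS.get(key))

    # Consumers read it back through current_app.config and cast to int,
    # as the hunks in indexing_runner.py and dataset_service.py below do.
    batch_upload_limit = int(get_env('BATCH_UPLOAD_LIMIT'))
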
+ 17 - 0
api/core/indexing_runner.py

@@ -32,6 +32,7 @@ from models.dataset import Dataset, DatasetProcessRule, DocumentSegment
 from models.dataset import Document as DatasetDocument
 from models.model import UploadFile
 from models.source import DataSourceBinding
+from services.feature_service import FeatureService
 
 
 class IndexingRunner:
@@ -244,6 +245,14 @@
         """
         Estimate the indexing for the document.
         """
+        # check document limit
+        features = FeatureService.get_features(tenant_id)
+        if features.billing.enabled:
+            count = len(file_details)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+
         embedding_model_instance = None
         if dataset_id:
             dataset = Dataset.query.filter_by(
@@ -361,6 +370,14 @@
         """
         Estimate the indexing for the document.
         """
+        # check document limit
+        features = FeatureService.get_features(tenant_id)
+        if features.billing.enabled:
+            count = len(notion_info_list)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+
         embedding_model_instance = None
         if dataset_id:
             dataset = Dataset.query.filter_by(

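Both estimate paths above apply the same guard, so the logic reads as one helper. A hedged sketch of that shared check (check_batch_upload_limit is a hypothetical name, not part of the commit; it assumes FeatureService.get_features returns an object exposing billing.enabled and must run inside a Flask app context):

    from flask import current_app

    from services.feature_service import FeatureService


    def check_batch_upload_limit(tenant_id: str, items: list) -> None:
        # Only billing-enabled (cloud) tenants are limited; self-hosted runs skip the check.
        features = FeatureService.get_features(tenant_id)
        if not features.billing.enabled:
            return
        batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
        if len(items) > batch_upload_limit:
            raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
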
+ 7 - 0
api/services/annotation_service.py

@@ -10,6 +10,7 @@ from werkzeug.exceptions import NotFound
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from models.model import App, AppAnnotationHitHistory, AppAnnotationSetting, Message, MessageAnnotation
+from services.feature_service import FeatureService
 from tasks.annotation.add_annotation_to_index_task import add_annotation_to_index_task
 from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task
 from tasks.annotation.delete_annotation_index_task import delete_annotation_index_task
@@ -284,6 +285,12 @@ class AppAnnotationService:
                 result.append(content)
             if len(result) == 0:
                 raise ValueError("The CSV file is empty.")
+            # check annotation limit
+            features = FeatureService.get_features(current_user.current_tenant_id)
+            if features.billing.enabled:
+                annotation_quota_limit = features.annotation_quota_limit
+                if annotation_quota_limit.limit < len(result) + annotation_quota_limit.size:
+                    raise ValueError("The number of annotations exceeds the limit of your subscription.")
             # async job
             job_id = str(uuid.uuid4())
             indexing_cache_key = 'app_annotation_batch_import_{}'.format(str(job_id))

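The new check compares the plan limit against the annotations already stored plus the rows in the uploaded CSV. A small worked example with hypothetical numbers, assuming limit is the plan maximum and size the count already stored (attribute names from the diff, semantics inferred):

    # 200-annotation plan, 180 already stored, CSV with 50 rows.
    limit, size, incoming = 200, 180, 50
    blocked = limit < incoming + size  # 200 < 230 -> True, the batch import is rejected
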
+ 22 - 10
api/services/dataset_service.py

@@ -36,6 +36,7 @@ from services.errors.account import NoPermissionError
 from services.errors.dataset import DatasetNameDuplicateError
 from services.errors.document import DocumentIndexingError
 from services.errors.file import FileNotExistsError
+from services.feature_service import FeatureService
 from services.vector_service import VectorService
 from tasks.clean_notion_document_task import clean_notion_document_task
 from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task
@@ -452,7 +453,9 @@ class DocumentService:
                                       created_from: str = 'web'):
 
         # check document limit
-        if current_app.config['EDITION'] == 'CLOUD':
+        features = FeatureService.get_features(current_user.current_tenant_id)
+
+        if features.billing.enabled:
             if 'original_document_id' not in document_data or not document_data['original_document_id']:
                 count = 0
                 if document_data["data_source"]["type"] == "upload_file":
@@ -462,6 +465,9 @@
                     notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
                     for notion_info in notion_info_list:
                         count = count + len(notion_info['pages'])
+                batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+                if count > batch_upload_limit:
+                    raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
         # if dataset is empty, update dataset data_source_type
         if not dataset.data_source_type:
             dataset.data_source_type = document_data["data_source"]["type"]
@@ -741,14 +747,20 @@
 
     @staticmethod
     def save_document_without_dataset_id(tenant_id: str, document_data: dict, account: Account):
-        count = 0
-        if document_data["data_source"]["type"] == "upload_file":
-            upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
-            count = len(upload_file_list)
-        elif document_data["data_source"]["type"] == "notion_import":
-            notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
-            for notion_info in notion_info_list:
-                count = count + len(notion_info['pages'])
+        features = FeatureService.get_features(current_user.current_tenant_id)
+
+        if features.billing.enabled:
+            count = 0
+            if document_data["data_source"]["type"] == "upload_file":
+                upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
+                count = len(upload_file_list)
+            elif document_data["data_source"]["type"] == "notion_import":
+                notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
+                for notion_info in notion_info_list:
+                    count = count + len(notion_info['pages'])
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
 
         embedding_model = None
         dataset_collection_binding_id = None
@@ -1139,7 +1151,7 @@
                     segment.answer = args['answer']
                 if 'keywords' in args and args['keywords']:
                     segment.keywords = args['keywords']
-                if'enabled' in args and args['enabled'] is not None:
+                if 'enabled' in args and args['enabled'] is not None:
                     segment.enabled = args['enabled']
                 db.session.add(segment)
                 db.session.commit()

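Both DocumentService paths count incoming documents the same way: each uploaded file counts once, and a Notion import counts once per selected page. A sketch of that rule as a standalone helper (count_incoming_documents is a hypothetical name for illustration; the dict layout matches the document_data["data_source"] structure used above):

    def count_incoming_documents(data_source: dict) -> int:
        info_list = data_source['info_list']
        if data_source['type'] == 'upload_file':
            return len(info_list['file_info_list']['file_ids'])
        if data_source['type'] == 'notion_import':
            return sum(len(info['pages']) for info in info_list['notion_info_list'])
        return 0
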
+ 2 - 2
api/services/file_service.py

@@ -20,9 +20,9 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError
 IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
 IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
 
-ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] + IMAGE_EXTENSIONS
+ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv']
 UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
-                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
+                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml']
 PREVIEW_WORDS_LIMIT = 3000
 
 

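Dropping "+ IMAGE_EXTENSIONS" narrows dataset uploads back to document formats; IMAGE_EXTENSIONS itself stays defined, presumably for the separate image-upload path. A rough sketch of the resulting filter (the real validation lives elsewhere in FileService; the case handling here is an assumption):

    ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv']

    def is_allowed_dataset_file(filename: str) -> bool:
        extension = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
        return extension in ALLOWED_EXTENSIONS  # 'report.pdf' -> True, 'photo.png' -> False
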
+ 32 - 1
api/tasks/document_indexing_task.py

@@ -4,10 +4,12 @@ import time
 
 import click
 from celery import shared_task
+from flask import current_app
 
 from core.indexing_runner import DocumentIsPausedException, IndexingRunner
 from extensions.ext_database import db
-from models.dataset import Document
+from models.dataset import Dataset, Document
+from services.feature_service import FeatureService
 
 
 @shared_task(queue='dataset')
@@ -21,6 +23,35 @@ def document_indexing_task(dataset_id: str, document_ids: list):
     """
     documents = []
     start_at = time.perf_counter()
+
+    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
+
+    # check document limit
+    features = FeatureService.get_features(dataset.tenant_id)
+    try:
+        if features.billing.enabled:
+            vector_space = features.vector_space
+            count = len(document_ids)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+            if 0 < vector_space.limit <= vector_space.size:
+                raise ValueError("Your total number of documents plus the number of uploads have over the limit of "
+                                 "your subscription.")
+    except Exception as e:
+        for document_id in document_ids:
+            document = db.session.query(Document).filter(
+                Document.id == document_id,
+                Document.dataset_id == dataset_id
+            ).first()
+            if document:
+                document.indexing_status = 'error'
+                document.error = str(e)
+                document.stopped_at = datetime.datetime.utcnow()
+                db.session.add(document)
+        db.session.commit()
+        return
+
     for document_id in document_ids:
         logging.info(click.style('Start process document: {}'.format(document_id), fg='green'))
 

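The task now fails fast: if either limit trips, every document in the batch is marked indexing_status = 'error' with the message stored in document.error, and the task returns before indexing starts. The vector-space condition only fires when a positive limit is configured, so limit 0 behaves as unlimited. A small illustration of that condition:

    def over_vector_space_limit(limit: int, size: int) -> bool:
        # Mirrors `0 < vector_space.limit <= vector_space.size` from the hunk above.
        return 0 < limit <= size

    assert over_vector_space_limit(limit=200, size=200) is True    # at capacity: reject the batch
    assert over_vector_space_limit(limit=0, size=10_000) is False  # limit 0 is treated as unlimited
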
+ 8 - 1
web/app/components/datasets/create/file-uploader/index.tsx

@@ -14,6 +14,8 @@ import { fetchSupportFileTypes } from '@/service/datasets'
 import I18n from '@/context/i18n'
 import { LanguagesSupportedUnderscore, getModelRuntimeSupported } from '@/utils/language'
 
+const FILES_NUMBER_LIMIT = 20
+
 type IFileUploaderProps = {
   fileList: FileItem[]
   titleClassName?: string
@@ -176,6 +178,11 @@ const FileUploader = ({
     if (!files.length)
       return false
 
+    if (files.length + fileList.length > FILES_NUMBER_LIMIT) {
+      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.filesNumber', { filesNumber: FILES_NUMBER_LIMIT }) })
+      return false
+    }
+
     const preparedFiles = files.map((file, index) => ({
       fileID: `file${index}-${Date.now()}`,
       file,
@@ -185,7 +192,7 @@
     prepareFileList(newFiles)
     fileListRef.current = newFiles
     uploadMultipleFiles(preparedFiles)
-  }, [prepareFileList, uploadMultipleFiles])
+  }, [prepareFileList, uploadMultipleFiles, notify, t, fileList])
 
   const handleDragEnter = (e: DragEvent) => {
     e.preventDefault()

+ 1 - 0
web/i18n/lang/dataset-creation.en.ts

@@ -28,6 +28,7 @@ const translation = {
        typeError: 'File type not supported',
        size: 'File too large. Maximum is {{size}}MB',
        count: 'Multiple files not supported',
+        filesNumber: 'You have reached the batch upload limit of {{filesNumber}}.',
      },
      cancel: 'Cancel',
      change: 'Change',

+ 1 - 0
web/i18n/lang/dataset-creation.pt.ts

@@ -28,6 +28,7 @@ const translation = {
        typeError: 'Tipo de arquivo não suportado',
        size: 'Arquivo muito grande. Máximo é {{size}}MB',
        count: 'Vários arquivos não suportados',
+        filesNumber: 'Limite de upload em massa {{filesNumber}}.',
      },
      cancel: 'Cancelar',
      change: 'Alterar',

+ 1 - 0
web/i18n/lang/dataset-creation.uk.ts

@@ -28,6 +28,7 @@ const translation = {
        typeError: 'Тип файлу не підтримується',
        size: 'Файл занадто великий. Максимум – {{size}} МБ',
        count: 'Не підтримується завантаження кількох файлів',
+        filesNumber: 'Ліміт масового завантаження {{filesNumber}}.',
      },
      cancel: 'Скасувати',
      change: 'Змінити',

+ 1 - 0
web/i18n/lang/dataset-creation.zh.ts

@@ -28,6 +28,7 @@ const translation = {
        typeError: '文件类型不支持',
        size: '文件太大了,不能超过 {{size}}MB',
        count: '暂不支持多个文件',
+        filesNumber: '批量上传限制 {{filesNumber}}。',
      },
      cancel: '取消',
      change: '更改文件',