| 
					
				 | 
			
			
				@@ -15,9 +15,10 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from werkzeug.datastructures import FileStorage 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from werkzeug.exceptions import NotFound 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                      'jpg', 'jpeg', 'png', 'webp', 'gif', 'svg'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'doc', 'csv'] + IMAGE_EXTENSIONS 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                      'docx', 'doc', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 PREVIEW_WORDS_LIMIT = 3000 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -27,13 +28,7 @@ class FileService: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     def upload_file(file: FileStorage, user: Union[Account, EndUser], only_image: bool = False) -> UploadFile: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         extension = file.filename.split('.')[-1] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         etl_type = current_app.config['ETL_TYPE'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if etl_type == 'Unstructured': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            allowed_extensions = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                  'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                  'jpg', 'jpeg', 'png', 'webp', 'gif', 'svg'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            allowed_extensions = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                  'jpg', 'jpeg', 'png', 'webp', 'gif', 'svg'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        allowed_extensions = UNSTRUSTURED_ALLOWED_EXTENSIONS if etl_type == 'Unstructured' else ALLOWED_EXTENSIONS 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if extension.lower() not in allowed_extensions: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             raise UnsupportedFileTypeError() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         elif only_image and extension.lower() not in IMAGE_EXTENSIONS: 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -133,13 +128,7 @@ class FileService: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # extract text from file 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         extension = upload_file.extension 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         etl_type = current_app.config['ETL_TYPE'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if etl_type == 'Unstructured': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            allowed_extensions = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                  'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                  'jpg', 'jpeg', 'png', 'webp', 'gif', 'svg'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            allowed_extensions = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                  'jpg', 'jpeg', 'png', 'webp', 'gif', 'svg'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        allowed_extensions = UNSTRUSTURED_ALLOWED_EXTENSIONS if etl_type == 'Unstructured' else ALLOWED_EXTENSIONS 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if extension.lower() not in allowed_extensions: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             raise UnsupportedFileTypeError() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 |