document.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. import datetime
  2. import uuid
  3. from flask import current_app
  4. from flask_restful import reqparse
  5. from werkzeug.exceptions import NotFound
  6. import services.dataset_service
  7. from controllers.service_api import api
  8. from controllers.service_api.app.error import ProviderNotInitializeError
  9. from controllers.service_api.dataset.error import ArchivedDocumentImmutableError, DocumentIndexingError, \
  10. DatasetNotInitedError
  11. from controllers.service_api.wraps import DatasetApiResource
  12. from core.llm.error import ProviderTokenNotInitError
  13. from extensions.ext_database import db
  14. from extensions.ext_storage import storage
  15. from models.model import UploadFile
  16. from services.dataset_service import DocumentService
  17. class DocumentListApi(DatasetApiResource):
  18. """Resource for documents."""
  19. def post(self, dataset):
  20. """Create document."""
  21. parser = reqparse.RequestParser()
  22. parser.add_argument('name', type=str, required=True, nullable=False, location='json')
  23. parser.add_argument('text', type=str, required=True, nullable=False, location='json')
  24. parser.add_argument('doc_type', type=str, location='json')
  25. parser.add_argument('doc_metadata', type=dict, location='json')
  26. args = parser.parse_args()
  27. if not dataset.indexing_technique:
  28. raise DatasetNotInitedError("Dataset indexing technique must be set.")
  29. doc_type = args.get('doc_type')
  30. doc_metadata = args.get('doc_metadata')
  31. if doc_type and doc_type not in DocumentService.DOCUMENT_METADATA_SCHEMA:
  32. raise ValueError('Invalid doc_type.')
  33. # user uuid as file name
  34. file_uuid = str(uuid.uuid4())
  35. file_key = 'upload_files/' + dataset.tenant_id + '/' + file_uuid + '.txt'
  36. # save file to storage
  37. storage.save(file_key, args.get('text'))
  38. # save file to db
  39. config = current_app.config
  40. upload_file = UploadFile(
  41. tenant_id=dataset.tenant_id,
  42. storage_type=config['STORAGE_TYPE'],
  43. key=file_key,
  44. name=args.get('name') + '.txt',
  45. size=len(args.get('text')),
  46. extension='txt',
  47. mime_type='text/plain',
  48. created_by=dataset.created_by,
  49. created_at=datetime.datetime.utcnow(),
  50. used=True,
  51. used_by=dataset.created_by,
  52. used_at=datetime.datetime.utcnow()
  53. )
  54. db.session.add(upload_file)
  55. db.session.commit()
  56. document_data = {
  57. 'data_source': {
  58. 'type': 'upload_file',
  59. 'info': [
  60. {
  61. 'upload_file_id': upload_file.id
  62. }
  63. ]
  64. }
  65. }
  66. try:
  67. documents, batch = DocumentService.save_document_with_dataset_id(
  68. dataset=dataset,
  69. document_data=document_data,
  70. account=dataset.created_by_account,
  71. dataset_process_rule=dataset.latest_process_rule,
  72. created_from='api'
  73. )
  74. except ProviderTokenNotInitError as ex:
  75. raise ProviderNotInitializeError(ex.description)
  76. document = documents[0]
  77. if doc_type and doc_metadata:
  78. metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[doc_type]
  79. document.doc_metadata = {}
  80. for key, value_type in metadata_schema.items():
  81. value = doc_metadata.get(key)
  82. if value is not None and isinstance(value, value_type):
  83. document.doc_metadata[key] = value
  84. document.doc_type = doc_type
  85. document.updated_at = datetime.datetime.utcnow()
  86. db.session.commit()
  87. return {'id': document.id}
  88. class DocumentApi(DatasetApiResource):
  89. def delete(self, dataset, document_id):
  90. """Delete document."""
  91. document_id = str(document_id)
  92. document = DocumentService.get_document(dataset.id, document_id)
  93. # 404 if document not found
  94. if document is None:
  95. raise NotFound("Document Not Exists.")
  96. # 403 if document is archived
  97. if DocumentService.check_archived(document):
  98. raise ArchivedDocumentImmutableError()
  99. try:
  100. # delete document
  101. DocumentService.delete_document(document)
  102. except services.errors.document.DocumentIndexingError:
  103. raise DocumentIndexingError('Cannot delete document during indexing.')
  104. return {'result': 'success'}, 204
  105. api.add_resource(DocumentListApi, '/documents')
  106. api.add_resource(DocumentApi, '/documents/<uuid:document_id>')