datasets.ts 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. import type { DataSourceNotionPage } from './common'
  2. import type { AppMode, RetrievalConfig } from '@/types/app'
  /**
   * String-valued enum used as the type of the `data_source_type` / `type`
   * fields declared in this file (DataSet, InitialDocumentDetail, DataSource).
   */
  export enum DataSourceType {
    FILE = 'upload_file',
    NOTION = 'notion_import',
    WEB = 'web_import',
  }
  /**
   * Shape of a dataset record.
   *
   * Only names and types are established here; field semantics (for example the
   * unit of `updated_at`) are not visible in this file — confirm against the backend.
   */
  export type DataSet = {
    id: string
    name: string
    icon: string
    icon_background: string
    description: string
    permission: 'only_me' | 'all_team_members'
    data_source_type: DataSourceType
    indexing_technique: 'high_quality' | 'economy'
    created_by: string
    updated_by: string
    updated_at: number
    app_count: number
    document_count: number
    word_count: number
    embedding_model: string
    embedding_model_provider: string
    embedding_available: boolean
    // NOTE(review): both fields below share the RetrievalConfig type; which one
    // consumers should treat as authoritative is not visible here.
    retrieval_model_dict: RetrievalConfig
    retrieval_model: RetrievalConfig
  }
  /**
   * The global `File` type intersected with optional metadata fields.
   * Every added field is optional, so a plain `File` is assignable to it.
   */
  export type CustomFile = File & {
    id?: string
    extension?: string
    mime_type?: string
    created_by?: string
    created_at?: number
  }
  /**
   * Pairs a CustomFile with an identifier and a numeric progress value.
   * NOTE(review): the range and meaning of `progress` are not visible here.
   */
  export type FileItem = {
    fileID: string
    file: CustomFile
    progress: number
  }
  /** A page of DataSet records together with paging fields. */
  export type DataSetListResponse = {
    data: DataSet[]
    has_more: boolean
    limit: number
    page: number
    total: number
  }
  /** A question/answer pair. */
  export type QA = {
    question: string
    answer: string
  }

  /**
   * Estimate figures plus preview content.
   * `qa_preview` is optional; `preview` is always present.
   */
  export type IndexingEstimateResponse = {
    tokens: number
    total_price: number
    currency: string
    total_segments: number
    preview: string[]
    qa_preview?: QA[]
  }

  /** IndexingEstimateResponse extended with a `total_nodes` count. */
  export type FileIndexingEstimateResponse = {
    total_nodes: number
  } & IndexingEstimateResponse
  /**
   * Indexing status record: the current DocumentIndexingStatus plus per-stage
   * numeric fields and segment counters.
   *
   * NOTE(review): `completed_at`, `paused_at`, `error` and `stopped_at` are typed
   * `any`, which switches off type checking for every consumer. They are left
   * as-is because their real shape is not visible here — narrow them once the
   * payload is confirmed.
   */
  export type IndexingStatusResponse = {
    id: string
    indexing_status: DocumentIndexingStatus
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    completed_at: any
    paused_at: any
    error: any
    stopped_at: any
    completed_segments: number
    total_segments: number
  }
  /** Wrapper holding a list of IndexingStatusResponse entries under `data`. */
  export type IndexingStatusBatchResponse = {
    data: IndexingStatusResponse[]
  }
  /** The two processing modes accepted by ProcessRuleResponse. */
  export type ProcessMode = 'automatic' | 'custom'

  /** A processing mode together with its rule set. */
  export type ProcessRuleResponse = {
    mode: ProcessMode
    rules: Rules
  }

  /** Rule set: a list of pre-processing toggles and one segmentation config. */
  export type Rules = {
    pre_processing_rules: PreProcessingRule[]
    segmentation: Segmentation
  }

  /** A single pre-processing rule, identified by `id` and switched by `enabled`. */
  export type PreProcessingRule = {
    id: string
    enabled: boolean
  }

  /**
   * Segmentation settings.
   * NOTE(review): units of `max_tokens` / `chunk_overlap` are presumably tokens — confirm.
   */
  export type Segmentation = {
    separator: string
    max_tokens: number
    chunk_overlap: number
  }
  /**
   * Every indexing status value, as a readonly tuple.
   * Declared `as const` so the union type below can be derived from it.
   */
  export const DocumentIndexingStatusList = [
    'waiting',
    'parsing',
    'cleaning',
    'splitting',
    'indexing',
    'paused',
    'error',
    'completed',
  ] as const

  /** Union of the string literals listed in DocumentIndexingStatusList. */
  export type DocumentIndexingStatus = typeof DocumentIndexingStatusList[number]
  /**
   * Every display status value, as a readonly tuple.
   * Declared `as const` so the union type below can be derived from it.
   */
  export const DisplayStatusList = [
    'queuing',
    'indexing',
    'paused',
    'error',
    'available',
    'enabled',
    'disabled',
    'archived',
  ] as const

  /** Union of the string literals listed in DisplayStatusList. */
  export type DocumentDisplayStatus = typeof DisplayStatusList[number]
  /**
   * Source details attached to a document: a required `upload_file` descriptor
   * and an optional `notion_page_icon` string.
   */
  export type DataSourceInfo = {
    upload_file: {
      id: string
      name: string
      size: number
      mime_type: string
      created_at: number
      created_by: string
      extension: string
    }
    notion_page_icon?: string
  }
  /**
   * Base document shape that SimpleDocumentDetail extends.
   * `completed_segments` and `total_segments` are optional here.
   */
  export type InitialDocumentDetail = {
    id: string
    batch: string
    position: number
    dataset_id: string
    data_source_type: DataSourceType
    data_source_info: DataSourceInfo
    dataset_process_rule_id: string
    name: string
    created_from: 'api' | 'web'
    created_by: string
    created_at: number
    indexing_status: DocumentIndexingStatus
    display_status: DocumentDisplayStatus
    completed_segments?: number
    total_segments?: number
    doc_form: 'text_model' | 'qa_model'
  }
  /**
   * InitialDocumentDetail extended with enablement, archival and usage fields.
   *
   * NOTE(review): `dataset_process_rule_id` is re-declared as optional below, but
   * InitialDocumentDetail already requires it, so in the resulting intersection
   * the property stays required. The re-declaration has no effect.
   */
  export type SimpleDocumentDetail = InitialDocumentDetail & {
    enabled: boolean
    word_count: number
    error?: string | null
    archived: boolean
    updated_at: number
    hit_count: number
    dataset_process_rule_id?: string
  }
  /** A page of SimpleDocumentDetail records together with paging fields. */
  export type DocumentListResponse = {
    data: SimpleDocumentDetail[]
    has_more: boolean
    total: number
    page: number
    limit: number
  }
  /**
   * Fields shared by CreateDocumentReq and IndexingEstimateParams.
   * `original_document_id` and `indexing_technique` are optional.
   */
  export type DocumentReq = {
    original_document_id?: string
    indexing_technique?: string
    doc_form: 'text_model' | 'qa_model'
    doc_language: string
    process_rule: ProcessRule
  }
  /** DocumentReq extended with a required data source and retrieval config. */
  export type CreateDocumentReq = DocumentReq & {
    data_source: DataSource
    retrieval_model: RetrievalConfig
  }
  /**
   * DocumentReq plus a required `dataset_id`; the DataSource fields are mixed in
   * through `Partial`, so each of them is optional.
   */
  export type IndexingEstimateParams = DocumentReq & Partial<DataSource> & {
    dataset_id: string
  }
  /**
   * Describes a data source: its type plus an `info_list` that repeats the type
   * and optionally carries Notion info and/or uploaded file ids.
   */
  export type DataSource = {
    type: DataSourceType
    info_list: {
      data_source_type: DataSourceType
      notion_info_list?: NotionInfo[]
      file_info_list?: {
        file_ids: string[]
      }
    }
  }
  /** A workspace id paired with a list of DataSourceNotionPage entries. */
  export type NotionInfo = {
    workspace_id: string
    pages: DataSourceNotionPage[]
  }
  /**
   * Minimal page reference: an id and a type string.
   * NOTE(review): not referenced by any other declaration in this file.
   */
  export type NotionPage = {
    page_id: string
    type: string
  }
  /**
   * Processing rule used by DocumentReq.
   * NOTE(review): `mode` is a plain `string` here, whereas ProcessRuleResponse
   * uses the narrower ProcessMode union — left unchanged so callers keep compiling.
   */
  export type ProcessRule = {
    mode: string
    rules: Rules
  }
  /**
   * Batch id and the documents in it, with an optional DataSet.
   *
   * The lower-camel-case name departs from the PascalCase used by every other
   * type in this file; it is kept because it is an exported identifier.
   */
  export type createDocumentResponse = {
    dataset?: DataSet
    batch: string
    documents: InitialDocumentDetail[]
  }
  /**
   * SimpleDocumentDetail extended with processing, pause/stop, disable and
   * archive fields, plus optional document type and metadata.
   *
   * NOTE(review):
   * - `batch` and `indexing_status` repeat properties already declared on
   *   InitialDocumentDetail. Because this is an intersection, `indexing_status`
   *   still resolves to DocumentIndexingStatus rather than widening to `string`.
   * - The `[key: string]: any` index signature lets any other property be read
   *   as `any`, which weakens checking; it is kept to avoid breaking callers.
   */
  export type FullDocumentDetail = SimpleDocumentDetail & {
    batch: string
    created_api_request_id: string
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    tokens: number
    indexing_latency: number
    completed_at: number
    paused_by: string
    paused_at: number
    stopped_at: number
    indexing_status: string
    disabled_at: number
    disabled_by: string
    archived_reason: 'rule_modified' | 're_upload'
    archived_by: string
    archived_at: number
    doc_type?: DocType | null | 'others'
    doc_metadata?: DocMetadata | null
    segment_count: number
    [key: string]: any
  }
  /**
   * Document metadata. Seven named string fields are required; the index
   * signature additionally permits any other string-valued key.
   */
  export type DocMetadata = {
    title: string
    language: string
    author: string
    publisher: string
    publicationDate: string
    ISBN: string
    category: string
    [key: string]: string
  }
  /** Document type values grouped as "customizable", as a readonly tuple. */
  export const CUSTOMIZABLE_DOC_TYPES = [
    'book',
    'web_page',
    'paper',
    'social_media_post',
    'personal_document',
    'business_document',
    'im_chat_log',
  ] as const

  /** Document type values grouped as "fixed", as a readonly tuple. */
  export const FIXED_DOC_TYPES = [
    'synced_from_github',
    'synced_from_notion',
    'wikipedia_entry',
  ] as const

  /** Union of the literals in CUSTOMIZABLE_DOC_TYPES. */
  export type CustomizableDocType = typeof CUSTOMIZABLE_DOC_TYPES[number]

  /** Union of the literals in FIXED_DOC_TYPES. */
  export type FixedDocType = typeof FIXED_DOC_TYPES[number]

  /** Any document type from either list. */
  export type DocType = CustomizableDocType | FixedDocType
  /** Alias of FullDocumentDetail. */
  export type DocumentDetailResponse = FullDocumentDetail
  /**
   * Every segment status value, as a readonly tuple.
   *
   * Declared `as const`, matching DocumentIndexingStatusList and
   * DisplayStatusList. Without it the array is inferred as `string[]` and the
   * derived SegmentStatus type collapses to plain `string`, so misspelled
   * statuses would type-check.
   */
  export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing'] as const

  /** Union of the string literals listed in SEGMENT_STATUS_LIST. */
  export type SegmentStatus = typeof SEGMENT_STATUS_LIST[number]
  /**
   * Query fields for listing segments. Only `limit` is required.
   * The `status` filter is present in the source but commented out.
   */
  export type SegmentsQuery = {
    last_id?: string
    limit: number
    // status?: SegmentStatus
    hit_count_gte?: number
    keyword?: string
    enabled?: boolean
  }
  /**
   * Full segment record. `answer` is the only optional field; `error` may be
   * `null`.
   * NOTE(review): units of the `*_at` numeric fields are not visible here.
   */
  export type SegmentDetailModel = {
    id: string
    position: number
    document_id: string
    content: string
    word_count: number
    tokens: number
    keywords: string[]
    index_node_id: string
    index_node_hash: string
    hit_count: number
    enabled: boolean
    disabled_at: number
    disabled_by: string
    status: SegmentStatus
    created_by: string
    created_at: number
    indexing_at: number
    completed_at: number
    error: string | null
    stopped_at: number
    answer?: string
  }
  /** A batch of SegmentDetailModel records with `has_more`, `limit` and `total`. */
  export type SegmentsResponse = {
    data: SegmentDetailModel[]
    has_more: boolean
    limit: number
    total: number
  }
  /**
   * A recorded hit-testing entry, with its content, its origin (`source`) and
   * creator information.
   */
  export type HitTestingRecord = {
    id: string
    content: string
    source: 'app' | 'hit_testing' | 'plugin'
    source_app_id: string
    created_by_role: 'account' | 'end_user'
    created_by: string
    created_at: number
  }
  /** One hit-testing result: a Segment, its score and a TsnePosition. */
  export type HitTesting = {
    segment: Segment
    score: number
    tsne_position: TsnePosition
  }
  /** Segment as embedded in a HitTesting result, including its parent Document. */
  export type Segment = {
    id: string
    document: Document
    content: string
    position: number
    word_count: number
    tokens: number
    keywords: string[]
    hit_count: number
    index_node_hash: string
  }
  /**
   * Compact document reference used by Segment.
   *
   * NOTE(review): inside this module the name shadows the global DOM `Document`
   * type. `data_source_type` is a plain `string` here rather than DataSourceType.
   */
  export type Document = {
    id: string
    data_source_type: string
    name: string
    doc_type: DocType
  }
  /** A page of HitTestingRecord entries together with paging fields. */
  export type HitTestingRecordsResponse = {
    data: HitTestingRecord[]
    has_more: boolean
    limit: number
    total: number
    page: number
  }
  /** A two-dimensional coordinate. */
  export type TsnePosition = {
    x: number
    y: number
  }
  /** The query (content and its TsnePosition) and the HitTesting records for it. */
  export type HitTestingResponse = {
    query: {
      content: string
      tsne_position: TsnePosition
    }
    records: Array<HitTesting>
  }
  /** Summary of an app: id, name, AppMode and icon fields. */
  export type RelatedApp = {
    id: string
    name: string
    mode: AppMode
    icon: string
    icon_background: string
  }
  /** A list of RelatedApp entries with a `total` count. */
  export type RelatedAppResponse = {
    data: Array<RelatedApp>
    total: number
  }
  /** Segment update fields: `content` is required, `answer` and `keywords` are optional. */
  export type SegmentUpdator = {
    content: string
    answer?: string
    keywords?: string[]
  }
  /**
   * String-valued enum whose members carry the same two literals used by the
   * `doc_form` fields in this file ('text_model' and 'qa_model').
   */
  export enum DocForm {
    TEXT = 'text_model',
    QA = 'qa_model',
  }