datasets.ts 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. import { AppMode } from './app'
  /**
   * Shape of a single dataset record.
   *
   * `permission`, `data_source_type` and `indexing_technique` only accept the
   * string literals listed on each member.
   */
  export type DataSet = {
    id: string
    name: string
    icon: string
    icon_background: string
    description: string
    permission:
      | 'only_me'
      | 'all_team_members'
    // Only one source kind is declared.
    data_source_type: 'upload_file'
    indexing_technique:
      | 'high_quality'
      | 'economy'
    created_by: string
    updated_by: string
    updated_at: number
    // Aggregate counters attached to the dataset.
    app_count: number
    document_count: number
    word_count: number
  }
  /**
   * Descriptor of an uploaded file.
   *
   * Note: inside this module the name shadows the global DOM `File` type.
   */
  export type File = {
    id: string
    name: string
    size: number
    extension: string
    mime_type: string
    created_by: string
    created_at: number
  }
  /** One page of `DataSet` records together with its paging fields. */
  export type DataSetListResponse = {
    data: Array<DataSet>
    has_more: boolean
    limit: number
    page: number
    total: number
  }
  /**
   * Indexing estimate: token and segment counts, a price with its currency,
   * and a list of preview strings.
   */
  export type IndexingEstimateResponse = {
    tokens: number
    total_price: number
    currency: string
    total_segments: number
    preview: Array<string>
  }
  /** `IndexingEstimateResponse` extended with a `total_nodes` count. */
  export interface FileIndexingEstimateResponse extends IndexingEstimateResponse {
    total_nodes: number
  }
  /**
   * Indexing progress report for one item, identified by `id`.
   *
   * NOTE(review): `completed_at`, `paused_at`, `error` and `stopped_at` are
   * typed `any`, so the compiler performs no checking on them. Tighten these
   * once the payload shapes are confirmed.
   */
  export type IndexingStatusResponse = {
    id: string
    indexing_status: DocumentIndexingStatus
    // Numeric markers for the individual pipeline stages.
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    // Unchecked (`any`) fields.
    completed_at: any
    paused_at: any
    error: any
    stopped_at: any
    // Segment progress counters.
    completed_segments: number
    total_segments: number
  }
  /** The two accepted processing-mode literals. */
  export type ProcessMode =
    | 'automatic'
    | 'custom'
  /** A processing mode paired with its `Rules`. */
  export type ProcessRuleResponse = {
    mode: ProcessMode
    rules: Rules
  }
  /** Rule set: a list of pre-processing rules plus one segmentation setting. */
  export type Rules = {
    pre_processing_rules: Array<PreProcessingRule>
    segmentation: Segmentation
  }
  /** A pre-processing rule referenced by `id`, with an on/off flag. */
  export type PreProcessingRule = {
    id: string
    enabled: boolean
  }
  /** Segmentation setting: a separator string and a `max_tokens` number. */
  export type Segmentation = {
    separator: string
    max_tokens: number
  }
  /**
   * Indexing status literals, in declaration order.
   *
   * The `as const` assertion keeps each entry as a literal type so the union
   * below can be derived from the array.
   */
  export const DocumentIndexingStatusList = [
    'waiting', 'parsing', 'cleaning', 'splitting',
    'indexing', 'paused', 'error', 'completed',
  ] as const
  /** Union of every literal in `DocumentIndexingStatusList`. */
  export type DocumentIndexingStatus = (typeof DocumentIndexingStatusList)[number]
  /**
   * Display status literals, in declaration order.
   *
   * The `as const` assertion keeps each entry as a literal type so the union
   * below can be derived from the array.
   */
  export const DisplayStatusList = [
    'queuing', 'indexing', 'paused', 'error',
    'available', 'enabled', 'disabled', 'archived',
  ] as const
  /** Union of every literal in `DisplayStatusList`. */
  export type DocumentDisplayStatus = (typeof DisplayStatusList)[number]
  /**
   * Data-source details. The only declared key is `upload_file`, an inline
   * description of the uploaded file.
   */
  export type DataSourceInfo = {
    upload_file: {
      id: string
      name: string
      size: number
      mime_type: string
      created_at: number
      created_by: string
      extension: string
    }
  }
  /**
   * Base document fields. `SimpleDocumentDetail` is built on top of this type
   * by intersection.
   */
  export type InitialDocumentDetail = {
    id: string
    position: number
    dataset_id: string
    // Only one source kind is declared.
    data_source_type: 'upload_file'
    data_source_info: DataSourceInfo
    dataset_process_rule_id: string
    name: string
    created_from:
      | 'api'
      | 'web'
    created_by: string
    created_at: number
    // Two separate status fields, each with its own literal union.
    indexing_status: DocumentIndexingStatus
    display_status: DocumentDisplayStatus
  }
  /**
   * `InitialDocumentDetail` plus the additional fields declared below.
   *
   * NOTE(review): `dataset_process_rule_id` is redeclared as optional here,
   * but `InitialDocumentDetail` declares it as required. In an intersection
   * the property stays required, so the `?` has no effect.
   */
  export type SimpleDocumentDetail = InitialDocumentDetail & {
    enabled: boolean
    word_count: number
    error?: string | null
    archived: boolean
    updated_at: number
    hit_count: number
    dataset_process_rule_id?: string
  }
  /** One page of `SimpleDocumentDetail` records together with its paging fields. */
  export type DocumentListResponse = {
    data: Array<SimpleDocumentDetail>
    has_more: boolean
    total: number
    page: number
    limit: number
  }
  /**
   * Request body for creating a document.
   *
   * `original_document_id` and `indexing_technique` are optional. The latter
   * is a plain `string` here, not the literal union used on `DataSet`.
   */
  export type CreateDocumentReq = {
    original_document_id?: string
    indexing_technique?: string
    name: string
    data_source: DataSource
    process_rule: ProcessRule
  }
  /** Source descriptor carried by `CreateDocumentReq`. */
  export type DataSource = {
    type: string
    // upload_file_id
    info: string
    name: string
  }
  /**
   * Process rule carried by `CreateDocumentReq`.
   *
   * `mode` is a plain `string` here, whereas `ProcessRuleResponse` uses the
   * `ProcessMode` union.
   */
  export type ProcessRule = {
    mode: string
    rules: Rules
  }
  /**
   * Result of creating a document: the new document, and optionally a dataset.
   *
   * NOTE(review): the name is camelCase, unlike the other exported types in
   * this file. It is kept as-is because importers reference it by this name.
   */
  export type createDocumentResponse = {
    dataset?: DataSet
    document: InitialDocumentDetail
  }
  /**
   * `SimpleDocumentDetail` plus the additional fields declared below.
   *
   * NOTE(review): two members deserve attention.
   * - `indexing_status` is redeclared as `string`. `InitialDocumentDetail`
   *   already declares it as `DocumentIndexingStatus`, and intersecting the
   *   two leaves the narrower union as the effective type.
   * - The trailing `[key: string]: any` index signature lets any other
   *   property name be read or written without type checking.
   */
  export type FullDocumentDetail = SimpleDocumentDetail & {
    batch: string
    created_api_request_id: string
    // Numeric markers for the individual pipeline stages.
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    tokens: number
    indexing_latency: number
    completed_at: number
    // Pause / stop bookkeeping.
    paused_by: string
    paused_at: number
    stopped_at: number
    indexing_status: string
    // Disable bookkeeping.
    disabled_at: number
    disabled_by: string
    // Archive bookkeeping.
    archived_reason:
      | 'rule_modified'
      | 're_upload'
    archived_by: string
    archived_at: number
    // Optional metadata; may also be explicitly null.
    doc_type?: DocType | null
    doc_metadata?: DocMetadata | null
    segment_count: number
    [key: string]: any
  }
  /**
   * String-valued document metadata.
   *
   * Seven keys are required. The index signature additionally allows any
   * other string key, as long as its value is a string.
   */
  export type DocMetadata = {
    title: string
    language: string
    author: string
    publisher: string
    publicationDate: string
    ISBN: string
    category: string
    [key: string]: string
  }
  /** Document-type literals in the "customizable" group, in declaration order. */
  export const CUSTOMIZABLE_DOC_TYPES = [
    'book',
    'web_page',
    'paper',
    'social_media_post',
    'personal_document',
    'business_document',
    'im_chat_log',
  ] as const
  /** Document-type literals in the "fixed" group, in declaration order. */
  export const FIXED_DOC_TYPES = [
    'synced_from_github',
    'synced_from_notion',
    'wikipedia_entry',
  ] as const
  /** Union of the literals in `CUSTOMIZABLE_DOC_TYPES`. */
  export type CustomizableDocType = (typeof CUSTOMIZABLE_DOC_TYPES)[number]
  /** Union of the literals in `FIXED_DOC_TYPES`. */
  export type FixedDocType = (typeof FIXED_DOC_TYPES)[number]
  /** Any document type from either group. */
  export type DocType =
    | CustomizableDocType
    | FixedDocType
  /** Alias of `FullDocumentDetail`; the two names are interchangeable. */
  export type DocumentDetailResponse = FullDocumentDetail
  /**
   * Status literals a segment can carry, in declaration order.
   *
   * Declared `as const` so that the entries keep their literal types. Without
   * the assertion the array is inferred as `string[]` and `SegmentStatus`
   * collapses to plain `string`, which constrains nothing. This matches how
   * `DocumentIndexingStatusList` and `DisplayStatusList` are declared.
   */
  export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing'] as const
  /** Union of the literals in `SEGMENT_STATUS_LIST`. */
  export type SegmentStatus = (typeof SEGMENT_STATUS_LIST)[number]
  /**
   * Query parameters for listing segments. Only `limit` is required; every
   * other member is optional.
   */
  export type SegmentsQuery = {
    last_id?: string
    limit: number
    // status?: SegmentStatus
    hit_count_gte?: number
    keyword?: string
    enabled?: boolean
  }
  /** Full record for a single segment of a document. */
  export type SegmentDetailModel = {
    id: string
    position: number
    document_id: string
    content: string
    // Size counters.
    word_count: number
    tokens: number
    keywords: Array<string>
    // Index node reference and its hash.
    index_node_id: string
    index_node_hash: string
    hit_count: number
    // Enablement state and disable bookkeeping.
    enabled: boolean
    disabled_at: number
    disabled_by: string
    status: SegmentStatus
    created_by: string
    created_at: number
    indexing_at: number
    completed_at: number
    // `null` is an accepted value for `error`.
    error: string | null
    stopped_at: number
  }
  /**
   * A batch of `SegmentDetailModel` records with `has_more`, `limit` and
   * `total`. No `page` member is declared on this type.
   */
  export type SegmentsResponse = {
    data: Array<SegmentDetailModel>
    has_more: boolean
    limit: number
    total: number
  }
  /**
   * One hit-testing record. `source` and `created_by_role` only accept the
   * string literals listed on each member.
   */
  export type HitTestingRecord = {
    id: string
    content: string
    source:
      | 'app'
      | 'hit_testing'
      | 'plugin'
    source_app_id: string
    created_by_role:
      | 'account'
      | 'end_user'
    created_by: string
    created_at: number
  }
  /** A matched `Segment` with its numeric `score` and a `TsnePosition`. */
  export type HitTesting = {
    segment: Segment
    score: number
    tsne_position: TsnePosition
  }
  /**
   * Segment shape used inside `HitTesting`. It embeds its parent as a
   * `Document` object under the `document` key.
   */
  export type Segment = {
    id: string
    document: Document
    content: string
    position: number
    word_count: number
    tokens: number
    keywords: Array<string>
    hit_count: number
    index_node_hash: string
  }
  /**
   * Compact document reference embedded in `Segment`.
   *
   * Note: inside this module the name shadows the global DOM `Document` type.
   */
  export type Document = {
    id: string
    data_source_type: string
    name: string
    doc_type: DocType
  }
  /** One page of `HitTestingRecord` entries together with its paging fields. */
  export type HitTestingRecordsResponse = {
    data: Array<HitTestingRecord>
    has_more: boolean
    limit: number
    total: number
    page: number
  }
  /** A pair of numeric coordinates, `x` and `y`. */
  export type TsnePosition = {
    x: number
    y: number
  }
  /**
   * Result of a hit test: the query (its content and position) and the list
   * of `HitTesting` records returned for it.
   */
  export type HitTestingResponse = {
    query: {
      content: string
      tsne_position: TsnePosition
    }
    records: HitTesting[]
  }
  /** An app entry; `mode` uses the `AppMode` type imported from `./app`. */
  export type RelatedApp = {
    id: string
    name: string
    mode: AppMode
    icon: string
    icon_background: string
  }
  /** A list of `RelatedApp` entries together with a `total` count. */
  export type RelatedAppResponse = {
    data: RelatedApp[]
    total: number
  }