template.zh.mdx 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090
  1. import { CodeGroup } from '@/app/components/develop/code.tsx'
  2. import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from '@/app/components/develop/md.tsx'
  3. # 数据集 API
  4. <br/>
  5. <br/>
  6. <Heading
  7. url='/datasets'
  8. method='POST'
  9. title='创建空数据集'
  10. name='#create_empty_dataset'
  11. />
  12. <Row>
  13. <Col>
  14. ### Request Body
  15. <Properties>
  16. <Property name='name' type='string' key='name'>
  17. 数据集名称
  18. </Property>
  19. </Properties>
  20. </Col>
  21. <Col sticky>
  22. <CodeGroup
  23. title="Request"
  24. tag="POST"
  25. label="/datasets"
  26. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name"}'`}
  27. >
  28. ```bash {{ title: 'cURL' }}
  29. curl --location --request POST '${props.apiBaseUrl}/datasets' \
  30. --header 'Authorization: Bearer {api_key}' \
  31. --header 'Content-Type: application/json' \
  32. --data-raw '{
  33. "name": "name"
  34. }'
  35. ```
  36. </CodeGroup>
  37. <CodeGroup title="Response">
  38. ```json {{ title: 'Response' }}
  39. {
  40. "id": "",
  41. "name": "name",
  42. "description": null,
  43. "provider": "vendor",
  44. "permission": "only_me",
  45. "data_source_type": null,
  46. "indexing_technique": null,
  47. "app_count": 0,
  48. "document_count": 0,
  49. "word_count": 0,
  50. "created_by": "",
  51. "created_at": 1695636173,
  52. "updated_by": "",
  53. "updated_at": 1695636173,
  54. "embedding_model": null,
  55. "embedding_model_provider": null,
  56. "embedding_available": null
  57. }
  58. ```
  59. </CodeGroup>
  60. </Col>
  61. </Row>
  62. ---
  63. <Heading
  64. url='/datasets'
  65. method='GET'
  66. title='数据集列表'
  67. name='#dataset_list'
  68. />
  69. <Row>
  70. <Col>
  71. ### Query
  72. <Properties>
  73. <Property name='page' type='string' key='page'>
  74. 页码
  75. </Property>
  76. <Property name='limit' type='string' key='limit'>
  77. 返回条数,默认 20,范围 1-100
  78. </Property>
  79. </Properties>
  80. </Col>
  81. <Col sticky>
  82. <CodeGroup
  83. title="Request"
  84. tag="GET"
  85. label="/datasets"
  86. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
  87. >
  88. ```bash {{ title: 'cURL' }}
  89. curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \
  90. --header 'Authorization: Bearer {api_key}'
  91. ```
  92. </CodeGroup>
  93. <CodeGroup title="Response">
  94. ```json {{ title: 'Response' }}
  95. {
  96. "data": [
  97. {
  98. "id": "",
  99. "name": "数据集名称",
  100. "description": "描述信息",
  101. "permission": "only_me",
  102. "data_source_type": "upload_file",
  103. "indexing_technique": "",
  104. "app_count": 2,
  105. "document_count": 10,
  106. "word_count": 1200,
  107. "created_by": "",
  108. "created_at": "",
  109. "updated_by": "",
  110. "updated_at": ""
  111. },
  112. ...
  113. ],
  114. "has_more": true,
  115. "limit": 20,
  116. "total": 50,
  117. "page": 1
  118. }
  119. ```
  120. </CodeGroup>
  121. </Col>
  122. </Row>
  123. ---
  124. <Heading
  125. url='/datasets/{dataset_id}/document/create_by_text'
  126. method='POST'
  127. title='通过文本创建文档'
  128. name='#create_by_text'
  129. />
  130. <Row>
  131. <Col>
  132. 此接口基于已存在数据集,在此数据集的基础上通过文本创建新的文档
  133. ### Path
  134. <Properties>
  135. <Property name='dataset_id' type='string' key='dataset_id'>
  136. 数据集 ID
  137. </Property>
  138. </Properties>
  139. ### Request Body
  140. <Properties>
  141. <Property name='name' type='string' key='name'>
  142. 文档名称
  143. </Property>
  144. <Property name='text' type='string' key='text'>
  145. 文档内容
  146. </Property>
  147. <Property name='indexing_technique' type='string' key='indexing_technique'>
  148. 索引方式
  149. - <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
  150. - <code>economy</code> 经济:使用 Keyword Table Index 的倒排索引进行构建
  151. </Property>
  152. <Property name='process_rule' type='object' key='process_rule'>
  153. 处理规则
  154. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  155. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  156. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  157. - <code>id</code> (string) 预处理规则的唯一标识符
  158. - 枚举:
  159. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  160. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  161. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  162. - <code>segmentation</code> (object) 分段规则
  163. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  164. - <code>max_tokens</code> 最大长度 (token) 默认为 1000
  165. </Property>
  166. </Properties>
  167. </Col>
  168. <Col sticky>
  169. <CodeGroup
  170. title="Request"
  171. tag="POST"
  172. label="/datasets/{dataset_id}/document/create_by_text"
  173. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
  174. >
  175. ```bash {{ title: 'cURL' }}
  176. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \
  177. --header 'Authorization: Bearer {api_key}' \
  178. --header 'Content-Type: application/json' \
  179. --data-raw '{
  180. "name": "text",
  181. "text": "text",
  182. "indexing_technique": "high_quality",
  183. "process_rule": {
  184. "mode": "automatic"
  185. }
  186. }'
  187. ```
  188. </CodeGroup>
  189. <CodeGroup title="Response">
  190. ```json {{ title: 'Response' }}
  191. {
  192. "document": {
  193. "id": "",
  194. "position": 1,
  195. "data_source_type": "upload_file",
  196. "data_source_info": {
  197. "upload_file_id": ""
  198. },
  199. "dataset_process_rule_id": "",
  200. "name": "text.txt",
  201. "created_from": "api",
  202. "created_by": "",
  203. "created_at": 1695690280,
  204. "tokens": 0,
  205. "indexing_status": "waiting",
  206. "error": null,
  207. "enabled": true,
  208. "disabled_at": null,
  209. "disabled_by": null,
  210. "archived": false,
  211. "display_status": "queuing",
  212. "word_count": 0,
  213. "hit_count": 0,
  214. "doc_form": "text_model"
  215. },
  216. "batch": ""
  217. }
  218. ```
  219. </CodeGroup>
  220. </Col>
  221. </Row>
  222. ---
  223. <Heading
  224. url='/datasets/{dataset_id}/document/create_by_file'
  225. method='POST'
  226. title='通过文件创建文档'
  227. name='#create_by_file'
  228. />
  229. <Row>
  230. <Col>
  231. 此接口基于已存在数据集,在此数据集的基础上通过文件创建新的文档
  232. ### Path
  233. <Properties>
  234. <Property name='dataset_id' type='string' key='dataset_id'>
  235. 数据集 ID
  236. </Property>
  237. </Properties>
  238. ### Request Body
  239. <Properties>
  240. <Property name='original_document_id' type='string' key='original_document_id'>
  241. 源文档 ID (选填)
  242. - 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制
  243. - 源文档不可为归档的文档
  244. - 当传入 <code>original_document_id</code> 时,代表文档进行更新操作,<code>process_rule</code> 为可填项目,不填默认使用源文档的分段方式
  245. - 未传入 <code>original_document_id</code> 时,代表文档进行新增操作,<code>process_rule</code> 为必填
  246. </Property>
  247. <Property name='file' type='multipart/form-data' key='file'>
  248. 需要上传的文件。
  249. </Property>
  250. <Property name='indexing_technique' type='string' key='indexing_technique'>
  251. 索引方式
  252. - <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
  253. - <code>economy</code> 经济:使用 Keyword Table Index 的倒排索引进行构建
  254. </Property>
  255. <Property name='process_rule' type='object' key='process_rule'>
  256. 处理规则
  257. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  258. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  259. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  260. - <code>id</code> (string) 预处理规则的唯一标识符
  261. - 枚举:
  262. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  263. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  264. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  265. - <code>segmentation</code> (object) 分段规则
  266. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  267. - <code>max_tokens</code> 最大长度 (token) 默认为 1000
  268. </Property>
  269. </Properties>
  270. </Col>
  271. <Col sticky>
  272. <CodeGroup
  273. title="Request"
  274. tag="POST"
  275. label="/datasets/{dataset_id}/document/create_by_file"
  276. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{\\"name\\":\\"Dify\\",\\"indexing_technique\\":\\"high_quality\\",\\"process_rule\\":{\\"rules\\":{\\"pre_processing_rules\\":[{\\"id\\":\\"remove_extra_spaces\\",\\"enabled\\":true},{\\"id\\":\\"remove_urls_emails\\",\\"enabled\\":true}],\\"segmentation\\":{\\"separator\\":\\"###\\",\\"max_tokens\\":500}},\\"mode\\":\\"custom\\"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  277. >
  278. ```bash {{ title: 'cURL' }}
  279. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
  280. --header 'Authorization: Bearer {api_key}' \
  281. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  282. --form 'file=@"/path/to/file"'
  283. ```
  284. </CodeGroup>
  285. <CodeGroup title="Response">
  286. ```json {{ title: 'Response' }}
  287. {
  288. "document": {
  289. "id": "",
  290. "position": 1,
  291. "data_source_type": "upload_file",
  292. "data_source_info": {
  293. "upload_file_id": ""
  294. },
  295. "dataset_process_rule_id": "",
  296. "name": "Dify.txt",
  297. "created_from": "api",
  298. "created_by": "",
  299. "created_at": 1695308667,
  300. "tokens": 0,
  301. "indexing_status": "waiting",
  302. "error": null,
  303. "enabled": true,
  304. "disabled_at": null,
  305. "disabled_by": null,
  306. "archived": false,
  307. "display_status": "queuing",
  308. "word_count": 0,
  309. "hit_count": 0,
  310. "doc_form": "text_model"
  311. },
  312. "batch": ""
  313. }
  314. ```
  315. </CodeGroup>
  316. </Col>
  317. </Row>
  318. ---
  319. <Heading
  320. url='/datasets/{dataset_id}/documents/{document_id}/update_by_text'
  321. method='POST'
  322. title='通过文本更新文档'
  323. name='#update_by_text'
  324. />
  325. <Row>
  326. <Col>
  327. 此接口基于已存在数据集,在此数据集的基础上通过文本更新文档
  328. ### Path
  329. <Properties>
  330. <Property name='dataset_id' type='string' key='dataset_id'>
  331. 数据集 ID
  332. </Property>
  333. <Property name='document_id' type='string' key='document_id'>
  334. 文档 ID
  335. </Property>
  336. </Properties>
  337. ### Request Body
  338. <Properties>
  339. <Property name='name' type='string' key='name'>
  340. 文档名称 (选填)
  341. </Property>
  342. <Property name='text' type='string' key='text'>
  343. 文档内容(选填)
  344. </Property>
  345. <Property name='process_rule' type='object' key='process_rule'>
  346. 处理规则(选填)
  347. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  348. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  349. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  350. - <code>id</code> (string) 预处理规则的唯一标识符
  351. - 枚举:
  352. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  353. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  354. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  355. - <code>segmentation</code> (object) 分段规则
  356. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  357. - <code>max_tokens</code> 最大长度 (token) 默认为 1000
  358. </Property>
  359. </Properties>
  360. </Col>
  361. <Col sticky>
  362. <CodeGroup
  363. title="Request"
  364. tag="POST"
  365. label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
  366. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
  367. >
  368. ```bash {{ title: 'cURL' }}
  369. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
  370. --header 'Authorization: Bearer {api_key}' \
  371. --header 'Content-Type: application/json' \
  372. --data-raw '{
  373. "name": "name",
  374. "text": "text"
  375. }'
  376. ```
  377. </CodeGroup>
  378. <CodeGroup title="Response">
  379. ```json {{ title: 'Response' }}
  380. {
  381. "document": {
  382. "id": "",
  383. "position": 1,
  384. "data_source_type": "upload_file",
  385. "data_source_info": {
  386. "upload_file_id": ""
  387. },
  388. "dataset_process_rule_id": "",
  389. "name": "name.txt",
  390. "created_from": "api",
  391. "created_by": "",
  392. "created_at": 1695308667,
  393. "tokens": 0,
  394. "indexing_status": "waiting",
  395. "error": null,
  396. "enabled": true,
  397. "disabled_at": null,
  398. "disabled_by": null,
  399. "archived": false,
  400. "display_status": "queuing",
  401. "word_count": 0,
  402. "hit_count": 0,
  403. "doc_form": "text_model"
  404. },
  405. "batch": ""
  406. }
  407. ```
  408. </CodeGroup>
  409. </Col>
  410. </Row>
  411. ---
  412. <Heading
  413. url='/datasets/{dataset_id}/documents/{document_id}/update_by_file'
  414. method='POST'
  415. title='通过文件更新文档'
  416. name='#update_by_file'
  417. />
  418. <Row>
  419. <Col>
  420. 此接口基于已存在数据集,在此数据集的基础上通过文件更新文档的操作。
  421. ### Path
  422. <Properties>
  423. <Property name='dataset_id' type='string' key='dataset_id'>
  424. 数据集 ID
  425. </Property>
  426. <Property name='document_id' type='string' key='document_id'>
  427. 文档 ID
  428. </Property>
  429. </Properties>
  430. ### Request Body
  431. <Properties>
  432. <Property name='name' type='string' key='name'>
  433. 文档名称 (选填)
  434. </Property>
  435. <Property name='file' type='multipart/form-data' key='file'>
  436. 需要上传的文件
  437. </Property>
  438. <Property name='process_rule' type='object' key='process_rule'>
  439. 处理规则(选填)
  440. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  441. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  442. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  443. - <code>id</code> (string) 预处理规则的唯一标识符
  444. - 枚举:
  445. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  446. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  447. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  448. - <code>segmentation</code> (object) 分段规则
  449. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  450. - <code>max_tokens</code> 最大长度 (token) 默认为 1000
  451. </Property>
  452. </Properties>
  453. </Col>
  454. <Col sticky>
  455. <CodeGroup
  456. title="Request"
  457. tag="POST"
  458. label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
  459. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{\\"name\\":\\"Dify\\",\\"indexing_technique\\":\\"high_quality\\",\\"process_rule\\":{\\"rules\\":{\\"pre_processing_rules\\":[{\\"id\\":\\"remove_extra_spaces\\",\\"enabled\\":true},{\\"id\\":\\"remove_urls_emails\\",\\"enabled\\":true}],\\"segmentation\\":{\\"separator\\":\\"###\\",\\"max_tokens\\":500}},\\"mode\\":\\"custom\\"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  460. >
  461. ```bash {{ title: 'cURL' }}
  462. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \
  463. --header 'Authorization: Bearer {api_key}' \
  464. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  465. --form 'file=@"/path/to/file"'
  466. ```
  467. </CodeGroup>
  468. <CodeGroup title="Response">
  469. ```json {{ title: 'Response' }}
  470. {
  471. "document": {
  472. "id": "",
  473. "position": 1,
  474. "data_source_type": "upload_file",
  475. "data_source_info": {
  476. "upload_file_id": ""
  477. },
  478. "dataset_process_rule_id": "",
  479. "name": "Dify.txt",
  480. "created_from": "api",
  481. "created_by": "",
  482. "created_at": 1695308667,
  483. "tokens": 0,
  484. "indexing_status": "waiting",
  485. "error": null,
  486. "enabled": true,
  487. "disabled_at": null,
  488. "disabled_by": null,
  489. "archived": false,
  490. "display_status": "queuing",
  491. "word_count": 0,
  492. "hit_count": 0,
  493. "doc_form": "text_model"
  494. },
  495. "batch": "20230921150427533684"
  496. }
  497. ```
  498. </CodeGroup>
  499. </Col>
  500. </Row>
  501. ---
  502. <Heading
  503. url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
  504. method='GET'
  505. title='获取文档嵌入状态(进度)'
  506. name='#indexing_status'
  507. />
  508. <Row>
  509. <Col>
  510. ### Path
  511. <Properties>
  512. <Property name='dataset_id' type='string' key='dataset_id'>
  513. 数据集 ID
  514. </Property>
  515. <Property name='batch' type='string' key='batch'>
  516. 上传文档的批次号
  517. </Property>
  518. </Properties>
  519. </Col>
  520. <Col sticky>
  521. <CodeGroup
  522. title="Request"
  523. tag="GET"
  524. label="/datasets/{dataset_id}/documents/{batch}/indexing-status"
  525. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
  526. >
  527. ```bash {{ title: 'cURL' }}
  528. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \
  529. --header 'Authorization: Bearer {api_key}'
  530. ```
  531. </CodeGroup>
  532. <CodeGroup title="Response">
  533. ```json {{ title: 'Response' }}
  534. {
  535. "data":[{
  536. "id": "",
  537. "indexing_status": "indexing",
  538. "processing_started_at": 1681623462.0,
  539. "parsing_completed_at": 1681623462.0,
  540. "cleaning_completed_at": 1681623462.0,
  541. "splitting_completed_at": 1681623462.0,
  542. "completed_at": null,
  543. "paused_at": null,
  544. "error": null,
  545. "stopped_at": null,
  546. "completed_segments": 24,
  547. "total_segments": 100
  548. }]
  549. }
  550. ```
  551. </CodeGroup>
  552. </Col>
  553. </Row>
  554. ---
  555. <Heading
  556. url='/datasets/{dataset_id}/documents/{document_id}'
  557. method='DELETE'
  558. title='删除文档'
  559. name='#delete_document'
  560. />
  561. <Row>
  562. <Col>
  563. ### Path
  564. <Properties>
  565. <Property name='dataset_id' type='string' key='dataset_id'>
  566. 数据集 ID
  567. </Property>
  568. <Property name='document_id' type='string' key='document_id'>
  569. 文档 ID
  570. </Property>
  571. </Properties>
  572. </Col>
  573. <Col sticky>
  574. <CodeGroup
  575. title="Request"
  576. tag="DELETE"
  577. label="/datasets/{dataset_id}/documents/{document_id}"
  578. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  579. >
  580. ```bash {{ title: 'cURL' }}
  581. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \
  582. --header 'Authorization: Bearer {api_key}'
  583. ```
  584. </CodeGroup>
  585. <CodeGroup title="Response">
  586. ```json {{ title: 'Response' }}
  587. {
  588. "result": "success"
  589. }
  590. ```
  591. </CodeGroup>
  592. </Col>
  593. </Row>
  594. ---
  595. <Heading
  596. url='/datasets/{dataset_id}/documents'
  597. method='GET'
  598. title='数据集文档列表'
  599. name='#dataset_document_list'
  600. />
  601. <Row>
  602. <Col>
  603. ### Path
  604. <Properties>
  605. <Property name='dataset_id' type='string' key='dataset_id'>
  606. 数据集 ID
  607. </Property>
  608. </Properties>
  609. ### Query
  610. <Properties>
  611. <Property name='keyword' type='string' key='keyword'>
  612. 搜索关键词,可选,目前仅搜索文档名称
  613. </Property>
  614. <Property name='page' type='string' key='page'>
  615. 页码,可选
  616. </Property>
  617. <Property name='limit' type='string' key='limit'>
  618. 返回条数,可选,默认 20,范围 1-100
  619. </Property>
  620. </Properties>
  621. </Col>
  622. <Col sticky>
  623. <CodeGroup
  624. title="Request"
  625. tag="GET"
  626. label="/datasets/{dataset_id}/documents"
  627. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
  628. >
  629. ```bash {{ title: 'cURL' }}
  630. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \
  631. --header 'Authorization: Bearer {api_key}'
  632. ```
  633. </CodeGroup>
  634. <CodeGroup title="Response">
  635. ```json {{ title: 'Response' }}
  636. {
  637. "data": [
  638. {
  639. "id": "",
  640. "position": 1,
  641. "data_source_type": "file_upload",
  642. "data_source_info": null,
  643. "dataset_process_rule_id": null,
  644. "name": "dify",
  645. "created_from": "",
  646. "created_by": "",
  647. "created_at": 1681623639,
  648. "tokens": 0,
  649. "indexing_status": "waiting",
  650. "error": null,
  651. "enabled": true,
  652. "disabled_at": null,
  653. "disabled_by": null,
  654. "archived": false
  655. }
  656. ],
  657. "has_more": false,
  658. "limit": 20,
  659. "total": 9,
  660. "page": 1
  661. }
  662. ```
  663. </CodeGroup>
  664. </Col>
  665. </Row>
  666. ---
  667. <Heading
  668. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  669. method='POST'
  670. title='新增分段'
  671. name='#create_new_segment'
  672. />
  673. <Row>
  674. <Col>
  675. ### Path
  676. <Properties>
  677. <Property name='dataset_id' type='string' key='dataset_id'>
  678. 数据集 ID
  679. </Property>
  680. <Property name='document_id' type='string' key='document_id'>
  681. 文档 ID
  682. </Property>
  683. </Properties>
  684. ### Request Body
  685. <Properties>
  686. <Property name='segments' type='object list' key='segments'>
  687. - <code>content</code> (text) 文本内容/问题内容,必填
  688. - <code>answer</code> (text) 答案内容,非必填,如果数据集的模式为qa模式则传值
  689. - <code>keywords</code> (list) 关键字,非必填
  690. </Property>
  691. </Properties>
  692. </Col>
  693. <Col sticky>
  694. <CodeGroup
  695. title="Request"
  696. tag="POST"
  697. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  698. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
  699. >
  700. ```bash {{ title: 'cURL' }}
  701. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  702. --header 'Authorization: Bearer {api_key}' \
  703. --header 'Content-Type: application/json' \
  704. --data-raw '{
  705. "segments": [
  706. {
  707. "content": "1",
  708. "answer": "1",
  709. "keywords": ["a"]
  710. }
  711. ]
  712. }'
  713. ```
  714. </CodeGroup>
  715. <CodeGroup title="Response">
  716. ```json {{ title: 'Response' }}
  717. {
  718. "data": [{
  719. "id": "",
  720. "position": 1,
  721. "document_id": "",
  722. "content": "1",
  723. "answer": "1",
  724. "word_count": 25,
  725. "tokens": 0,
  726. "keywords": [
  727. "a"
  728. ],
  729. "index_node_id": "",
  730. "index_node_hash": "",
  731. "hit_count": 0,
  732. "enabled": true,
  733. "disabled_at": null,
  734. "disabled_by": null,
  735. "status": "completed",
  736. "created_by": "",
  737. "created_at": 1695312007,
  738. "indexing_at": 1695312007,
  739. "completed_at": 1695312007,
  740. "error": null,
  741. "stopped_at": null
  742. }],
  743. "doc_form": "text_model"
  744. }
  745. ```
  746. </CodeGroup>
  747. </Col>
  748. </Row>
  749. ---
  750. <Heading
  751. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  752. method='GET'
  753. title='查询文档分段'
  754. name='#get_segment'
  755. />
  756. <Row>
  757. <Col>
  758. ### Path
  759. <Properties>
  760. <Property name='dataset_id' type='string' key='dataset_id'>
  761. 数据集 ID
  762. </Property>
  763. <Property name='document_id' type='string' key='document_id'>
  764. 文档 ID
  765. </Property>
  766. </Properties>
  767. ### Query
  768. <Properties>
  769. <Property name='keyword' type='string' key='keyword'>
  770. 搜索关键词,可选
  771. </Property>
  772. <Property name='status' type='string' key='status'>
  773. 搜索状态,completed
  774. </Property>
  775. </Properties>
  776. </Col>
  777. <Col sticky>
  778. <CodeGroup
  779. title="Request"
  780. tag="GET"
  781. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  782. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  783. >
  784. ```bash {{ title: 'cURL' }}
  785. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  786. --header 'Authorization: Bearer {api_key}' \
  787. --header 'Content-Type: application/json'
  788. ```
  789. </CodeGroup>
  790. <CodeGroup title="Response">
  791. ```json {{ title: 'Response' }}
  792. {
  793. "data": [{
  794. "id": "",
  795. "position": 1,
  796. "document_id": "",
  797. "content": "1",
  798. "answer": "1",
  799. "word_count": 25,
  800. "tokens": 0,
  801. "keywords": [
  802. "a"
  803. ],
  804. "index_node_id": "",
  805. "index_node_hash": "",
  806. "hit_count": 0,
  807. "enabled": true,
  808. "disabled_at": null,
  809. "disabled_by": null,
  810. "status": "completed",
  811. "created_by": "",
  812. "created_at": 1695312007,
  813. "indexing_at": 1695312007,
  814. "completed_at": 1695312007,
  815. "error": null,
  816. "stopped_at": null
  817. }],
  818. "doc_form": "text_model"
  819. }
  820. ```
  821. </CodeGroup>
  822. </Col>
  823. </Row>
  824. ---
  825. <Heading
  826. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
  827. method='DELETE'
  828. title='删除文档分段'
  829. name='#delete_segment'
  830. />
  831. <Row>
  832. <Col>
  833. ### Path
  834. <Properties>
  835. <Property name='dataset_id' type='string' key='dataset_id'>
  836. 数据集 ID
  837. </Property>
  838. <Property name='segment_id' type='string' key='segment_id'>
  839. 文档分段ID
  840. </Property>
  841. </Properties>
  842. </Col>
  843. <Col sticky>
  844. <CodeGroup
  845. title="Request"
  846. tag="DELETE"
  847. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
  848. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  849. >
  850. ```bash {{ title: 'cURL' }}
  851. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
  852. --header 'Authorization: Bearer {api_key}' \
  853. --header 'Content-Type: application/json'
  854. ```
  855. </CodeGroup>
  856. <CodeGroup title="Response">
  857. ```json {{ title: 'Response' }}
  858. {
  859. "result": "success"
  860. }
  861. ```
  862. </CodeGroup>
  863. </Col>
  864. </Row>
  865. ---
  866. <Heading
  867. url='/datasets/{dataset_id}/segments/{segment_id}'
  868. method='POST'
  869. title='更新文档分段'
  870. name='#update_segment'
  871. />
  872. <Row>
  873. <Col>
  874. ### POST
  875. <Properties>
  876. <Property name='dataset_id' type='string' key='dataset_id'>
  877. 数据集 ID
  878. </Property>
  879. <Property name='segment_id' type='string' key='segment_id'>
  880. 文档分段ID
  881. </Property>
  882. </Properties>
  883. ### Request Body
  884. <Properties>
  885. <Property name='segments' type='object list' key='segments'>
  886. - <code>content</code> (text) 文本内容/问题内容,必填
  887. - <code>answer</code> (text) 答案内容,非必填,如果数据集的模式为qa模式则传值
  888. - <code>keywords</code> (list) 关键字,非必填
  889. - <code>enabled</code> (bool) false/true,非必填
  890. </Property>
  891. </Properties>
  892. </Col>
  893. <Col sticky>
  894. <CodeGroup
  895. title="Request"
  896. tag="POST"
  897. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
  898. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\"segments\": {\"content\": \"1\", \"answer\": \"1\", \"keywords\": [\"a\"], \"enabled\": false}}'`}
  899. >
  900. ```bash {{ title: 'cURL' }}
  901. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
  902. --header 'Authorization: Bearer {api_key}' \
  903. --header 'Content-Type: application/json' \
  904. --data-raw '{
  905. "segments": {
  906. "content": "1",
  907. "answer": "1",
  908. "keywords": ["a"],
  909. "enabled": false
  910. }
  911. }'
  912. ```
  913. </CodeGroup>
  914. <CodeGroup title="Response">
  915. ```json {{ title: 'Response' }}
  916. {
  917. "data": [{
  918. "id": "",
  919. "position": 1,
  920. "document_id": "",
  921. "content": "1",
  922. "answer": "1",
  923. "word_count": 25,
  924. "tokens": 0,
  925. "keywords": [
  926. "a"
  927. ],
  928. "index_node_id": "",
  929. "index_node_hash": "",
  930. "hit_count": 0,
  931. "enabled": true,
  932. "disabled_at": null,
  933. "disabled_by": null,
  934. "status": "completed",
  935. "created_by": "",
  936. "created_at": 1695312007,
  937. "indexing_at": 1695312007,
  938. "completed_at": 1695312007,
  939. "error": null,
  940. "stopped_at": null
  941. }],
  942. "doc_form": "text_model"
  943. }
  944. ```
  945. </CodeGroup>
  946. </Col>
  947. </Row>
  948. ---
  949. <Row>
  950. <Col>
  951. ### 错误信息
  952. <Properties>
  953. <Property name='code' type='string' key='code'>
  954. 返回的错误代码
  955. </Property>
  956. </Properties>
  957. <Properties>
  958. <Property name='status' type='number' key='status'>
  959. 返回的错误状态
  960. </Property>
  961. </Properties>
  962. <Properties>
  963. <Property name='message' type='string' key='message'>
  964. 返回的错误信息
  965. </Property>
  966. </Properties>
  967. </Col>
  968. <Col>
  969. <CodeGroup title="Example">
  970. ```json {{ title: 'Response' }}
  971. {
  972. "code": "no_file_uploaded",
  973. "message": "Please upload your file.",
  974. "status": 400
  975. }
  976. ```
  977. </CodeGroup>
  978. </Col>
  979. </Row>
  980. <table className="max-w-auto border-collapse border border-slate-400" style={{ maxWidth: 'none', width: 'auto' }}>
  981. <thead style={{ background: '#f9fafc' }}>
  982. <tr>
  983. <th className="p-2 border border-slate-300">code</th>
  984. <th className="p-2 border border-slate-300">status</th>
  985. <th className="p-2 border border-slate-300">message</th>
  986. </tr>
  987. </thead>
  988. <tbody>
  989. <tr>
  990. <td className="p-2 border border-slate-300">no_file_uploaded</td>
  991. <td className="p-2 border border-slate-300">400</td>
  992. <td className="p-2 border border-slate-300">Please upload your file.</td>
  993. </tr>
  994. <tr>
  995. <td className="p-2 border border-slate-300">too_many_files</td>
  996. <td className="p-2 border border-slate-300">400</td>
  997. <td className="p-2 border border-slate-300">Only one file is allowed.</td>
  998. </tr>
  999. <tr>
  1000. <td className="p-2 border border-slate-300">file_too_large</td>
  1001. <td className="p-2 border border-slate-300">413</td>
  1002. <td className="p-2 border border-slate-300">File size exceeded.</td>
  1003. </tr>
  1004. <tr>
  1005. <td className="p-2 border border-slate-300">unsupported_file_type</td>
  1006. <td className="p-2 border border-slate-300">415</td>
  1007. <td className="p-2 border border-slate-300">File type not allowed.</td>
  1008. </tr>
  1009. <tr>
  1010. <td className="p-2 border border-slate-300">high_quality_dataset_only</td>
  1011. <td className="p-2 border border-slate-300">400</td>
  1012. <td className="p-2 border border-slate-300">Current operation only supports 'high-quality' datasets.</td>
  1013. </tr>
  1014. <tr>
  1015. <td className="p-2 border border-slate-300">dataset_not_initialized</td>
  1016. <td className="p-2 border border-slate-300">400</td>
  1017. <td className="p-2 border border-slate-300">The dataset is still being initialized or indexing. Please wait a moment.</td>
  1018. </tr>
  1019. <tr>
  1020. <td className="p-2 border border-slate-300">archived_document_immutable</td>
  1021. <td className="p-2 border border-slate-300">403</td>
  1022. <td className="p-2 border border-slate-300">The archived document is not editable.</td>
  1023. </tr>
  1024. <tr>
  1025. <td className="p-2 border border-slate-300">dataset_name_duplicate</td>
  1026. <td className="p-2 border border-slate-300">409</td>
  1027. <td className="p-2 border border-slate-300">The dataset name already exists. Please modify your dataset name.</td>
  1028. </tr>
  1029. <tr>
  1030. <td className="p-2 border border-slate-300">invalid_action</td>
  1031. <td className="p-2 border border-slate-300">400</td>
  1032. <td className="p-2 border border-slate-300">Invalid action.</td>
  1033. </tr>
  1034. <tr>
  1035. <td className="p-2 border border-slate-300">document_already_finished</td>
  1036. <td className="p-2 border border-slate-300">400</td>
  1037. <td className="p-2 border border-slate-300">The document has been processed. Please refresh the page or go to the document details.</td>
  1038. </tr>
  1039. <tr>
  1040. <td className="p-2 border border-slate-300">document_indexing</td>
  1041. <td className="p-2 border border-slate-300">400</td>
  1042. <td className="p-2 border border-slate-300">The document is being processed and cannot be edited.</td>
  1043. </tr>
  1044. <tr>
  1045. <td className="p-2 border border-slate-300">invalid_metadata</td>
  1046. <td className="p-2 border border-slate-300">400</td>
  1047. <td className="p-2 border border-slate-300">The metadata content is incorrect. Please check and verify.</td>
  1048. </tr>
  1049. </tbody>
  1050. </table>
  1051. <div className="pb-4" />