template.zh.mdx 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108
  1. import { CodeGroup } from '@/app/components/develop/code.tsx'
  2. import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from '@/app/components/develop/md.tsx'
  3. # 知识库 API
  4. <div>
  5. ### 鉴权
  6. Dify Service API 使用 `API-Key` 进行鉴权。
  7. 建议开发者把 `API-Key` 放在后端存储,而非分享或者放在客户端存储,以免 `API-Key` 泄露,导致财产损失。
  8. 所有 API 请求都应在 **`Authorization`** HTTP Header 中包含您的 `API-Key`,如下所示:
  9. <CodeGroup title="Code">
  10. ```javascript
  11. Authorization: Bearer {API_KEY}
  12. ```
  13. </CodeGroup>
  14. </div>
  15. ---
  16. <Heading
  17. url='/datasets/{dataset_id}/document/create_by_text'
  18. method='POST'
  19. title='通过文本创建文档'
  20. name='#create_by_text'
  21. />
  22. <Row>
  23. <Col>
  24. 此接口基于已存在知识库,在此知识库的基础上通过文本创建新的文档
  25. ### Path
  26. <Properties>
  27. <Property name='dataset_id' type='string' key='dataset_id'>
  28. 知识库 ID
  29. </Property>
  30. </Properties>
  31. ### Request Body
  32. <Properties>
  33. <Property name='name' type='string' key='name'>
  34. 文档名称
  35. </Property>
  36. <Property name='text' type='string' key='text'>
  37. 文档内容
  38. </Property>
  39. <Property name='indexing_technique' type='string' key='indexing_technique'>
  40. 索引方式
  41. - <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
  42. - <code>economy</code> 经济:使用 Keyword Table Index 的倒排索引进行构建
  43. </Property>
  44. <Property name='process_rule' type='object' key='process_rule'>
  45. 处理规则
  46. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  47. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  48. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  49. - <code>id</code> (string) 预处理规则的唯一标识符
  50. - 枚举:
  51. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  52. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  53. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  54. - <code>segmentation</code> (object) 分段规则
  55. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  56. - <code>max_tokens</code> 最大长度 (token) 默认为 1000
  57. </Property>
  58. </Properties>
  59. </Col>
  60. <Col sticky>
  61. <CodeGroup
  62. title="Request"
  63. tag="POST"
  64. label="/datasets/{dataset_id}/document/create_by_text"
  65. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
  66. >
  67. ```bash {{ title: 'cURL' }}
  68. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \
  69. --header 'Authorization: Bearer {api_key}' \
  70. --header 'Content-Type: application/json' \
  71. --data-raw '{
  72. "name": "text",
  73. "text": "text",
  74. "indexing_technique": "high_quality",
  75. "process_rule": {
  76. "mode": "automatic"
  77. }
  78. }'
  79. ```
  80. </CodeGroup>
  81. <CodeGroup title="Response">
  82. ```json {{ title: 'Response' }}
  83. {
  84. "document": {
  85. "id": "",
  86. "position": 1,
  87. "data_source_type": "upload_file",
  88. "data_source_info": {
  89. "upload_file_id": ""
  90. },
  91. "dataset_process_rule_id": "",
  92. "name": "text.txt",
  93. "created_from": "api",
  94. "created_by": "",
  95. "created_at": 1695690280,
  96. "tokens": 0,
  97. "indexing_status": "waiting",
  98. "error": null,
  99. "enabled": true,
  100. "disabled_at": null,
  101. "disabled_by": null,
  102. "archived": false,
  103. "display_status": "queuing",
  104. "word_count": 0,
  105. "hit_count": 0,
  106. "doc_form": "text_model"
  107. },
  108. "batch": ""
  109. }
  110. ```
  111. </CodeGroup>
  112. </Col>
  113. </Row>
  114. ---
  115. <Heading
  116. url='/datasets/{dataset_id}/document/create_by_file'
  117. method='POST'
  118. title='通过文件创建文档'
  119. name='#create_by_file'
  120. />
  121. <Row>
  122. <Col>
  123. 此接口基于已存在知识库,在此知识库的基础上通过文件创建新的文档
  124. ### Path
  125. <Properties>
  126. <Property name='dataset_id' type='string' key='dataset_id'>
  127. 知识库 ID
  128. </Property>
  129. </Properties>
  130. ### Request Body
  131. <Properties>
  132. <Property name='original_document_id' type='string' key='original_document_id'>
  133. 源文档 ID (选填)
  134. - 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制
  135. - 源文档不可为归档的文档
  136. - 当传入 <code>original_document_id</code> 时,代表文档进行更新操作,<code>process_rule</code> 为可填项目,不填默认使用源文档的分段方式
  137. - 未传入 <code>original_document_id</code> 时,代表文档进行新增操作,<code>process_rule</code> 为必填
  138. </Property>
  139. <Property name='file' type='multipart/form-data' key='file'>
  140. 需要上传的文件。
  141. </Property>
  142. <Property name='indexing_technique' type='string' key='indexing_technique'>
  143. 索引方式
  144. - <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
  145. - <code>economy</code> 经济:使用 Keyword Table Index 的倒排索引进行构建
  146. </Property>
  147. <Property name='process_rule' type='object' key='process_rule'>
  148. 处理规则
  149. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  150. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  151. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  152. - <code>id</code> (string) 预处理规则的唯一标识符
  153. - 枚举:
  154. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  155. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  156. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  157. - <code>segmentation</code> (object) 分段规则
  158. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  159. - <code>max_tokens</code> 最大长度 (token) 默认为 1000
  160. </Property>
  161. </Properties>
  162. </Col>
  163. <Col sticky>
  164. <CodeGroup
  165. title="Request"
  166. tag="POST"
  167. label="/datasets/{dataset_id}/document/create_by_file"
  168. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  169. >
  170. ```bash {{ title: 'cURL' }}
  171. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
  172. --header 'Authorization: Bearer {api_key}' \
  173. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  174. --form 'file=@"/path/to/file"'
  175. ```
  176. </CodeGroup>
  177. <CodeGroup title="Response">
  178. ```json {{ title: 'Response' }}
  179. {
  180. "document": {
  181. "id": "",
  182. "position": 1,
  183. "data_source_type": "upload_file",
  184. "data_source_info": {
  185. "upload_file_id": ""
  186. },
  187. "dataset_process_rule_id": "",
  188. "name": "Dify.txt",
  189. "created_from": "api",
  190. "created_by": "",
  191. "created_at": 1695308667,
  192. "tokens": 0,
  193. "indexing_status": "waiting",
  194. "error": null,
  195. "enabled": true,
  196. "disabled_at": null,
  197. "disabled_by": null,
  198. "archived": false,
  199. "display_status": "queuing",
  200. "word_count": 0,
  201. "hit_count": 0,
  202. "doc_form": "text_model"
  203. },
  204. "batch": ""
  205. }
  206. ```
  207. </CodeGroup>
  208. </Col>
  209. </Row>
  210. ---
  211. <Heading
  212. url='/datasets'
  213. method='POST'
  214. title='创建空知识库'
  215. name='#create_empty_dataset'
  216. />
  217. <Row>
  218. <Col>
  219. ### Request Body
  220. <Properties>
  221. <Property name='name' type='string' key='name'>
  222. 知识库名称
  223. </Property>
  224. </Properties>
  225. </Col>
  226. <Col sticky>
  227. <CodeGroup
  228. title="Request"
  229. tag="POST"
  230. label="/datasets"
  231. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name"}'`}
  232. >
  233. ```bash {{ title: 'cURL' }}
  234. curl --location --request POST '${props.apiBaseUrl}/datasets' \
  235. --header 'Authorization: Bearer {api_key}' \
  236. --header 'Content-Type: application/json' \
  237. --data-raw '{
  238. "name": "name"
  239. }'
  240. ```
  241. </CodeGroup>
  242. <CodeGroup title="Response">
  243. ```json {{ title: 'Response' }}
  244. {
  245. "id": "",
  246. "name": "name",
  247. "description": null,
  248. "provider": "vendor",
  249. "permission": "only_me",
  250. "data_source_type": null,
  251. "indexing_technique": null,
  252. "app_count": 0,
  253. "document_count": 0,
  254. "word_count": 0,
  255. "created_by": "",
  256. "created_at": 1695636173,
  257. "updated_by": "",
  258. "updated_at": 1695636173,
  259. "embedding_model": null,
  260. "embedding_model_provider": null,
  261. "embedding_available": null
  262. }
  263. ```
  264. </CodeGroup>
  265. </Col>
  266. </Row>
  267. ---
  268. <Heading
  269. url='/datasets'
  270. method='GET'
  271. title='知识库列表'
  272. name='#dataset_list'
  273. />
  274. <Row>
  275. <Col>
  276. ### Query
  277. <Properties>
  278. <Property name='page' type='string' key='page'>
  279. 页码
  280. </Property>
  281. <Property name='limit' type='string' key='limit'>
  282. 返回条数,默认 20,范围 1-100
  283. </Property>
  284. </Properties>
  285. </Col>
  286. <Col sticky>
  287. <CodeGroup
  288. title="Request"
  289. tag="GET"
  290. label="/datasets"
  291. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
  292. >
  293. ```bash {{ title: 'cURL' }}
  294. curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \
  295. --header 'Authorization: Bearer {api_key}'
  296. ```
  297. </CodeGroup>
  298. <CodeGroup title="Response">
  299. ```json {{ title: 'Response' }}
  300. {
  301. "data": [
  302. {
  303. "id": "",
  304. "name": "知识库名称",
  305. "description": "描述信息",
  306. "permission": "only_me",
  307. "data_source_type": "upload_file",
  308. "indexing_technique": "",
  309. "app_count": 2,
  310. "document_count": 10,
  311. "word_count": 1200,
  312. "created_by": "",
  313. "created_at": "",
  314. "updated_by": "",
  315. "updated_at": ""
  316. },
  317. ...
  318. ],
  319. "has_more": true,
  320. "limit": 20,
  321. "total": 50,
  322. "page": 1
  323. }
  324. ```
  325. </CodeGroup>
  326. </Col>
  327. </Row>
  328. ---
  329. <Heading
  330. url='/datasets/{dataset_id}/documents/{document_id}/update_by_text'
  331. method='POST'
  332. title='通过文本更新文档'
  333. name='#update_by_text'
  334. />
  335. <Row>
  336. <Col>
  337. 此接口基于已存在知识库,在此知识库的基础上通过文本更新文档
  338. ### Path
  339. <Properties>
  340. <Property name='dataset_id' type='string' key='dataset_id'>
  341. 知识库 ID
  342. </Property>
  343. <Property name='document_id' type='string' key='document_id'>
  344. 文档 ID
  345. </Property>
  346. </Properties>
  347. ### Request Body
  348. <Properties>
  349. <Property name='name' type='string' key='name'>
  350. 文档名称 (选填)
  351. </Property>
  352. <Property name='text' type='string' key='text'>
  353. 文档内容(选填)
  354. </Property>
  355. <Property name='process_rule' type='object' key='process_rule'>
  356. 处理规则(选填)
  357. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  358. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  359. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  360. - <code>id</code> (string) 预处理规则的唯一标识符
  361. - 枚举:
  362. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  363. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  364. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  365. - <code>segmentation</code> (object) 分段规则
  366. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  367. - <code>max_tokens</code> 最大长度 (token) 默认为 1000
  368. </Property>
  369. </Properties>
  370. </Col>
  371. <Col sticky>
  372. <CodeGroup
  373. title="Request"
  374. tag="POST"
  375. label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
  376. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
  377. >
  378. ```bash {{ title: 'cURL' }}
  379. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
  380. --header 'Authorization: Bearer {api_key}' \
  381. --header 'Content-Type: application/json' \
  382. --data-raw '{
  383. "name": "name",
  384. "text": "text"
  385. }'
  386. ```
  387. </CodeGroup>
  388. <CodeGroup title="Response">
  389. ```json {{ title: 'Response' }}
  390. {
  391. "document": {
  392. "id": "",
  393. "position": 1,
  394. "data_source_type": "upload_file",
  395. "data_source_info": {
  396. "upload_file_id": ""
  397. },
  398. "dataset_process_rule_id": "",
  399. "name": "name.txt",
  400. "created_from": "api",
  401. "created_by": "",
  402. "created_at": 1695308667,
  403. "tokens": 0,
  404. "indexing_status": "waiting",
  405. "error": null,
  406. "enabled": true,
  407. "disabled_at": null,
  408. "disabled_by": null,
  409. "archived": false,
  410. "display_status": "queuing",
  411. "word_count": 0,
  412. "hit_count": 0,
  413. "doc_form": "text_model"
  414. },
  415. "batch": ""
  416. }
  417. ```
  418. </CodeGroup>
  419. </Col>
  420. </Row>
  421. ---
  422. <Heading
  423. url='/datasets/{dataset_id}/documents/{document_id}/update_by_file'
  424. method='POST'
  425. title='通过文件更新文档'
  426. name='#update_by_file'
  427. />
  428. <Row>
  429. <Col>
  430. 此接口基于已存在知识库,在此知识库的基础上通过文件更新文档的操作。
  431. ### Path
  432. <Properties>
  433. <Property name='dataset_id' type='string' key='dataset_id'>
  434. 知识库 ID
  435. </Property>
  436. <Property name='document_id' type='string' key='document_id'>
  437. 文档 ID
  438. </Property>
  439. </Properties>
  440. ### Request Body
  441. <Properties>
  442. <Property name='name' type='string' key='name'>
  443. 文档名称 (选填)
  444. </Property>
  445. <Property name='file' type='multipart/form-data' key='file'>
  446. 需要上传的文件
  447. </Property>
  448. <Property name='process_rule' type='object' key='process_rule'>
  449. 处理规则(选填)
  450. - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
  451. - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
  452. - <code>pre_processing_rules</code> (array[object]) 预处理规则
  453. - <code>id</code> (string) 预处理规则的唯一标识符
  454. - 枚举:
  455. - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
  456. - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
  457. - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
  458. - <code>segmentation</code> (object) 分段规则
  459. - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
  460. - <code>max_tokens</code> 最大长度 (token) 默认为 1000
  461. </Property>
  462. </Properties>
  463. </Col>
  464. <Col sticky>
  465. <CodeGroup
  466. title="Request"
  467. tag="POST"
  468. label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
  469. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  470. >
  471. ```bash {{ title: 'cURL' }}
  472. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \
  473. --header 'Authorization: Bearer {api_key}' \
  474. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  475. --form 'file=@"/path/to/file"'
  476. ```
  477. </CodeGroup>
  478. <CodeGroup title="Response">
  479. ```json {{ title: 'Response' }}
  480. {
  481. "document": {
  482. "id": "",
  483. "position": 1,
  484. "data_source_type": "upload_file",
  485. "data_source_info": {
  486. "upload_file_id": ""
  487. },
  488. "dataset_process_rule_id": "",
  489. "name": "Dify.txt",
  490. "created_from": "api",
  491. "created_by": "",
  492. "created_at": 1695308667,
  493. "tokens": 0,
  494. "indexing_status": "waiting",
  495. "error": null,
  496. "enabled": true,
  497. "disabled_at": null,
  498. "disabled_by": null,
  499. "archived": false,
  500. "display_status": "queuing",
  501. "word_count": 0,
  502. "hit_count": 0,
  503. "doc_form": "text_model"
  504. },
  505. "batch": "20230921150427533684"
  506. }
  507. ```
  508. </CodeGroup>
  509. </Col>
  510. </Row>
  511. ---
  512. <Heading
  513. url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
  514. method='GET'
  515. title='获取文档嵌入状态(进度)'
  516. name='#indexing_status'
  517. />
  518. <Row>
  519. <Col>
  520. ### Path
  521. <Properties>
  522. <Property name='dataset_id' type='string' key='dataset_id'>
  523. 知识库 ID
  524. </Property>
  525. <Property name='batch' type='string' key='batch'>
  526. 上传文档的批次号
  527. </Property>
  528. </Properties>
  529. </Col>
  530. <Col sticky>
  531. <CodeGroup
  532. title="Request"
  533. tag="GET"
  534. label="/datasets/{dataset_id}/documents/{batch}/indexing-status"
  535. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
  536. >
  537. ```bash {{ title: 'cURL' }}
  538. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \
  539. --header 'Authorization: Bearer {api_key}'
  540. ```
  541. </CodeGroup>
  542. <CodeGroup title="Response">
  543. ```json {{ title: 'Response' }}
  544. {
  545. "data":[{
  546. "id": "",
  547. "indexing_status": "indexing",
  548. "processing_started_at": 1681623462.0,
  549. "parsing_completed_at": 1681623462.0,
  550. "cleaning_completed_at": 1681623462.0,
  551. "splitting_completed_at": 1681623462.0,
  552. "completed_at": null,
  553. "paused_at": null,
  554. "error": null,
  555. "stopped_at": null,
  556. "completed_segments": 24,
  557. "total_segments": 100
  558. }]
  559. }
  560. ```
  561. </CodeGroup>
  562. </Col>
  563. </Row>
  564. ---
  565. <Heading
  566. url='/datasets/{dataset_id}/documents/{document_id}'
  567. method='DELETE'
  568. title='删除文档'
  569. name='#delete_document'
  570. />
  571. <Row>
  572. <Col>
  573. ### Path
  574. <Properties>
  575. <Property name='dataset_id' type='string' key='dataset_id'>
  576. 知识库 ID
  577. </Property>
  578. <Property name='document_id' type='string' key='document_id'>
  579. 文档 ID
  580. </Property>
  581. </Properties>
  582. </Col>
  583. <Col sticky>
  584. <CodeGroup
  585. title="Request"
  586. tag="DELETE"
  587. label="/datasets/{dataset_id}/documents/{document_id}"
  588. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  589. >
  590. ```bash {{ title: 'cURL' }}
  591. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \
  592. --header 'Authorization: Bearer {api_key}'
  593. ```
  594. </CodeGroup>
  595. <CodeGroup title="Response">
  596. ```json {{ title: 'Response' }}
  597. {
  598. "result": "success"
  599. }
  600. ```
  601. </CodeGroup>
  602. </Col>
  603. </Row>
  604. ---
  605. <Heading
  606. url='/datasets/{dataset_id}/documents'
  607. method='GET'
  608. title='知识库文档列表'
  609. name='#dataset_document_list'
  610. />
  611. <Row>
  612. <Col>
  613. ### Path
  614. <Properties>
  615. <Property name='dataset_id' type='string' key='dataset_id'>
  616. 知识库 ID
  617. </Property>
  618. </Properties>
  619. ### Query
  620. <Properties>
  621. <Property name='keyword' type='string' key='keyword'>
  622. 搜索关键词,可选,目前仅搜索文档名称
  623. </Property>
  624. <Property name='page' type='string' key='page'>
  625. 页码,可选
  626. </Property>
  627. <Property name='limit' type='string' key='limit'>
  628. 返回条数,可选,默认 20,范围 1-100
  629. </Property>
  630. </Properties>
  631. </Col>
  632. <Col sticky>
  633. <CodeGroup
  634. title="Request"
  635. tag="GET"
  636. label="/datasets/{dataset_id}/documents"
  637. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
  638. >
  639. ```bash {{ title: 'cURL' }}
  640. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \
  641. --header 'Authorization: Bearer {api_key}'
  642. ```
  643. </CodeGroup>
  644. <CodeGroup title="Response">
  645. ```json {{ title: 'Response' }}
  646. {
  647. "data": [
  648. {
  649. "id": "",
  650. "position": 1,
  651. "data_source_type": "file_upload",
  652. "data_source_info": null,
  653. "dataset_process_rule_id": null,
  654. "name": "dify",
  655. "created_from": "",
  656. "created_by": "",
  657. "created_at": 1681623639,
  658. "tokens": 0,
  659. "indexing_status": "waiting",
  660. "error": null,
  661. "enabled": true,
  662. "disabled_at": null,
  663. "disabled_by": null,
  664. "archived": false
  665. }
  666. ],
  667. "has_more": false,
  668. "limit": 20,
  669. "total": 9,
  670. "page": 1
  671. }
  672. ```
  673. </CodeGroup>
  674. </Col>
  675. </Row>
  676. ---
  677. <Heading
  678. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  679. method='POST'
  680. title='新增分段'
  681. name='#create_new_segment'
  682. />
  683. <Row>
  684. <Col>
  685. ### Path
  686. <Properties>
  687. <Property name='dataset_id' type='string' key='dataset_id'>
  688. 知识库 ID
  689. </Property>
  690. <Property name='document_id' type='string' key='document_id'>
  691. 文档 ID
  692. </Property>
  693. </Properties>
  694. ### Request Body
  695. <Properties>
  696. <Property name='segments' type='object list' key='segments'>
  697. - <code>content</code> (text) 文本内容/问题内容,必填
  698. - <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为qa模式则传值
  699. - <code>keywords</code> (list) 关键字,非必填
  700. </Property>
  701. </Properties>
  702. </Col>
  703. <Col sticky>
  704. <CodeGroup
  705. title="Request"
  706. tag="POST"
  707. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  708. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
  709. >
  710. ```bash {{ title: 'cURL' }}
  711. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  712. --header 'Authorization: Bearer {api_key}' \
  713. --header 'Content-Type: application/json' \
  714. --data-raw '{
  715. "segments": [
  716. {
  717. "content": "1",
  718. "answer": "1",
  719. "keywords": ["a"]
  720. }
  721. ]
  722. }'
  723. ```
  724. </CodeGroup>
  725. <CodeGroup title="Response">
  726. ```json {{ title: 'Response' }}
  727. {
  728. "data": [{
  729. "id": "",
  730. "position": 1,
  731. "document_id": "",
  732. "content": "1",
  733. "answer": "1",
  734. "word_count": 25,
  735. "tokens": 0,
  736. "keywords": [
  737. "a"
  738. ],
  739. "index_node_id": "",
  740. "index_node_hash": "",
  741. "hit_count": 0,
  742. "enabled": true,
  743. "disabled_at": null,
  744. "disabled_by": null,
  745. "status": "completed",
  746. "created_by": "",
  747. "created_at": 1695312007,
  748. "indexing_at": 1695312007,
  749. "completed_at": 1695312007,
  750. "error": null,
  751. "stopped_at": null
  752. }],
  753. "doc_form": "text_model"
  754. }
  755. ```
  756. </CodeGroup>
  757. </Col>
  758. </Row>
  759. ---
  760. <Heading
  761. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  762. method='GET'
  763. title='查询文档分段'
  764. name='#get_segment'
  765. />
  766. <Row>
  767. <Col>
  768. ### Path
  769. <Properties>
  770. <Property name='dataset_id' type='string' key='dataset_id'>
  771. 知识库 ID
  772. </Property>
  773. <Property name='document_id' type='string' key='document_id'>
  774. 文档 ID
  775. </Property>
  776. </Properties>
  777. ### Query
  778. <Properties>
  779. <Property name='keyword' type='string' key='keyword'>
  780. 搜索关键词,可选
  781. </Property>
  782. <Property name='status' type='string' key='status'>
  783. 搜索状态,completed
  784. </Property>
  785. </Properties>
  786. </Col>
  787. <Col sticky>
  788. <CodeGroup
  789. title="Request"
  790. tag="GET"
  791. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  792. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  793. >
  794. ```bash {{ title: 'cURL' }}
  795. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  796. --header 'Authorization: Bearer {api_key}' \
  797. --header 'Content-Type: application/json'
  798. ```
  799. </CodeGroup>
  800. <CodeGroup title="Response">
  801. ```json {{ title: 'Response' }}
  802. {
  803. "data": [{
  804. "id": "",
  805. "position": 1,
  806. "document_id": "",
  807. "content": "1",
  808. "answer": "1",
  809. "word_count": 25,
  810. "tokens": 0,
  811. "keywords": [
  812. "a"
  813. ],
  814. "index_node_id": "",
  815. "index_node_hash": "",
  816. "hit_count": 0,
  817. "enabled": true,
  818. "disabled_at": null,
  819. "disabled_by": null,
  820. "status": "completed",
  821. "created_by": "",
  822. "created_at": 1695312007,
  823. "indexing_at": 1695312007,
  824. "completed_at": 1695312007,
  825. "error": null,
  826. "stopped_at": null
  827. }],
  828. "doc_form": "text_model"
  829. }
  830. ```
  831. </CodeGroup>
  832. </Col>
  833. </Row>
  834. ---
  835. <Heading
  836. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
  837. method='DELETE'
  838. title='删除文档分段'
  839. name='#delete_segment'
  840. />
  841. <Row>
  842. <Col>
  843. ### Path
  844. <Properties>
  845. <Property name='dataset_id' type='string' key='dataset_id'>
  846. 知识库 ID
  847. </Property>
  848. <Property name='segment_id' type='string' key='segment_id'>
  849. 文档分段ID
  850. </Property>
  851. </Properties>
  852. </Col>
  853. <Col sticky>
  854. <CodeGroup
  855. title="Request"
  856. tag="DELETE"
  857. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
  858. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  859. >
  860. ```bash {{ title: 'cURL' }}
  861. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
  862. --header 'Authorization: Bearer {api_key}' \
  863. --header 'Content-Type: application/json'
  864. ```
  865. </CodeGroup>
  866. <CodeGroup title="Response">
  867. ```json {{ title: 'Response' }}
  868. {
  869. "result": "success"
  870. }
  871. ```
  872. </CodeGroup>
  873. </Col>
  874. </Row>
  875. ---
  876. <Heading
  877. url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
  878. method='POST'
  879. title='更新文档分段'
  880. name='#update_segment'
  881. />
  882. <Row>
  883. <Col>
  884. ### Path
  885. <Properties>
  886. <Property name='dataset_id' type='string' key='dataset_id'>
  887. 知识库 ID
  888. </Property>
  889. <Property name='segment_id' type='string' key='segment_id'>
  890. 文档分段ID
  891. </Property>
  892. </Properties>
  893. ### Request Body
  894. <Properties>
  895. <Property name='segments' type='object' key='segments'>
  896. - <code>content</code> (text) 文本内容/问题内容,必填
  897. - <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为 QA 模式则传值
  898. - <code>keywords</code> (list) 关键字,非必填
  899. - <code>enabled</code> (bool) false/true,非必填
  900. </Property>
  901. </Properties>
  902. </Col>
  903. <Col sticky>
  904. <CodeGroup
  905. title="Request"
  906. tag="POST"
  907. label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
  908. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\"segments\": {\"content\": \"1\",\"answer\": \"1\", \"keywords\": [\"a\"], \"enabled\": false}}'`}
  909. >
  910. ```bash {{ title: 'cURL' }}
  911. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
  912. --header 'Authorization: Bearer {api_key}' \
  913. --header 'Content-Type: application/json' \
  914. --data-raw '{
  915. "segments": {
  916. "content": "1",
  917. "answer": "1",
  918. "keywords": ["a"],
  919. "enabled": false
  920. }
  921. }'
  922. ```
  923. </CodeGroup>
  924. <CodeGroup title="Response">
  925. ```json {{ title: 'Response' }}
  926. {
  927. "data": [{
  928. "id": "",
  929. "position": 1,
  930. "document_id": "",
  931. "content": "1",
  932. "answer": "1",
  933. "word_count": 25,
  934. "tokens": 0,
  935. "keywords": [
  936. "a"
  937. ],
  938. "index_node_id": "",
  939. "index_node_hash": "",
  940. "hit_count": 0,
  941. "enabled": true,
  942. "disabled_at": null,
  943. "disabled_by": null,
  944. "status": "completed",
  945. "created_by": "",
  946. "created_at": 1695312007,
  947. "indexing_at": 1695312007,
  948. "completed_at": 1695312007,
  949. "error": null,
  950. "stopped_at": null
  951. }],
  952. "doc_form": "text_model"
  953. }
  954. ```
  955. </CodeGroup>
  956. </Col>
  957. </Row>
  958. ---
  959. <Row>
  960. <Col>
  961. ### 错误信息
  962. <Properties>
  963. <Property name='code' type='string' key='code'>
  964. 返回的错误代码
  965. </Property>
  966. </Properties>
  967. <Properties>
  968. <Property name='status' type='number' key='status'>
  969. 返回的错误状态
  970. </Property>
  971. </Properties>
  972. <Properties>
  973. <Property name='message' type='string' key='message'>
  974. 返回的错误信息
  975. </Property>
  976. </Properties>
  977. </Col>
  978. <Col>
  979. <CodeGroup title="Example">
  980. ```json {{ title: 'Response' }}
  981. {
  982. "code": "no_file_uploaded",
  983. "message": "Please upload your file.",
  984. "status": 400
  985. }
  986. ```
  987. </CodeGroup>
  988. </Col>
  989. </Row>
  990. <table className="max-w-auto border-collapse border border-slate-400" style={{ maxWidth: 'none', width: 'auto' }}>
  991. <thead style={{ background: '#f9fafc' }}>
  992. <tr>
  993. <th className="p-2 border border-slate-300">code</th>
  994. <th className="p-2 border border-slate-300">status</th>
  995. <th className="p-2 border border-slate-300">message</th>
  996. </tr>
  997. </thead>
  998. <tbody>
  999. <tr>
  1000. <td className="p-2 border border-slate-300">no_file_uploaded</td>
  1001. <td className="p-2 border border-slate-300">400</td>
  1002. <td className="p-2 border border-slate-300">Please upload your file.</td>
  1003. </tr>
  1004. <tr>
  1005. <td className="p-2 border border-slate-300">too_many_files</td>
  1006. <td className="p-2 border border-slate-300">400</td>
  1007. <td className="p-2 border border-slate-300">Only one file is allowed.</td>
  1008. </tr>
  1009. <tr>
  1010. <td className="p-2 border border-slate-300">file_too_large</td>
  1011. <td className="p-2 border border-slate-300">413</td>
  1012. <td className="p-2 border border-slate-300">File size exceeded.</td>
  1013. </tr>
  1014. <tr>
  1015. <td className="p-2 border border-slate-300">unsupported_file_type</td>
  1016. <td className="p-2 border border-slate-300">415</td>
  1017. <td className="p-2 border border-slate-300">File type not allowed.</td>
  1018. </tr>
  1019. <tr>
  1020. <td className="p-2 border border-slate-300">high_quality_dataset_only</td>
  1021. <td className="p-2 border border-slate-300">400</td>
  1022. <td className="p-2 border border-slate-300">Current operation only supports 'high-quality' datasets.</td>
  1023. </tr>
  1024. <tr>
  1025. <td className="p-2 border border-slate-300">dataset_not_initialized</td>
  1026. <td className="p-2 border border-slate-300">400</td>
  1027. <td className="p-2 border border-slate-300">The dataset is still being initialized or indexing. Please wait a moment.</td>
  1028. </tr>
  1029. <tr>
  1030. <td className="p-2 border border-slate-300">archived_document_immutable</td>
  1031. <td className="p-2 border border-slate-300">403</td>
  1032. <td className="p-2 border border-slate-300">The archived document is not editable.</td>
  1033. </tr>
  1034. <tr>
  1035. <td className="p-2 border border-slate-300">dataset_name_duplicate</td>
  1036. <td className="p-2 border border-slate-300">409</td>
  1037. <td className="p-2 border border-slate-300">The dataset name already exists. Please modify your dataset name.</td>
  1038. </tr>
  1039. <tr>
  1040. <td className="p-2 border border-slate-300">invalid_action</td>
  1041. <td className="p-2 border border-slate-300">400</td>
  1042. <td className="p-2 border border-slate-300">Invalid action.</td>
  1043. </tr>
  1044. <tr>
  1045. <td className="p-2 border border-slate-300">document_already_finished</td>
  1046. <td className="p-2 border border-slate-300">400</td>
  1047. <td className="p-2 border border-slate-300">The document has been processed. Please refresh the page or go to the document details.</td>
  1048. </tr>
  1049. <tr>
  1050. <td className="p-2 border border-slate-300">document_indexing</td>
  1051. <td className="p-2 border border-slate-300">400</td>
  1052. <td className="p-2 border border-slate-300">The document is being processed and cannot be edited.</td>
  1053. </tr>
  1054. <tr>
  1055. <td className="p-2 border border-slate-300">invalid_metadata</td>
  1056. <td className="p-2 border border-slate-300">400</td>
  1057. <td className="p-2 border border-slate-300">The metadata content is incorrect. Please check and verify.</td>
  1058. </tr>
  1059. </tbody>
  1060. </table>
  1061. <div className="pb-4" />