template.en.mdx 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107
  1. import { CodeGroup } from '@/app/components/develop/code.tsx'
  2. import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from '@/app/components/develop/md.tsx'
  3. # Knowledge API
  4. <div>
  5. ### Authentication
  6. Service API of Dify authenticates using an `API-Key`.
  7. Developers should store the `API-Key` on the backend rather than sharing it or storing it on the client side, to avoid leaking the `API-Key`, which may lead to property loss.
  8. All API requests should include your `API-Key` in the **`Authorization`** HTTP Header, as shown below:
  9. <CodeGroup title="Code">
  10. ```javascript
  11. Authorization: Bearer {API_KEY}
  12. ```
  13. </CodeGroup>
  14. </div>
  15. ---
  16. <Heading
  17. url='/datasets/{dataset_id}/document/create_by_text'
  18. method='POST'
  19. title='Create a document from text'
  20. name='#create_by_text'
  21. />
  22. <Row>
  23. <Col>
  24. This API creates a new document from text in an existing Knowledge.
  25. ### Params
  26. <Properties>
  27. <Property name='dataset_id' type='string' key='dataset_id'>
  28. Knowledge ID
  29. </Property>
  30. </Properties>
  31. ### Request Body
  32. <Properties>
  33. <Property name='name' type='string' key='name'>
  34. Document name
  35. </Property>
  36. <Property name='text' type='string' key='text'>
  37. Document content
  38. </Property>
  39. <Property name='indexing_technique' type='string' key='indexing_technique'>
  40. Index mode
  41. - <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
  42. - <code>economy</code> Economy: Build using inverted index of Keyword Table Index
  43. </Property>
  44. <Property name='process_rule' type='object' key='process_rule'>
  45. Processing rules
  46. - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
  47. - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
  48. - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
  49. - <code>id</code> (string) Unique identifier for the preprocessing rule
  50. - enumerate
  51. - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
  52. - <code>remove_urls_emails</code> Delete URL, email address
  53. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
  54. - <code>segmentation</code> (object) segmentation rules
  55. - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
  56. - <code>max_tokens</code> Maximum length (token) defaults to 1000
  57. </Property>
  58. </Properties>
  59. </Col>
  60. <Col sticky>
  61. <CodeGroup
  62. title="Request"
  63. tag="POST"
  64. label="/datasets/{dataset_id}/document/create_by_text"
  65. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
  66. >
  67. ```bash {{ title: 'cURL' }}
  68. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \
  69. --header 'Authorization: Bearer {api_key}' \
  70. --header 'Content-Type: application/json' \
  71. --data-raw '{
  72. "name": "text",
  73. "text": "text",
  74. "indexing_technique": "high_quality",
  75. "process_rule": {
  76. "mode": "automatic"
  77. }
  78. }'
  79. ```
  80. </CodeGroup>
  81. <CodeGroup title="Response">
  82. ```json {{ title: 'Response' }}
  83. {
  84. "document": {
  85. "id": "",
  86. "position": 1,
  87. "data_source_type": "upload_file",
  88. "data_source_info": {
  89. "upload_file_id": ""
  90. },
  91. "dataset_process_rule_id": "",
  92. "name": "text.txt",
  93. "created_from": "api",
  94. "created_by": "",
  95. "created_at": 1695690280,
  96. "tokens": 0,
  97. "indexing_status": "waiting",
  98. "error": null,
  99. "enabled": true,
  100. "disabled_at": null,
  101. "disabled_by": null,
  102. "archived": false,
  103. "display_status": "queuing",
  104. "word_count": 0,
  105. "hit_count": 0,
  106. "doc_form": "text_model"
  107. },
  108. "batch": ""
  109. }
  110. ```
  111. </CodeGroup>
  112. </Col>
  113. </Row>
  114. ---
  115. <Heading
  116. url='/datasets/{dataset_id}/document/create_by_file'
  117. method='POST'
  118. title='Create documents from files'
  119. name='#create_by_file'
  120. />
  121. <Row>
  122. <Col>
  123. This API creates a new document from a file in an existing Knowledge.
  124. ### Params
  125. <Properties>
  126. <Property name='dataset_id' type='string' key='dataset_id'>
  127. Knowledge ID
  128. </Property>
  129. </Properties>
  130. ### Request Body
  131. <Properties>
  132. <Property name='original_document_id' type='string' key='original_document_id'>
  133. Source document ID (optional)
  134. - Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document
  135. - The source document cannot be an archived document
  136. - When original_document_id is passed in, the existing document is updated. process_rule is optional; if it is omitted, the segmentation method of the source document is used by default
  137. - When original_document_id is not passed in, a new document is created, and process_rule is required
  138. </Property>
  139. <Property name='file' type='multipart/form-data' key='file'>
  140. Files that need to be uploaded.
  141. </Property>
  142. <Property name='indexing_technique' type='string' key='indexing_technique'>
  143. Index mode
  144. - <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
  145. - <code>economy</code> Economy: Build using inverted index of Keyword Table Index
  146. </Property>
  147. <Property name='process_rule' type='object' key='process_rule'>
  148. Processing rules
  149. - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
  150. - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
  151. - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
  152. - <code>id</code> (string) Unique identifier for the preprocessing rule
  153. - enumerate
  154. - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
  155. - <code>remove_urls_emails</code> Delete URL, email address
  156. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
  157. - <code>segmentation</code> (object) segmentation rules
  158. - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
  159. - <code>max_tokens</code> Maximum length (token) defaults to 1000
  160. </Property>
  161. </Properties>
  162. </Col>
  163. <Col sticky>
  164. <CodeGroup
  165. title="Request"
  166. tag="POST"
  167. label="/datasets/{dataset_id}/document/create_by_file"
  168. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  169. >
  170. ```bash {{ title: 'cURL' }}
  171. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
  172. --header 'Authorization: Bearer {api_key}' \
  173. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  174. --form 'file=@"/path/to/file"'
  175. ```
  176. </CodeGroup>
  177. <CodeGroup title="Response">
  178. ```json {{ title: 'Response' }}
  179. {
  180. "document": {
  181. "id": "",
  182. "position": 1,
  183. "data_source_type": "upload_file",
  184. "data_source_info": {
  185. "upload_file_id": ""
  186. },
  187. "dataset_process_rule_id": "",
  188. "name": "Dify.txt",
  189. "created_from": "api",
  190. "created_by": "",
  191. "created_at": 1695308667,
  192. "tokens": 0,
  193. "indexing_status": "waiting",
  194. "error": null,
  195. "enabled": true,
  196. "disabled_at": null,
  197. "disabled_by": null,
  198. "archived": false,
  199. "display_status": "queuing",
  200. "word_count": 0,
  201. "hit_count": 0,
  202. "doc_form": "text_model"
  203. },
  204. "batch": ""
  205. }
  206. ```
  207. </CodeGroup>
  208. </Col>
  209. </Row>
  210. ---
  211. <Heading
  212. url='/datasets'
  213. method='POST'
  214. title='Create an empty Knowledge'
  215. name='#create_empty_dataset'
  216. />
  217. <Row>
  218. <Col>
  219. ### Request Body
  220. <Properties>
  221. <Property name='name' type='string' key='name'>
  222. Knowledge name
  223. </Property>
  224. </Properties>
  225. </Col>
  226. <Col sticky>
  227. <CodeGroup
  228. title="Request"
  229. tag="POST"
  230. label="/datasets"
  231. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name"}'`}
  232. >
  233. ```bash {{ title: 'cURL' }}
  234. curl --location --request POST '${props.apiBaseUrl}/datasets' \
  235. --header 'Authorization: Bearer {api_key}' \
  236. --header 'Content-Type: application/json' \
  237. --data-raw '{
  238. "name": "name"
  239. }'
  240. ```
  241. </CodeGroup>
  242. <CodeGroup title="Response">
  243. ```json {{ title: 'Response' }}
  244. {
  245. "id": "",
  246. "name": "name",
  247. "description": null,
  248. "provider": "vendor",
  249. "permission": "only_me",
  250. "data_source_type": null,
  251. "indexing_technique": null,
  252. "app_count": 0,
  253. "document_count": 0,
  254. "word_count": 0,
  255. "created_by": "",
  256. "created_at": 1695636173,
  257. "updated_by": "",
  258. "updated_at": 1695636173,
  259. "embedding_model": null,
  260. "embedding_model_provider": null,
  261. "embedding_available": null
  262. }
  263. ```
  264. </CodeGroup>
  265. </Col>
  266. </Row>
  267. ---
  268. <Heading
  269. url='/datasets'
  270. method='GET'
  271. title='Knowledge list'
  272. name='#dataset_list'
  273. />
  274. <Row>
  275. <Col>
  276. ### Query
  277. <Properties>
  278. <Property name='page' type='string' key='page'>
  279. Page number
  280. </Property>
  281. <Property name='limit' type='string' key='limit'>
  282. Number of items returned, default 20, range 1-100
  283. </Property>
  284. </Properties>
  285. </Col>
  286. <Col sticky>
  287. <CodeGroup
  288. title="Request"
  289. tag="GET"
  290. label="/datasets"
  291. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
  292. >
  293. ```bash {{ title: 'cURL' }}
  294. curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \
  295. --header 'Authorization: Bearer {api_key}'
  296. ```
  297. </CodeGroup>
  298. <CodeGroup title="Response">
  299. ```json {{ title: 'Response' }}
  300. {
  301. "data": [
  302. {
  303. "id": "",
  304. "name": "name",
  305. "description": "desc",
  306. "permission": "only_me",
  307. "data_source_type": "upload_file",
  308. "indexing_technique": "",
  309. "app_count": 2,
  310. "document_count": 10,
  311. "word_count": 1200,
  312. "created_by": "",
  313. "created_at": "",
  314. "updated_by": "",
  315. "updated_at": ""
  316. },
  317. ...
  318. ],
  319. "has_more": true,
  320. "limit": 20,
  321. "total": 50,
  322. "page": 1
  323. }
  324. ```
  325. </CodeGroup>
  326. </Col>
  327. </Row>
  328. ---
  329. <Heading
  330. url='/datasets/{dataset_id}/documents/{document_id}/update_by_text'
  331. method='POST'
  332. title='Update document via text'
  333. name='#update_by_text'
  334. />
  335. <Row>
  336. <Col>
  337. This API updates a document via text in an existing Knowledge.
  338. ### Params
  339. <Properties>
  340. <Property name='dataset_id' type='string' key='dataset_id'>
  341. Knowledge ID
  342. </Property>
  343. <Property name='document_id' type='string' key='document_id'>
  344. Document ID
  345. </Property>
  346. </Properties>
  347. ### Request Body
  348. <Properties>
  349. <Property name='name' type='string' key='name'>
  350. Document name (optional)
  351. </Property>
  352. <Property name='text' type='string' key='text'>
  353. Document content (optional)
  354. </Property>
  355. <Property name='process_rule' type='object' key='process_rule'>
  356. Processing rules
  357. - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
  358. - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
  359. - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
  360. - <code>id</code> (string) Unique identifier for the preprocessing rule
  361. - enumerate
  362. - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
  363. - <code>remove_urls_emails</code> Delete URL, email address
  364. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
  365. - <code>segmentation</code> (object) segmentation rules
  366. - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
  367. - <code>max_tokens</code> Maximum length (token) defaults to 1000
  368. </Property>
  369. </Properties>
  370. </Col>
  371. <Col sticky>
  372. <CodeGroup
  373. title="Request"
  374. tag="POST"
  375. label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
  376. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
  377. >
  378. ```bash {{ title: 'cURL' }}
  379. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
  380. --header 'Authorization: Bearer {api_key}' \
  381. --header 'Content-Type: application/json' \
  382. --data-raw '{
  383. "name": "name",
  384. "text": "text"
  385. }'
  386. ```
  387. </CodeGroup>
  388. <CodeGroup title="Response">
  389. ```json {{ title: 'Response' }}
  390. {
  391. "document": {
  392. "id": "",
  393. "position": 1,
  394. "data_source_type": "upload_file",
  395. "data_source_info": {
  396. "upload_file_id": ""
  397. },
  398. "dataset_process_rule_id": "",
  399. "name": "name.txt",
  400. "created_from": "api",
  401. "created_by": "",
  402. "created_at": 1695308667,
  403. "tokens": 0,
  404. "indexing_status": "waiting",
  405. "error": null,
  406. "enabled": true,
  407. "disabled_at": null,
  408. "disabled_by": null,
  409. "archived": false,
  410. "display_status": "queuing",
  411. "word_count": 0,
  412. "hit_count": 0,
  413. "doc_form": "text_model"
  414. },
  415. "batch": ""
  416. }
  417. ```
  418. </CodeGroup>
  419. </Col>
  420. </Row>
  421. ---
  422. <Heading
  423. url='/datasets/{dataset_id}/documents/{document_id}/update_by_file'
  424. method='POST'
  425. title='Update a document from a file'
  426. name='#update_by_file'
  427. />
  428. <Row>
  429. <Col>
  430. This API updates a document via a file in an existing Knowledge.
  431. ### Params
  432. <Properties>
  433. <Property name='dataset_id' type='string' key='dataset_id'>
  434. Knowledge ID
  435. </Property>
  436. <Property name='document_id' type='string' key='document_id'>
  437. Document ID
  438. </Property>
  439. </Properties>
  440. ### Request Body
  441. <Properties>
  442. <Property name='name' type='string' key='name'>
  443. Document name (optional)
  444. </Property>
  445. <Property name='file' type='multipart/form-data' key='file'>
  446. Files to be uploaded
  447. </Property>
  448. <Property name='process_rule' type='object' key='process_rule'>
  449. Processing rules
  450. - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
  451. - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
  452. - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
  453. - <code>id</code> (string) Unique identifier for the preprocessing rule
  454. - enumerate
  455. - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
  456. - <code>remove_urls_emails</code> Delete URL, email address
  457. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
  458. - <code>segmentation</code> (object) segmentation rules
  459. - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
  460. - <code>max_tokens</code> Maximum length (token) defaults to 1000
  461. </Property>
  462. </Properties>
  463. </Col>
  464. <Col sticky>
  465. <CodeGroup
  466. title="Request"
  467. tag="POST"
  468. label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
  469. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
  470. >
  471. ```bash {{ title: 'cURL' }}
  472. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \
  473. --header 'Authorization: Bearer {api_key}' \
  474. --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
  475. --form 'file=@"/path/to/file"'
  476. ```
  477. </CodeGroup>
  478. <CodeGroup title="Response">
  479. ```json {{ title: 'Response' }}
  480. {
  481. "document": {
  482. "id": "",
  483. "position": 1,
  484. "data_source_type": "upload_file",
  485. "data_source_info": {
  486. "upload_file_id": ""
  487. },
  488. "dataset_process_rule_id": "",
  489. "name": "Dify.txt",
  490. "created_from": "api",
  491. "created_by": "",
  492. "created_at": 1695308667,
  493. "tokens": 0,
  494. "indexing_status": "waiting",
  495. "error": null,
  496. "enabled": true,
  497. "disabled_at": null,
  498. "disabled_by": null,
  499. "archived": false,
  500. "display_status": "queuing",
  501. "word_count": 0,
  502. "hit_count": 0,
  503. "doc_form": "text_model"
  504. },
  505. "batch": "20230921150427533684"
  506. }
  507. ```
  508. </CodeGroup>
  509. </Col>
  510. </Row>
  511. ---
  512. <Heading
  513. url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
  514. method='GET'
  515. title='Get document embedding status (progress)'
  516. name='#indexing_status'
  517. />
  518. <Row>
  519. <Col>
  520. ### Params
  521. <Properties>
  522. <Property name='dataset_id' type='string' key='dataset_id'>
  523. Knowledge ID
  524. </Property>
  525. <Property name='batch' type='string' key='batch'>
  526. Batch number of uploaded documents
  527. </Property>
  528. </Properties>
  529. </Col>
  530. <Col sticky>
  531. <CodeGroup
  532. title="Request"
  533. tag="GET"
  534. label="/datasets/{dataset_id}/documents/{batch}/indexing-status"
  535. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
  536. >
  537. ```bash {{ title: 'cURL' }}
  538. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \
  539. --header 'Authorization: Bearer {api_key}'
  540. ```
  541. </CodeGroup>
  542. <CodeGroup title="Response">
  543. ```json {{ title: 'Response' }}
  544. {
  545. "data":[{
  546. "id": "",
  547. "indexing_status": "indexing",
  548. "processing_started_at": 1681623462.0,
  549. "parsing_completed_at": 1681623462.0,
  550. "cleaning_completed_at": 1681623462.0,
  551. "splitting_completed_at": 1681623462.0,
  552. "completed_at": null,
  553. "paused_at": null,
  554. "error": null,
  555. "stopped_at": null,
  556. "completed_segments": 24,
  557. "total_segments": 100
  558. }]
  559. }
  560. ```
  561. </CodeGroup>
  562. </Col>
  563. </Row>
  564. ---
  565. <Heading
  566. url='/datasets/{dataset_id}/documents/{document_id}'
  567. method='DELETE'
  568. title='Delete document'
  569. name='#delete_document'
  570. />
  571. <Row>
  572. <Col>
  573. ### Params
  574. <Properties>
  575. <Property name='dataset_id' type='string' key='dataset_id'>
  576. Knowledge ID
  577. </Property>
  578. <Property name='document_id' type='string' key='document_id'>
  579. Document ID
  580. </Property>
  581. </Properties>
  582. </Col>
  583. <Col sticky>
  584. <CodeGroup
  585. title="Request"
  586. tag="DELETE"
  587. label="/datasets/{dataset_id}/documents/{document_id}"
  588. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
  589. >
  590. ```bash {{ title: 'cURL' }}
  591. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \
  592. --header 'Authorization: Bearer {api_key}'
  593. ```
  594. </CodeGroup>
  595. <CodeGroup title="Response">
  596. ```json {{ title: 'Response' }}
  597. {
  598. "result": "success"
  599. }
  600. ```
  601. </CodeGroup>
  602. </Col>
  603. </Row>
  604. ---
  605. <Heading
  606. url='/datasets/{dataset_id}/documents'
  607. method='GET'
  608. title='Knowledge document list'
  609. name='#dataset_document_list'
  610. />
  611. <Row>
  612. <Col>
  613. ### Params
  614. <Properties>
  615. <Property name='dataset_id' type='string' key='dataset_id'>
  616. Knowledge ID
  617. </Property>
  618. </Properties>
  619. ### Query
  620. <Properties>
  621. <Property name='keyword' type='string' key='keyword'>
  622. Search keywords; currently only document names are searched (optional)
  623. </Property>
  624. <Property name='page' type='string' key='page'>
  625. Page number (optional)
  626. </Property>
  627. <Property name='limit' type='string' key='limit'>
  628. Number of items returned, default 20, range 1-100 (optional)
  629. </Property>
  630. </Properties>
  631. </Col>
  632. <Col sticky>
  633. <CodeGroup
  634. title="Request"
  635. tag="GET"
  636. label="/datasets/{dataset_id}/documents"
  637. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
  638. >
  639. ```bash {{ title: 'cURL' }}
  640. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \
  641. --header 'Authorization: Bearer {api_key}'
  642. ```
  643. </CodeGroup>
  644. <CodeGroup title="Response">
  645. ```json {{ title: 'Response' }}
  646. {
  647. "data": [
  648. {
  649. "id": "",
  650. "position": 1,
  651. "data_source_type": "file_upload",
  652. "data_source_info": null,
  653. "dataset_process_rule_id": null,
  654. "name": "dify",
  655. "created_from": "",
  656. "created_by": "",
  657. "created_at": 1681623639,
  658. "tokens": 0,
  659. "indexing_status": "waiting",
  660. "error": null,
  661. "enabled": true,
  662. "disabled_at": null,
  663. "disabled_by": null,
  664. "archived": false
  665. }
  666. ],
  667. "has_more": false,
  668. "limit": 20,
  669. "total": 9,
  670. "page": 1
  671. }
  672. ```
  673. </CodeGroup>
  674. </Col>
  675. </Row>
  676. ---
  677. <Heading
  678. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  679. method='POST'
  680. title='Add segment'
  681. name='#create_new_segment'
  682. />
  683. <Row>
  684. <Col>
  685. ### Params
  686. <Properties>
  687. <Property name='dataset_id' type='string' key='dataset_id'>
  688. Knowledge ID
  689. </Property>
  690. <Property name='document_id' type='string' key='document_id'>
  691. Document ID
  692. </Property>
  693. </Properties>
  694. ### Request Body
  695. <Properties>
  696. <Property name='segments' type='object list' key='segments'>
  697. - <code>content</code> (text) Text content/question content, required
  698. - <code>answer</code> (text) Answer content; pass this value if the Knowledge is in QA mode (optional)
  699. - <code>keywords</code> (list) Keywords (optional)
  700. </Property>
  701. </Properties>
  702. </Col>
  703. <Col sticky>
  704. <CodeGroup
  705. title="Request"
  706. tag="POST"
  707. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  708. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
  709. >
  710. ```bash {{ title: 'cURL' }}
  711. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  712. --header 'Authorization: Bearer {api_key}' \
  713. --header 'Content-Type: application/json' \
  714. --data-raw '{
  715. "segments": [
  716. {
  717. "content": "1",
  718. "answer": "1",
  719. "keywords": ["a"]
  720. }
  721. ]
  722. }'
  723. ```
  724. </CodeGroup>
  725. <CodeGroup title="Response">
  726. ```json {{ title: 'Response' }}
  727. {
  728. "data": [{
  729. "id": "",
  730. "position": 1,
  731. "document_id": "",
  732. "content": "1",
  733. "answer": "1",
  734. "word_count": 25,
  735. "tokens": 0,
  736. "keywords": [
  737. "a"
  738. ],
  739. "index_node_id": "",
  740. "index_node_hash": "",
  741. "hit_count": 0,
  742. "enabled": true,
  743. "disabled_at": null,
  744. "disabled_by": null,
  745. "status": "completed",
  746. "created_by": "",
  747. "created_at": 1695312007,
  748. "indexing_at": 1695312007,
  749. "completed_at": 1695312007,
  750. "error": null,
  751. "stopped_at": null
  752. }],
  753. "doc_form": "text_model"
  754. }
  755. ```
  756. </CodeGroup>
  757. </Col>
  758. </Row>
  759. ---
  760. <Heading
  761. url='/datasets/{dataset_id}/documents/{document_id}/segments'
  762. method='GET'
  763. title='Get document segments'
  764. name='#get_segment'
  765. />
  766. <Row>
  767. <Col>
  768. ### Path
  769. <Properties>
  770. <Property name='dataset_id' type='string' key='dataset_id'>
  771. Knowledge ID
  772. </Property>
  773. <Property name='document_id' type='string' key='document_id'>
  774. Document ID
  775. </Property>
  776. </Properties>
  777. ### Query
  778. <Properties>
  779. <Property name='keyword' type='string' key='keyword'>
  780. Search keyword (optional)
  781. </Property>
  782. <Property name='status' type='string' key='status'>
  783. Search status,completed
  784. </Property>
  785. </Properties>
  786. </Col>
  787. <Col sticky>
  788. <CodeGroup
  789. title="Request"
  790. tag="GET"
  791. label="/datasets/{dataset_id}/documents/{document_id}/segments"
  792. targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  793. >
  794. ```bash {{ title: 'cURL' }}
  795. curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
  796. --header 'Authorization: Bearer {api_key}' \
  797. --header 'Content-Type: application/json'
  798. ```
  799. </CodeGroup>
  800. <CodeGroup title="Response">
  801. ```json {{ title: 'Response' }}
  802. {
  803. "data": [{
  804. "id": "",
  805. "position": 1,
  806. "document_id": "",
  807. "content": "1",
  808. "answer": "1",
  809. "word_count": 25,
  810. "tokens": 0,
  811. "keywords": [
  812. "a"
  813. ],
  814. "index_node_id": "",
  815. "index_node_hash": "",
  816. "hit_count": 0,
  817. "enabled": true,
  818. "disabled_at": null,
  819. "disabled_by": null,
  820. "status": "completed",
  821. "created_by": "",
  822. "created_at": 1695312007,
  823. "indexing_at": 1695312007,
  824. "completed_at": 1695312007,
  825. "error": null,
  826. "stopped_at": null
  827. }],
  828. "doc_form": "text_model"
  829. }
  830. ```
  831. </CodeGroup>
  832. </Col>
  833. </Row>
  834. ---
  835. <Heading
  836. url='/datasets/{dataset_id}/segments/{segment_id}'
  837. method='DELETE'
  838. title='delete document segment'
  839. name='#delete_segment'
  840. />
  841. <Row>
  842. <Col>
  843. ### Path
  844. <Properties>
  845. <Property name='dataset_id' type='string' key='dataset_id'>
  846. Knowledge ID
  847. </Property>
  848. <Property name='segment_id' type='string' key='segment_id'>
  849. Document Segment ID
  850. </Property>
  851. </Properties>
  852. </Col>
  853. <Col sticky>
  854. <CodeGroup
  855. title="Request"
  856. tag="DELETE"
  857. label="/datasets/{dataset_id}/segments/{segment_id}"
  858. targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
  859. >
  860. ```bash {{ title: 'cURL' }}
  861. curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}' \
  862. --header 'Authorization: Bearer {api_key}' \
  863. --header 'Content-Type: application/json'
  864. ```
  865. </CodeGroup>
  866. <CodeGroup title="Response">
  867. ```json {{ title: 'Response' }}
  868. {
  869. "result": "success"
  870. }
  871. ```
  872. </CodeGroup>
  873. </Col>
  874. </Row>
  875. ---
  876. <Heading
  877. url='/datasets/{dataset_id}/segments/{segment_id}'
  878. method='POST'
  879. title='update document segment'
  880. name='#update_segment'
  881. />
  882. <Row>
  883. <Col>
  884. ### Path
  885. <Properties>
  886. <Property name='dataset_id' type='string' key='dataset_id'>
  887. Knowledge ID
  888. </Property>
  889. <Property name='segment_id' type='string' key='segment_id'>
  890. Document Segment ID
  891. </Property>
  892. </Properties>
  893. ### Request Body
  894. <Properties>
  895. <Property name='segments' type='object' key='segments'>
  896. - <code>content</code> (text) text content/question content, required
  897. - <code>answer</code> (text) Answer content, not required, passed if the Knowledge is in qa mode
  898. - <code>keywords</code> (list) keyword, not required
  899. - <code>enabled</code> (bool) false/true, not required
  900. </Property>
  901. </Properties>
  902. </Col>
  903. <Col sticky>
  904. <CodeGroup
  905. title="Request"
  906. tag="POST"
  907. label="/datasets/{dataset_id}/segments/{segment_id}"
  908. targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\"segments\": {\"content\": \"1\",\"answer\": \"1\", \"keywords\": [\"a\"], \"enabled\": false}}'`}
  909. >
  910. ```bash {{ title: 'cURL' }}
  911. curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
  912. --header 'Content-Type: application/json' \
  913. --data-raw '{
  914. "segments": {
  915. "content": "1",
  916. "answer": "1",
  917. "keywords": ["a"],
  918. "enabled": false
  919. }
  920. }'
  921. ```
  922. </CodeGroup>
  923. <CodeGroup title="Response">
  924. ```json {{ title: 'Response' }}
  925. {
  926. "data": [{
  927. "id": "",
  928. "position": 1,
  929. "document_id": "",
  930. "content": "1",
  931. "answer": "1",
  932. "word_count": 25,
  933. "tokens": 0,
  934. "keywords": [
  935. "a"
  936. ],
  937. "index_node_id": "",
  938. "index_node_hash": "",
  939. "hit_count": 0,
  940. "enabled": true,
  941. "disabled_at": null,
  942. "disabled_by": null,
  943. "status": "completed",
  944. "created_by": "",
  945. "created_at": 1695312007,
  946. "indexing_at": 1695312007,
  947. "completed_at": 1695312007,
  948. "error": null,
  949. "stopped_at": null
  950. }],
  951. "doc_form": "text_model"
  952. }
  953. ```
  954. </CodeGroup>
  955. </Col>
  956. </Row>
  957. ---
  958. <Row>
  959. <Col>
  960. ### Error message
  961. <Properties>
  962. <Property name='code' type='string' key='code'>
  963. Error code
  964. </Property>
  965. </Properties>
  966. <Properties>
  967. <Property name='status' type='number' key='status'>
  968. Error status
  969. </Property>
  970. </Properties>
  971. <Properties>
  972. <Property name='message' type='string' key='message'>
  973. Error message
  974. </Property>
  975. </Properties>
  976. </Col>
  977. <Col>
  978. <CodeGroup title="Example">
  979. ```json {{ title: 'Response' }}
  980. {
  981. "code": "no_file_uploaded",
  982. "message": "Please upload your file.",
  983. "status": 400
  984. }
  985. ```
  986. </CodeGroup>
  987. </Col>
  988. </Row>
  989. <table className="max-w-auto border-collapse border border-slate-400" style={{ maxWidth: 'none', width: 'auto' }}>
  990. <thead style={{ background: '#f9fafc' }}>
  991. <tr>
  992. <th className="p-2 border border-slate-300">code</th>
  993. <th className="p-2 border border-slate-300">status</th>
  994. <th className="p-2 border border-slate-300">message</th>
  995. </tr>
  996. </thead>
  997. <tbody>
  998. <tr>
  999. <td className="p-2 border border-slate-300">no_file_uploaded</td>
  1000. <td className="p-2 border border-slate-300">400</td>
  1001. <td className="p-2 border border-slate-300">Please upload your file.</td>
  1002. </tr>
  1003. <tr>
  1004. <td className="p-2 border border-slate-300">too_many_files</td>
  1005. <td className="p-2 border border-slate-300">400</td>
  1006. <td className="p-2 border border-slate-300">Only one file is allowed.</td>
  1007. </tr>
  1008. <tr>
  1009. <td className="p-2 border border-slate-300">file_too_large</td>
  1010. <td className="p-2 border border-slate-300">413</td>
  1011. <td className="p-2 border border-slate-300">File size exceeded.</td>
  1012. </tr>
  1013. <tr>
  1014. <td className="p-2 border border-slate-300">unsupported_file_type</td>
  1015. <td className="p-2 border border-slate-300">415</td>
  1016. <td className="p-2 border border-slate-300">File type not allowed.</td>
  1017. </tr>
  1018. <tr>
  1019. <td className="p-2 border border-slate-300">high_quality_dataset_only</td>
  1020. <td className="p-2 border border-slate-300">400</td>
  1021. <td className="p-2 border border-slate-300">Current operation only supports 'high-quality' datasets.</td>
  1022. </tr>
  1023. <tr>
  1024. <td className="p-2 border border-slate-300">dataset_not_initialized</td>
  1025. <td className="p-2 border border-slate-300">400</td>
  1026. <td className="p-2 border border-slate-300">The dataset is still being initialized or indexing. Please wait a moment.</td>
  1027. </tr>
  1028. <tr>
  1029. <td className="p-2 border border-slate-300">archived_document_immutable</td>
  1030. <td className="p-2 border border-slate-300">403</td>
  1031. <td className="p-2 border border-slate-300">The archived document is not editable.</td>
  1032. </tr>
  1033. <tr>
  1034. <td className="p-2 border border-slate-300">dataset_name_duplicate</td>
  1035. <td className="p-2 border border-slate-300">409</td>
  1036. <td className="p-2 border border-slate-300">The dataset name already exists. Please modify your dataset name.</td>
  1037. </tr>
  1038. <tr>
  1039. <td className="p-2 border border-slate-300">invalid_action</td>
  1040. <td className="p-2 border border-slate-300">400</td>
  1041. <td className="p-2 border border-slate-300">Invalid action.</td>
  1042. </tr>
  1043. <tr>
  1044. <td className="p-2 border border-slate-300">document_already_finished</td>
  1045. <td className="p-2 border border-slate-300">400</td>
  1046. <td className="p-2 border border-slate-300">The document has been processed. Please refresh the page or go to the document details.</td>
  1047. </tr>
  1048. <tr>
  1049. <td className="p-2 border border-slate-300">document_indexing</td>
  1050. <td className="p-2 border border-slate-300">400</td>
  1051. <td className="p-2 border border-slate-300">The document is being processed and cannot be edited.</td>
  1052. </tr>
  1053. <tr>
  1054. <td className="p-2 border border-slate-300">invalid_metadata</td>
  1055. <td className="p-2 border border-slate-300">400</td>
  1056. <td className="p-2 border border-slate-300">The metadata content is incorrect. Please check and verify.</td>
  1057. </tr>
  1058. </tbody>
  1059. </table>
  1060. <div className="pb-4" />