commands.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772
  1. import base64
  2. import datetime
  3. import json
  4. import math
  5. import random
  6. import secrets
  7. import string
  8. import threading
  9. import time
  10. import uuid
  11. import click
  12. import qdrant_client
  13. from constants.languages import user_input_form_template
  14. from core.embedding.cached_embedding import CacheEmbedding
  15. from core.index.index import IndexBuilder
  16. from core.model_manager import ModelManager
  17. from core.model_runtime.entities.model_entities import ModelType
  18. from extensions.ext_database import db
  19. from flask import Flask, current_app
  20. from libs.helper import email as email_validate
  21. from libs.password import hash_password, password_pattern, valid_password
  22. from libs.rsa import generate_key_pair
  23. from models.account import InvitationCode, Tenant, TenantAccountJoin
  24. from models.dataset import Dataset, DatasetCollectionBinding, DatasetQuery, Document
  25. from models.model import Account, App, AppModelConfig, Message, MessageAnnotation, InstalledApp
  26. from models.provider import Provider, ProviderModel, ProviderQuotaType, ProviderType
  27. from qdrant_client.http.models import TextIndexParams, TextIndexType, TokenizerType
  28. from tqdm import tqdm
  29. from werkzeug.exceptions import NotFound
  30. @click.command('reset-password', help='Reset the account password.')
  31. @click.option('--email', prompt=True, help='The email address of the account whose password you need to reset')
  32. @click.option('--new-password', prompt=True, help='the new password.')
  33. @click.option('--password-confirm', prompt=True, help='the new password confirm.')
  34. def reset_password(email, new_password, password_confirm):
  35. if str(new_password).strip() != str(password_confirm).strip():
  36. click.echo(click.style('sorry. The two passwords do not match.', fg='red'))
  37. return
  38. account = db.session.query(Account). \
  39. filter(Account.email == email). \
  40. one_or_none()
  41. if not account:
  42. click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
  43. return
  44. try:
  45. valid_password(new_password)
  46. except:
  47. click.echo(
  48. click.style('sorry. The passwords must match {} '.format(password_pattern), fg='red'))
  49. return
  50. # generate password salt
  51. salt = secrets.token_bytes(16)
  52. base64_salt = base64.b64encode(salt).decode()
  53. # encrypt password with salt
  54. password_hashed = hash_password(new_password, salt)
  55. base64_password_hashed = base64.b64encode(password_hashed).decode()
  56. account.password = base64_password_hashed
  57. account.password_salt = base64_salt
  58. db.session.commit()
  59. click.echo(click.style('Congratulations!, password has been reset.', fg='green'))
  60. @click.command('reset-email', help='Reset the account email.')
  61. @click.option('--email', prompt=True, help='The old email address of the account whose email you need to reset')
  62. @click.option('--new-email', prompt=True, help='the new email.')
  63. @click.option('--email-confirm', prompt=True, help='the new email confirm.')
  64. def reset_email(email, new_email, email_confirm):
  65. if str(new_email).strip() != str(email_confirm).strip():
  66. click.echo(click.style('Sorry, new email and confirm email do not match.', fg='red'))
  67. return
  68. account = db.session.query(Account). \
  69. filter(Account.email == email). \
  70. one_or_none()
  71. if not account:
  72. click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
  73. return
  74. try:
  75. email_validate(new_email)
  76. except:
  77. click.echo(
  78. click.style('sorry. {} is not a valid email. '.format(email), fg='red'))
  79. return
  80. account.email = new_email
  81. db.session.commit()
  82. click.echo(click.style('Congratulations!, email has been reset.', fg='green'))
  83. @click.command('reset-encrypt-key-pair', help='Reset the asymmetric key pair of workspace for encrypt LLM credentials. '
  84. 'After the reset, all LLM credentials will become invalid, '
  85. 'requiring re-entry.'
  86. 'Only support SELF_HOSTED mode.')
  87. @click.confirmation_option(prompt=click.style('Are you sure you want to reset encrypt key pair?'
  88. ' this operation cannot be rolled back!', fg='red'))
  89. def reset_encrypt_key_pair():
  90. if current_app.config['EDITION'] != 'SELF_HOSTED':
  91. click.echo(click.style('Sorry, only support SELF_HOSTED mode.', fg='red'))
  92. return
  93. tenant = db.session.query(Tenant).first()
  94. if not tenant:
  95. click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
  96. return
  97. tenant.encrypt_public_key = generate_key_pair(tenant.id)
  98. db.session.query(Provider).filter(Provider.provider_type == 'custom').delete()
  99. db.session.query(ProviderModel).delete()
  100. db.session.commit()
  101. click.echo(click.style('Congratulations! '
  102. 'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
  103. @click.command('generate-invitation-codes', help='Generate invitation codes.')
  104. @click.option('--batch', help='The batch of invitation codes.')
  105. @click.option('--count', prompt=True, help='Invitation codes count.')
  106. def generate_invitation_codes(batch, count):
  107. if not batch:
  108. now = datetime.datetime.now()
  109. batch = now.strftime('%Y%m%d%H%M%S')
  110. if not count or int(count) <= 0:
  111. click.echo(click.style('sorry. the count must be greater than 0.', fg='red'))
  112. return
  113. count = int(count)
  114. click.echo('Start generate {} invitation codes for batch {}.'.format(count, batch))
  115. codes = ''
  116. for i in range(count):
  117. code = generate_invitation_code()
  118. invitation_code = InvitationCode(
  119. code=code,
  120. batch=batch
  121. )
  122. db.session.add(invitation_code)
  123. click.echo(code)
  124. codes += code + "\n"
  125. db.session.commit()
  126. filename = 'storage/invitation-codes-{}.txt'.format(batch)
  127. with open(filename, 'w') as f:
  128. f.write(codes)
  129. click.echo(click.style(
  130. 'Congratulations! Generated {} invitation codes for batch {} and saved to the file \'{}\''.format(count, batch,
  131. filename),
  132. fg='green'))
  133. def generate_invitation_code():
  134. code = generate_upper_string()
  135. while db.session.query(InvitationCode).filter(InvitationCode.code == code).count() > 0:
  136. code = generate_upper_string()
  137. return code
  138. def generate_upper_string():
  139. letters_digits = string.ascii_uppercase + string.digits
  140. result = ""
  141. for i in range(8):
  142. result += random.choice(letters_digits)
  143. return result
  144. @click.command('recreate-all-dataset-indexes', help='Recreate all dataset indexes.')
  145. def recreate_all_dataset_indexes():
  146. click.echo(click.style('Start recreate all dataset indexes.', fg='green'))
  147. recreate_count = 0
  148. page = 1
  149. while True:
  150. try:
  151. datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
  152. .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
  153. except NotFound:
  154. break
  155. page += 1
  156. for dataset in datasets:
  157. try:
  158. click.echo('Recreating dataset index: {}'.format(dataset.id))
  159. index = IndexBuilder.get_index(dataset, 'high_quality')
  160. if index and index._is_origin():
  161. index.recreate_dataset(dataset)
  162. recreate_count += 1
  163. else:
  164. click.echo('passed.')
  165. except Exception as e:
  166. click.echo(
  167. click.style('Recreate dataset index error: {} {}'.format(e.__class__.__name__, str(e)), fg='red'))
  168. continue
  169. click.echo(click.style('Congratulations! Recreate {} dataset indexes.'.format(recreate_count), fg='green'))
  170. @click.command('clean-unused-dataset-indexes', help='Clean unused dataset indexes.')
  171. def clean_unused_dataset_indexes():
  172. click.echo(click.style('Start clean unused dataset indexes.', fg='green'))
  173. clean_days = int(current_app.config.get('CLEAN_DAY_SETTING'))
  174. start_at = time.perf_counter()
  175. thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)
  176. page = 1
  177. while True:
  178. try:
  179. datasets = db.session.query(Dataset).filter(Dataset.created_at < thirty_days_ago) \
  180. .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
  181. except NotFound:
  182. break
  183. page += 1
  184. for dataset in datasets:
  185. dataset_query = db.session.query(DatasetQuery).filter(
  186. DatasetQuery.created_at > thirty_days_ago,
  187. DatasetQuery.dataset_id == dataset.id
  188. ).all()
  189. if not dataset_query or len(dataset_query) == 0:
  190. documents = db.session.query(Document).filter(
  191. Document.dataset_id == dataset.id,
  192. Document.indexing_status == 'completed',
  193. Document.enabled == True,
  194. Document.archived == False,
  195. Document.updated_at > thirty_days_ago
  196. ).all()
  197. if not documents or len(documents) == 0:
  198. try:
  199. # remove index
  200. vector_index = IndexBuilder.get_index(dataset, 'high_quality')
  201. kw_index = IndexBuilder.get_index(dataset, 'economy')
  202. # delete from vector index
  203. if vector_index:
  204. if dataset.collection_binding_id:
  205. vector_index.delete_by_group_id(dataset.id)
  206. else:
  207. if dataset.collection_binding_id:
  208. vector_index.delete_by_group_id(dataset.id)
  209. else:
  210. vector_index.delete()
  211. kw_index.delete()
  212. # update document
  213. update_params = {
  214. Document.enabled: False
  215. }
  216. Document.query.filter_by(dataset_id=dataset.id).update(update_params)
  217. db.session.commit()
  218. click.echo(click.style('Cleaned unused dataset {} from db success!'.format(dataset.id),
  219. fg='green'))
  220. except Exception as e:
  221. click.echo(
  222. click.style('clean dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
  223. fg='red'))
  224. end_at = time.perf_counter()
  225. click.echo(click.style('Cleaned unused dataset from db success latency: {}'.format(end_at - start_at), fg='green'))
  226. @click.command('sync-anthropic-hosted-providers', help='Sync anthropic hosted providers.')
  227. def sync_anthropic_hosted_providers():
  228. if not hosted_model_providers.anthropic:
  229. click.echo(click.style('Anthropic hosted provider is not configured.', fg='red'))
  230. return
  231. click.echo(click.style('Start sync anthropic hosted providers.', fg='green'))
  232. count = 0
  233. new_quota_limit = hosted_model_providers.anthropic.quota_limit
  234. page = 1
  235. while True:
  236. try:
  237. providers = db.session.query(Provider).filter(
  238. Provider.provider_name == 'anthropic',
  239. Provider.provider_type == ProviderType.SYSTEM.value,
  240. Provider.quota_type == ProviderQuotaType.TRIAL.value,
  241. Provider.quota_limit != new_quota_limit
  242. ).order_by(Provider.created_at.desc()).paginate(page=page, per_page=100)
  243. except NotFound:
  244. break
  245. page += 1
  246. for provider in providers:
  247. try:
  248. click.echo('Syncing tenant anthropic hosted provider: {}, origin: limit {}, used {}'
  249. .format(provider.tenant_id, provider.quota_limit, provider.quota_used))
  250. original_quota_limit = provider.quota_limit
  251. division = math.ceil(new_quota_limit / 1000)
  252. provider.quota_limit = new_quota_limit if original_quota_limit == 1000 \
  253. else original_quota_limit * division
  254. provider.quota_used = division * provider.quota_used
  255. db.session.commit()
  256. count += 1
  257. except Exception as e:
  258. click.echo(click.style(
  259. 'Sync tenant anthropic hosted provider error: {} {}'.format(e.__class__.__name__, str(e)),
  260. fg='red'))
  261. continue
  262. click.echo(click.style('Congratulations! Synced {} anthropic hosted providers.'.format(count), fg='green'))
  263. @click.command('create-qdrant-indexes', help='Create qdrant indexes.')
  264. def create_qdrant_indexes():
  265. click.echo(click.style('Start create qdrant indexes.', fg='green'))
  266. create_count = 0
  267. page = 1
  268. while True:
  269. try:
  270. datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
  271. .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
  272. except NotFound:
  273. break
  274. model_manager = ModelManager()
  275. page += 1
  276. for dataset in datasets:
  277. if dataset.index_struct_dict:
  278. if dataset.index_struct_dict['type'] != 'qdrant':
  279. try:
  280. click.echo('Create dataset qdrant index: {}'.format(dataset.id))
  281. try:
  282. embedding_model = model_manager.get_model_instance(
  283. tenant_id=dataset.tenant_id,
  284. provider=dataset.embedding_model_provider,
  285. model_type=ModelType.TEXT_EMBEDDING,
  286. model=dataset.embedding_model
  287. )
  288. except Exception:
  289. try:
  290. embedding_model = model_manager.get_default_model_instance(
  291. tenant_id=dataset.tenant_id,
  292. model_type=ModelType.TEXT_EMBEDDING,
  293. )
  294. dataset.embedding_model = embedding_model.model
  295. dataset.embedding_model_provider = embedding_model.provider
  296. except Exception:
  297. provider = Provider(
  298. id='provider_id',
  299. tenant_id=dataset.tenant_id,
  300. provider_name='openai',
  301. provider_type=ProviderType.SYSTEM.value,
  302. encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
  303. is_valid=True,
  304. )
  305. model_provider = OpenAIProvider(provider=provider)
  306. embedding_model = OpenAIEmbedding(name="text-embedding-ada-002",
  307. model_provider=model_provider)
  308. embeddings = CacheEmbedding(embedding_model)
  309. from core.index.vector_index.qdrant_vector_index import QdrantConfig, QdrantVectorIndex
  310. index = QdrantVectorIndex(
  311. dataset=dataset,
  312. config=QdrantConfig(
  313. endpoint=current_app.config.get('QDRANT_URL'),
  314. api_key=current_app.config.get('QDRANT_API_KEY'),
  315. root_path=current_app.root_path
  316. ),
  317. embeddings=embeddings
  318. )
  319. if index:
  320. index.create_qdrant_dataset(dataset)
  321. index_struct = {
  322. "type": 'qdrant',
  323. "vector_store": {
  324. "class_prefix": dataset.index_struct_dict['vector_store']['class_prefix']}
  325. }
  326. dataset.index_struct = json.dumps(index_struct)
  327. db.session.commit()
  328. create_count += 1
  329. else:
  330. click.echo('passed.')
  331. except Exception as e:
  332. click.echo(
  333. click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
  334. fg='red'))
  335. continue
  336. click.echo(click.style('Congratulations! Create {} dataset indexes.'.format(create_count), fg='green'))
  337. @click.command('update-qdrant-indexes', help='Update qdrant indexes.')
  338. def update_qdrant_indexes():
  339. click.echo(click.style('Start Update qdrant indexes.', fg='green'))
  340. create_count = 0
  341. page = 1
  342. while True:
  343. try:
  344. datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
  345. .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
  346. except NotFound:
  347. break
  348. page += 1
  349. for dataset in datasets:
  350. if dataset.index_struct_dict:
  351. if dataset.index_struct_dict['type'] != 'qdrant':
  352. try:
  353. click.echo('Update dataset qdrant index: {}'.format(dataset.id))
  354. try:
  355. embedding_model = ModelFactory.get_embedding_model(
  356. tenant_id=dataset.tenant_id,
  357. model_provider_name=dataset.embedding_model_provider,
  358. model_name=dataset.embedding_model
  359. )
  360. except Exception:
  361. provider = Provider(
  362. id='provider_id',
  363. tenant_id=dataset.tenant_id,
  364. provider_name='openai',
  365. provider_type=ProviderType.CUSTOM.value,
  366. encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
  367. is_valid=True,
  368. )
  369. model_provider = OpenAIProvider(provider=provider)
  370. embedding_model = OpenAIEmbedding(name="text-embedding-ada-002",
  371. model_provider=model_provider)
  372. embeddings = CacheEmbedding(embedding_model)
  373. from core.index.vector_index.qdrant_vector_index import QdrantConfig, QdrantVectorIndex
  374. index = QdrantVectorIndex(
  375. dataset=dataset,
  376. config=QdrantConfig(
  377. endpoint=current_app.config.get('QDRANT_URL'),
  378. api_key=current_app.config.get('QDRANT_API_KEY'),
  379. root_path=current_app.root_path
  380. ),
  381. embeddings=embeddings
  382. )
  383. if index:
  384. index.update_qdrant_dataset(dataset)
  385. create_count += 1
  386. else:
  387. click.echo('passed.')
  388. except Exception as e:
  389. click.echo(
  390. click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
  391. fg='red'))
  392. continue
  393. click.echo(click.style('Congratulations! Update {} dataset indexes.'.format(create_count), fg='green'))
  394. @click.command('normalization-collections', help='restore all collections in one')
  395. def normalization_collections():
  396. click.echo(click.style('Start normalization collections.', fg='green'))
  397. normalization_count = []
  398. page = 1
  399. while True:
  400. try:
  401. datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
  402. .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=100)
  403. except NotFound:
  404. break
  405. datasets_result = datasets.items
  406. page += 1
  407. for i in range(0, len(datasets_result), 5):
  408. threads = []
  409. sub_datasets = datasets_result[i:i + 5]
  410. for dataset in sub_datasets:
  411. document_format_thread = threading.Thread(target=deal_dataset_vector, kwargs={
  412. 'flask_app': current_app._get_current_object(),
  413. 'dataset': dataset,
  414. 'normalization_count': normalization_count
  415. })
  416. threads.append(document_format_thread)
  417. document_format_thread.start()
  418. for thread in threads:
  419. thread.join()
  420. click.echo(click.style('Congratulations! restore {} dataset indexes.'.format(len(normalization_count)), fg='green'))
  421. @click.command('add-qdrant-full-text-index', help='add qdrant full text index')
  422. def add_qdrant_full_text_index():
  423. click.echo(click.style('Start add full text index.', fg='green'))
  424. binds = db.session.query(DatasetCollectionBinding).all()
  425. if binds and current_app.config['VECTOR_STORE'] == 'qdrant':
  426. qdrant_url = current_app.config['QDRANT_URL']
  427. qdrant_api_key = current_app.config['QDRANT_API_KEY']
  428. client = qdrant_client.QdrantClient(
  429. qdrant_url,
  430. api_key=qdrant_api_key, # For Qdrant Cloud, None for local instance
  431. )
  432. for bind in binds:
  433. try:
  434. text_index_params = TextIndexParams(
  435. type=TextIndexType.TEXT,
  436. tokenizer=TokenizerType.MULTILINGUAL,
  437. min_token_len=2,
  438. max_token_len=20,
  439. lowercase=True
  440. )
  441. client.create_payload_index(bind.collection_name, 'page_content',
  442. field_schema=text_index_params)
  443. except Exception as e:
  444. click.echo(
  445. click.style('Create full text index error: {} {}'.format(e.__class__.__name__, str(e)),
  446. fg='red'))
  447. click.echo(
  448. click.style(
  449. 'Congratulations! add collection {} full text index successful.'.format(bind.collection_name),
  450. fg='green'))
  451. def deal_dataset_vector(flask_app: Flask, dataset: Dataset, normalization_count: list):
  452. with flask_app.app_context():
  453. try:
  454. click.echo('restore dataset index: {}'.format(dataset.id))
  455. try:
  456. embedding_model = ModelFactory.get_embedding_model(
  457. tenant_id=dataset.tenant_id,
  458. model_provider_name=dataset.embedding_model_provider,
  459. model_name=dataset.embedding_model
  460. )
  461. except Exception:
  462. provider = Provider(
  463. id='provider_id',
  464. tenant_id=dataset.tenant_id,
  465. provider_name='openai',
  466. provider_type=ProviderType.CUSTOM.value,
  467. encrypted_config=json.dumps({'openai_api_key': 'TEST'}),
  468. is_valid=True,
  469. )
  470. model_provider = OpenAIProvider(provider=provider)
  471. embedding_model = OpenAIEmbedding(name="text-embedding-ada-002",
  472. model_provider=model_provider)
  473. embeddings = CacheEmbedding(embedding_model)
  474. dataset_collection_binding = db.session.query(DatasetCollectionBinding). \
  475. filter(DatasetCollectionBinding.provider_name == embedding_model.model_provider.provider_name,
  476. DatasetCollectionBinding.model_name == embedding_model.name). \
  477. order_by(DatasetCollectionBinding.created_at). \
  478. first()
  479. if not dataset_collection_binding:
  480. dataset_collection_binding = DatasetCollectionBinding(
  481. provider_name=embedding_model.model_provider.provider_name,
  482. model_name=embedding_model.name,
  483. collection_name="Vector_index_" + str(uuid.uuid4()).replace("-", "_") + '_Node'
  484. )
  485. db.session.add(dataset_collection_binding)
  486. db.session.commit()
  487. from core.index.vector_index.qdrant_vector_index import QdrantConfig, QdrantVectorIndex
  488. index = QdrantVectorIndex(
  489. dataset=dataset,
  490. config=QdrantConfig(
  491. endpoint=current_app.config.get('QDRANT_URL'),
  492. api_key=current_app.config.get('QDRANT_API_KEY'),
  493. root_path=current_app.root_path
  494. ),
  495. embeddings=embeddings
  496. )
  497. if index:
  498. # index.delete_by_group_id(dataset.id)
  499. index.restore_dataset_in_one(dataset, dataset_collection_binding)
  500. else:
  501. click.echo('passed.')
  502. normalization_count.append(1)
  503. except Exception as e:
  504. click.echo(
  505. click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
  506. fg='red'))
  507. @click.command('update_app_model_configs', help='Migrate data to support paragraph variable.')
  508. @click.option("--batch-size", default=500, help="Number of records to migrate in each batch.")
  509. def update_app_model_configs(batch_size):
  510. pre_prompt_template = '{{default_input}}'
  511. click.secho("Start migrate old data that the text generator can support paragraph variable.", fg='green')
  512. total_records = db.session.query(AppModelConfig) \
  513. .join(App, App.app_model_config_id == AppModelConfig.id) \
  514. .filter(App.mode == 'completion') \
  515. .count()
  516. if total_records == 0:
  517. click.secho("No data to migrate.", fg='green')
  518. return
  519. num_batches = (total_records + batch_size - 1) // batch_size
  520. with tqdm(total=total_records, desc="Migrating Data") as pbar:
  521. for i in range(num_batches):
  522. offset = i * batch_size
  523. limit = min(batch_size, total_records - offset)
  524. click.secho(f"Fetching batch {i + 1}/{num_batches} from source database...", fg='green')
  525. data_batch = db.session.query(AppModelConfig) \
  526. .join(App, App.app_model_config_id == AppModelConfig.id) \
  527. .filter(App.mode == 'completion') \
  528. .order_by(App.created_at) \
  529. .offset(offset).limit(limit).all()
  530. if not data_batch:
  531. click.secho("No more data to migrate.", fg='green')
  532. break
  533. try:
  534. click.secho(f"Migrating {len(data_batch)} records...", fg='green')
  535. for data in data_batch:
  536. # click.secho(f"Migrating data {data.id}, pre_prompt: {data.pre_prompt}, user_input_form: {data.user_input_form}", fg='green')
  537. if data.pre_prompt is None:
  538. data.pre_prompt = pre_prompt_template
  539. else:
  540. if pre_prompt_template in data.pre_prompt:
  541. continue
  542. data.pre_prompt += pre_prompt_template
  543. app_data = db.session.query(App) \
  544. .filter(App.id == data.app_id) \
  545. .one()
  546. account_data = db.session.query(Account) \
  547. .join(TenantAccountJoin, Account.id == TenantAccountJoin.account_id) \
  548. .filter(TenantAccountJoin.role == 'owner') \
  549. .filter(TenantAccountJoin.tenant_id == app_data.tenant_id) \
  550. .one_or_none()
  551. if not account_data:
  552. continue
  553. if data.user_input_form is None or data.user_input_form == 'null':
  554. data.user_input_form = json.dumps(user_input_form_template[account_data.interface_language])
  555. else:
  556. raw_json_data = json.loads(data.user_input_form)
  557. raw_json_data.append(user_input_form_template[account_data.interface_language][0])
  558. data.user_input_form = json.dumps(raw_json_data)
  559. # click.secho(f"Updated data {data.id}, pre_prompt: {data.pre_prompt}, user_input_form: {data.user_input_form}", fg='green')
  560. db.session.commit()
  561. except Exception as e:
  562. click.secho(f"Error while migrating data: {e}, app_id: {data.app_id}, app_model_config_id: {data.id}",
  563. fg='red')
  564. continue
  565. click.secho(f"Successfully migrated batch {i + 1}/{num_batches}.", fg='green')
  566. pbar.update(len(data_batch))
  567. @click.command('migrate_default_input_to_dataset_query_variable')
  568. @click.option("--batch-size", default=500, help="Number of records to migrate in each batch.")
  569. def migrate_default_input_to_dataset_query_variable(batch_size):
  570. click.secho("Starting...", fg='green')
  571. total_records = db.session.query(AppModelConfig) \
  572. .join(App, App.app_model_config_id == AppModelConfig.id) \
  573. .filter(App.mode == 'completion') \
  574. .filter(AppModelConfig.dataset_query_variable == None) \
  575. .count()
  576. if total_records == 0:
  577. click.secho("No data to migrate.", fg='green')
  578. return
  579. num_batches = (total_records + batch_size - 1) // batch_size
  580. with tqdm(total=total_records, desc="Migrating Data") as pbar:
  581. for i in range(num_batches):
  582. offset = i * batch_size
  583. limit = min(batch_size, total_records - offset)
  584. click.secho(f"Fetching batch {i + 1}/{num_batches} from source database...", fg='green')
  585. data_batch = db.session.query(AppModelConfig) \
  586. .join(App, App.app_model_config_id == AppModelConfig.id) \
  587. .filter(App.mode == 'completion') \
  588. .filter(AppModelConfig.dataset_query_variable == None) \
  589. .order_by(App.created_at) \
  590. .offset(offset).limit(limit).all()
  591. if not data_batch:
  592. click.secho("No more data to migrate.", fg='green')
  593. break
  594. try:
  595. click.secho(f"Migrating {len(data_batch)} records...", fg='green')
  596. for data in data_batch:
  597. config = AppModelConfig.to_dict(data)
  598. tools = config["agent_mode"]["tools"]
  599. dataset_exists = "dataset" in str(tools)
  600. if not dataset_exists:
  601. continue
  602. user_input_form = config.get("user_input_form", [])
  603. for form in user_input_form:
  604. paragraph = form.get('paragraph')
  605. if paragraph \
  606. and paragraph.get('variable') == 'query':
  607. data.dataset_query_variable = 'query'
  608. break
  609. if paragraph \
  610. and paragraph.get('variable') == 'default_input':
  611. data.dataset_query_variable = 'default_input'
  612. break
  613. db.session.commit()
  614. except Exception as e:
  615. click.secho(f"Error while migrating data: {e}, app_id: {data.app_id}, app_model_config_id: {data.id}",
  616. fg='red')
  617. continue
  618. click.secho(f"Successfully migrated batch {i + 1}/{num_batches}.", fg='green')
  619. pbar.update(len(data_batch))
  620. @click.command('add-annotation-question-field-value', help='add annotation question value')
  621. def add_annotation_question_field_value():
  622. click.echo(click.style('Start add annotation question value.', fg='green'))
  623. message_annotations = db.session.query(MessageAnnotation).all()
  624. message_annotation_deal_count = 0
  625. if message_annotations:
  626. for message_annotation in message_annotations:
  627. try:
  628. if message_annotation.message_id and not message_annotation.question:
  629. message = db.session.query(Message).filter(
  630. Message.id == message_annotation.message_id
  631. ).first()
  632. message_annotation.question = message.query
  633. db.session.add(message_annotation)
  634. db.session.commit()
  635. message_annotation_deal_count += 1
  636. except Exception as e:
  637. click.echo(
  638. click.style('Add annotation question value error: {} {}'.format(e.__class__.__name__, str(e)),
  639. fg='red'))
  640. click.echo(
  641. click.style(f'Congratulations! add annotation question value successful. Deal count {message_annotation_deal_count}', fg='green'))
  642. def register_commands(app):
  643. app.cli.add_command(reset_password)
  644. app.cli.add_command(reset_email)
  645. app.cli.add_command(generate_invitation_codes)
  646. app.cli.add_command(reset_encrypt_key_pair)
  647. app.cli.add_command(recreate_all_dataset_indexes)
  648. app.cli.add_command(sync_anthropic_hosted_providers)
  649. app.cli.add_command(clean_unused_dataset_indexes)
  650. app.cli.add_command(create_qdrant_indexes)
  651. app.cli.add_command(update_qdrant_indexes)
  652. app.cli.add_command(update_app_model_configs)
  653. app.cli.add_command(normalization_collections)
  654. app.cli.add_command(migrate_default_input_to_dataset_query_variable)
  655. app.cli.add_command(add_qdrant_full_text_index)
  656. app.cli.add_command(add_annotation_question_field_value)