123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378 |
- import base64
- import json
- import secrets
- import click
- from flask import current_app
- from werkzeug.exceptions import NotFound
- from core.rag.datasource.vdb.vector_factory import Vector
- from core.rag.models.document import Document
- from extensions.ext_database import db
- from libs.helper import email as email_validate
- from libs.password import hash_password, password_pattern, valid_password
- from libs.rsa import generate_key_pair
- from models.account import Tenant
- from models.dataset import Dataset, DatasetCollectionBinding, DocumentSegment
- from models.dataset import Document as DatasetDocument
- from models.model import Account, App, AppAnnotationSetting, MessageAnnotation
- from models.provider import Provider, ProviderModel
@click.command('reset-password', help='Reset the account password.')
@click.option('--email', prompt=True, help='The email address of the account whose password you need to reset')
@click.option('--new-password', prompt=True, help='the new password.')
@click.option('--password-confirm', prompt=True, help='the new password confirm.')
def reset_password(email, new_password, password_confirm):
    """
    Reset the password of the account whose email equals ``email``.

    The new password must equal its confirmation (surrounding whitespace is
    ignored) and must pass ``valid_password``. On success a fresh random salt
    is generated and both the salt and the salted hash are stored
    base64-encoded on the account, then committed.

    NOTE(review): nothing in this function checks the deployment edition or
    whether the account is an owner; any account matching the email is updated.

    :param email: email address used to look up the account
    :param new_password: the new password
    :param password_confirm: must match ``new_password``
    :return: None; the outcome is reported on the console
    """
    # Normalise once so the value that is compared, validated and hashed is
    # the same one. Previously the comparison ignored surrounding whitespace
    # while the stored hash did not.
    new_password = str(new_password).strip()
    if new_password != str(password_confirm).strip():
        click.echo(click.style('sorry. The two passwords do not match.', fg='red'))
        return

    account = db.session.query(Account). \
        filter(Account.email == email). \
        one_or_none()

    if not account:
        click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
        return

    try:
        valid_password(new_password)
    except Exception:
        # Deliberately not a bare ``except``: Ctrl-C / SystemExit must still
        # be able to abort the command.
        click.echo(
            click.style('sorry. The passwords must match {} '.format(password_pattern), fg='red'))
        return

    # generate password salt
    salt = secrets.token_bytes(16)
    base64_salt = base64.b64encode(salt).decode()

    # hash the password with the salt; both values are stored base64-encoded
    password_hashed = hash_password(new_password, salt)
    base64_password_hashed = base64.b64encode(password_hashed).decode()

    account.password = base64_password_hashed
    account.password_salt = base64_salt
    db.session.commit()
    click.echo(click.style('Congratulations!, password has been reset.', fg='green'))
@click.command('reset-email', help='Reset the account email.')
@click.option('--email', prompt=True, help='The old email address of the account whose email you need to reset')
@click.option('--new-email', prompt=True, help='the new email.')
@click.option('--email-confirm', prompt=True, help='the new email confirm.')
def reset_email(email, new_email, email_confirm):
    """
    Replace the email address of the account whose email equals ``email``.

    The new address must equal its confirmation (surrounding whitespace is
    ignored) and must pass ``email_validate`` before it is stored and
    committed.

    :param email: current email address used to look up the account
    :param new_email: the replacement email address
    :param email_confirm: must match ``new_email``
    :return: None; the outcome is reported on the console
    """
    # Normalise once so the compared, validated and stored value is identical.
    new_email = str(new_email).strip()
    if new_email != str(email_confirm).strip():
        click.echo(click.style('Sorry, new email and confirm email do not match.', fg='red'))
        return

    account = db.session.query(Account). \
        filter(Account.email == email). \
        one_or_none()

    if not account:
        click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
        return

    try:
        email_validate(new_email)
    except Exception:
        # Report the address that was actually rejected (the new one); the
        # old address was previously printed here by mistake.
        click.echo(
            click.style('sorry. {} is not a valid email. '.format(new_email), fg='red'))
        return

    account.email = new_email
    db.session.commit()
    click.echo(click.style('Congratulations!, email has been reset.', fg='green'))
@click.command('reset-encrypt-key-pair', help='Reset the asymmetric key pair of workspace for encrypt LLM credentials. '
                                              'After the reset, all LLM credentials will become invalid, '
                                              'requiring re-entry. '
                                              'Only support SELF_HOSTED mode.')
@click.confirmation_option(prompt=click.style('Are you sure you want to reset encrypt key pair?'
                                              ' this operation cannot be rolled back!', fg='red'))
def reset_encrypt_key_pair():
    """
    Reset the encrypt key pair of every workspace (tenant).

    Refuses to run unless ``current_app.config['EDITION']`` is
    ``'SELF_HOSTED'``. For each tenant the value returned by
    ``generate_key_pair(tenant.id)`` is stored as ``encrypt_public_key``, the
    tenant's ``Provider`` rows of type ``'custom'`` and all of its
    ``ProviderModel`` rows are deleted, and the change is committed per tenant.

    :return: None; progress is reported on the console
    """
    if current_app.config['EDITION'] != 'SELF_HOSTED':
        click.echo(click.style('Sorry, only support SELF_HOSTED mode.', fg='red'))
        return

    tenants = db.session.query(Tenant).all()
    # The emptiness check has to happen before the loop: a per-item check
    # inside it can never fire, so an empty table used to print nothing at all.
    if not tenants:
        click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
        return

    for tenant in tenants:
        tenant.encrypt_public_key = generate_key_pair(tenant.id)

        # Remove the tenant's custom providers and provider models
        # (the command help states stored LLM credentials become invalid).
        db.session.query(Provider).filter(Provider.provider_type == 'custom', Provider.tenant_id == tenant.id).delete()
        db.session.query(ProviderModel).filter(ProviderModel.tenant_id == tenant.id).delete()
        # Committed per tenant, so a failure later in the loop does not undo
        # tenants that were already processed.
        db.session.commit()

        click.echo(click.style('Congratulations! '
                               'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
@click.command('vdb-migrate', help='migrate vector db.')
@click.option('--scope', default='all', prompt=False, help='The scope of vector database to migrate, Default is All.')
def vdb_migrate(scope: str):
    """
    Run the vector database migrations selected by ``scope``.

    ``'knowledge'`` runs the knowledge migration, ``'annotation'`` runs the
    annotation migration and ``'all'`` runs both, knowledge first. Any other
    value runs nothing.
    """
    migrate_everything = scope == 'all'

    if migrate_everything or scope == 'knowledge':
        migrate_knowledge_vector_database()

    if migrate_everything or scope == 'annotation':
        migrate_annotation_vector_database()
def migrate_annotation_vector_database():
    """
    Rebuild the annotation vector index of every app with status ``'normal'``.

    Apps are read newest-first in pages of 50; the loop stops when
    ``paginate`` raises ``NotFound``. For each app that has an annotation
    setting and an existing collection binding, the app's vector index is
    deleted and, when the app has annotations, re-created from the annotation
    questions. Apps without a setting or without a binding are counted as
    skipped. A failure on one app is reported and the run continues with the
    next app.

    :return: None; progress and a final summary are written to the console
    """
    click.echo(click.style('Start migrate annotation data.', fg='green'))
    create_count = 0
    skipped_count = 0
    total_count = 0
    page = 1
    while True:
        try:
            # get apps info
            apps = db.session.query(App).filter(
                App.status == 'normal'
            ).order_by(App.created_at.desc()).paginate(page=page, per_page=50)
        except NotFound:
            # Requesting a page past the last one ends the migration.
            break

        page += 1
        for app in apps:
            total_count = total_count + 1
            click.echo(f'Processing the {total_count} app {app.id}. '
                       + f'{create_count} created, {skipped_count} skipped.')
            try:
                click.echo('Create app annotation index: {}'.format(app.id))
                app_annotation_setting = db.session.query(AppAnnotationSetting).filter(
                    AppAnnotationSetting.app_id == app.id
                ).first()

                if not app_annotation_setting:
                    skipped_count = skipped_count + 1
                    click.echo('App annotation setting is disabled: {}'.format(app.id))
                    continue

                # get dataset_collection_binding info
                dataset_collection_binding = db.session.query(DatasetCollectionBinding).filter(
                    DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id
                ).first()
                if not dataset_collection_binding:
                    # Count this as skipped too, otherwise created + skipped
                    # in the summary no longer accounts for every app that
                    # did not fail.
                    skipped_count = skipped_count + 1
                    click.echo('App annotation collection binding is not exist: {}'.format(app.id))
                    continue

                annotations = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app.id).all()

                # In-memory Dataset keyed by the app id, only used to
                # construct the Vector; it is never added to the session here.
                dataset = Dataset(
                    id=app.id,
                    tenant_id=app.tenant_id,
                    indexing_technique='high_quality',
                    embedding_model_provider=dataset_collection_binding.provider_name,
                    embedding_model=dataset_collection_binding.model_name,
                    collection_binding_id=dataset_collection_binding.id
                )

                # One document per annotation, indexed by its question text.
                documents = [
                    Document(
                        page_content=annotation.question,
                        metadata={
                            "annotation_id": annotation.id,
                            "app_id": app.id,
                            "doc_id": annotation.id
                        }
                    )
                    for annotation in annotations
                ]

                vector = Vector(dataset, attributes=['doc_id', 'annotation_id', 'app_id'])
                click.echo(f"Start to migrate annotation, app_id: {app.id}.")

                try:
                    vector.delete()
                    click.echo(
                        click.style(f'Successfully delete vector index for app: {app.id}.',
                                    fg='green'))
                except Exception:
                    click.echo(
                        click.style(f'Failed to delete vector index for app {app.id}.',
                                    fg='red'))
                    # Re-raise with the original traceback; the outer handler
                    # reports the error and moves on to the next app.
                    raise

                if documents:
                    try:
                        click.echo(click.style(
                            f'Start to created vector index with {len(documents)} annotations for app {app.id}.',
                            fg='green'))
                        vector.create(documents)
                        click.echo(
                            click.style(f'Successfully created vector index for app {app.id}.', fg='green'))
                    except Exception:
                        click.echo(click.style(f'Failed to created vector index for app {app.id}.', fg='red'))
                        raise

                click.echo(f'Successfully migrated app annotation {app.id}.')
                create_count += 1
            except Exception as e:
                # Best effort: one failing app must not abort the whole run.
                click.echo(
                    click.style('Create app annotation index error: {} {}'.format(e.__class__.__name__, str(e)),
                                fg='red'))
                continue

    click.echo(
        click.style(f'Congratulations! Create {create_count} app annotation indexes, and skipped {skipped_count} apps.',
                    fg='green'))
def _resolve_collection_name(dataset, vector_type):
    """
    Return the collection name ``dataset`` uses in the ``vector_type`` store.

    :raises ValueError: if ``vector_type`` is not weaviate/qdrant/milvus, or
        if a qdrant dataset references a collection binding that does not exist
    """
    if vector_type not in ('weaviate', 'qdrant', 'milvus'):
        raise ValueError(f"Vector store {vector_type} is not supported.")

    # Only qdrant datasets with a collection binding take their name from the
    # binding; every other case derives the name from the dataset id.
    if vector_type == 'qdrant' and dataset.collection_binding_id:
        dataset_collection_binding = db.session.query(DatasetCollectionBinding). \
            filter(DatasetCollectionBinding.id == dataset.collection_binding_id). \
            one_or_none()
        if not dataset_collection_binding:
            raise ValueError('Dataset Collection Bindings is not exist!')
        return dataset_collection_binding.collection_name

    return Dataset.gen_collection_name_by_id(dataset.id)


def _load_segment_documents(dataset_id):
    """
    Build one index ``Document`` per completed, enabled segment of every
    completed, enabled, non-archived document of the dataset ``dataset_id``.
    """
    dataset_documents = db.session.query(DatasetDocument).filter(
        DatasetDocument.dataset_id == dataset_id,
        DatasetDocument.indexing_status == 'completed',
        # ``==`` is required here: these are SQL expressions, not Python bools.
        DatasetDocument.enabled == True,  # noqa: E712
        DatasetDocument.archived == False,  # noqa: E712
    ).all()

    documents = []
    for dataset_document in dataset_documents:
        segments = db.session.query(DocumentSegment).filter(
            DocumentSegment.document_id == dataset_document.id,
            DocumentSegment.status == 'completed',
            DocumentSegment.enabled == True  # noqa: E712
        ).all()

        documents.extend(
            Document(
                page_content=segment.content,
                metadata={
                    "doc_id": segment.index_node_id,
                    "doc_hash": segment.index_node_hash,
                    "document_id": segment.document_id,
                    "dataset_id": segment.dataset_id,
                }
            )
            for segment in segments
        )
    return documents


def migrate_knowledge_vector_database():
    """
    Rebuild the vector index of every ``'high_quality'`` dataset in the store
    named by the ``VECTOR_STORE`` config value.

    Datasets are read newest-first in pages of 50; the loop stops when
    ``paginate`` raises ``NotFound``. Datasets whose ``index_struct_dict``
    already has the target type are skipped. For the others the dataset's
    ``index_struct`` is rewritten for the target store, the vector index is
    deleted and, when segments exist, re-created from them; the dataset is
    then committed. A failure on one dataset rolls the session back, is
    reported, and the run continues with the next dataset.

    :return: None; progress and a final summary are written to the console
    """
    click.echo(click.style('Start migrate vector db.', fg='green'))
    create_count = 0
    skipped_count = 0
    total_count = 0
    config = current_app.config
    vector_type = config.get('VECTOR_STORE')
    page = 1
    while True:
        try:
            datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
                .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
        except NotFound:
            # Requesting a page past the last one ends the migration.
            break

        page += 1
        for dataset in datasets:
            total_count = total_count + 1
            click.echo(f'Processing the {total_count} dataset {dataset.id}. '
                       + f'{create_count} created, {skipped_count} skipped.')
            try:
                click.echo('Create dataset vdb index: {}'.format(dataset.id))
                current_index_struct = dataset.index_struct_dict
                if current_index_struct and current_index_struct['type'] == vector_type:
                    # Already indexed in the target store.
                    skipped_count = skipped_count + 1
                    continue

                collection_name = _resolve_collection_name(dataset, vector_type)
                # index_struct is updated before the Vector is constructed
                # from the dataset, as in the per-store branches this replaces.
                dataset.index_struct = json.dumps({
                    "type": vector_type,
                    "vector_store": {"class_prefix": collection_name}
                })

                vector = Vector(dataset)
                click.echo(f"Start to migrate dataset {dataset.id}.")

                try:
                    vector.delete()
                    click.echo(
                        click.style(f'Successfully delete vector index {collection_name} for dataset {dataset.id}.',
                                    fg='green'))
                except Exception:
                    click.echo(
                        click.style(f'Failed to delete vector index {collection_name} for dataset {dataset.id}.',
                                    fg='red'))
                    # Re-raise with the original traceback; the outer handler
                    # rolls back and moves on to the next dataset.
                    raise

                documents = _load_segment_documents(dataset.id)
                # NOTE(review): one Document is built per segment, so both
                # numbers in the message below are always equal.
                segments_count = len(documents)

                if documents:
                    try:
                        click.echo(click.style(
                            f'Start to created vector index with {len(documents)} documents of {segments_count} segments for dataset {dataset.id}.',
                            fg='green'))
                        vector.create(documents)
                        click.echo(
                            click.style(f'Successfully created vector index for dataset {dataset.id}.', fg='green'))
                    except Exception:
                        click.echo(click.style(f'Failed to created vector index for dataset {dataset.id}.', fg='red'))
                        raise

                db.session.add(dataset)
                db.session.commit()
                click.echo(f'Successfully migrated dataset {dataset.id}.')
                create_count += 1
            except Exception as e:
                # Discard the uncommitted index_struct change for this dataset.
                db.session.rollback()
                click.echo(
                    click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
                                fg='red'))
                continue

    click.echo(
        click.style(f'Congratulations! Create {create_count} dataset indexes, and skipped {skipped_count} datasets.',
                    fg='green'))
def register_commands(app):
    """
    Register this module's management commands on ``app.cli``.

    :param app: application object exposing a ``cli`` command group
    """
    # Registration order matches the order the commands are defined above.
    management_commands = (
        reset_password,
        reset_email,
        reset_encrypt_key_pair,
        vdb_migrate,
    )
    for management_command in management_commands:
        app.cli.add_command(management_command)
|