clean_embedding_cache_task.py 1.1 KB

12345678910111213141516171819202122232425262728293031
  1. import datetime
  2. import time
  3. import click
  4. from flask import current_app
  5. from werkzeug.exceptions import NotFound
  6. import app
  7. from extensions.ext_database import db
  8. from models.dataset import Embedding
  @app.celery.task(queue='dataset')
  def clean_embedding_cache_task():
      """Delete cached ``Embedding`` rows older than the configured age.

      The retention window, in days, is read from the Flask config key
      ``CLEAN_DAY_SETTING``. Rows whose ``created_at`` is earlier than
      ``now - clean_days`` are deleted in batches of 100, with one commit
      per batch so that no single transaction grows unbounded.

      Progress and total latency are reported through ``click.echo``.
      """
      click.echo(click.style('Start clean embedding cache.', fg='green'))
      clean_days = int(current_app.config.get('CLEAN_DAY_SETTING'))
      start_at = time.perf_counter()
      cutoff = datetime.datetime.now() - datetime.timedelta(days=clean_days)
      batch_size = 100

      while True:
          # Always fetch the *first* batch of matching rows. The previous
          # implementation advanced a page counter after each delete, but the
          # committed deletes shift the remaining rows forward, so every
          # other page of expired rows was skipped.
          embeddings = (
              db.session.query(Embedding)
              .filter(Embedding.created_at < cutoff)
              .order_by(Embedding.created_at.desc())
              .limit(batch_size)
              .all()
          )
          if not embeddings:
              # Nothing left that is older than the cutoff.
              break

          for embedding in embeddings:
              db.session.delete(embedding)
          # Commit per batch: the deleted rows must be gone before the next
          # query, otherwise the same batch would be fetched again.
          db.session.commit()

      end_at = time.perf_counter()
      click.echo(click.style('Cleaned embedding cache from db success latency: {}'.format(end_at - start_at), fg='green'))