website.py

from flask_restful import Resource, reqparse

from controllers.console import api
from controllers.console.datasets.error import WebsiteCrawlError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from libs.login import login_required
from services.website_service import WebsiteService


class WebsiteCrawlApi(Resource):
    """Start a website crawl job via the configured provider."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument("provider", type=str, choices=["firecrawl"], required=True, nullable=True, location="json")
        parser.add_argument("url", type=str, required=True, nullable=True, location="json")
        parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
        args = parser.parse_args()
        WebsiteService.document_create_args_validate(args)
        # crawl url
        try:
            result = WebsiteService.crawl_url(args)
        except Exception as e:
            raise WebsiteCrawlError(str(e))
        return result, 200


class WebsiteCrawlStatusApi(Resource):
    """Return the status of a previously started crawl job."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, job_id: str):
        parser = reqparse.RequestParser()
        parser.add_argument("provider", type=str, choices=["firecrawl"], required=True, location="args")
        args = parser.parse_args()
        # get crawl status
        try:
            result = WebsiteService.get_crawl_status(job_id, args["provider"])
        except Exception as e:
            raise WebsiteCrawlError(str(e))
        return result, 200


api.add_resource(WebsiteCrawlApi, "/website/crawl")
api.add_resource(WebsiteCrawlStatusApi, "/website/crawl/status/<string:job_id>")
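

# A hypothetical client-side sketch of calling the two endpoints registered above.
# The base URL, the authenticated session, and the Firecrawl "options" values are
# assumptions for illustration; the actual response shape comes from WebsiteService.
#
#   import requests
#
#   base = "http://localhost:5001/console/api"  # assumed console API prefix
#   session = requests.Session()                # assumes an already-authenticated console session
#
#   # start a crawl job (JSON body mirrors the reqparse arguments in WebsiteCrawlApi)
#   resp = session.post(
#       f"{base}/website/crawl",
#       json={"provider": "firecrawl", "url": "https://example.com", "options": {"limit": 10}},
#   )
#   job_id = resp.json().get("job_id")  # key name depends on WebsiteService.crawl_url
#
#   # poll the crawl status (query string mirrors WebsiteCrawlStatusApi)
#   status = session.get(f"{base}/website/crawl/status/{job_id}", params={"provider": "firecrawl"})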