# check_no_chinese_comments.py (1.1 KB)

  1. import os
  2. import re
  3. from zhon.hanzi import punctuation
  4. def has_chinese_characters(text):
  5. for char in text:
  6. if '\u4e00' <= char <= '\u9fff' or char in punctuation:
  7. return True
  8. return False
  9. def check_file_for_chinese_comments(file_path):
  10. with open(file_path, 'r', encoding='utf-8') as file:
  11. for line_number, line in enumerate(file, start=1):
  12. if has_chinese_characters(line):
  13. print(f"Found Chinese characters in {file_path} on line {line_number}:")
  14. print(line.strip())
  15. return True
  16. return False
  17. def main():
  18. has_chinese = False
  19. excluded_files = ["model_template.py", 'stopwords.py', 'commands.py', 'indexing_runner.py']
  20. for root, _, files in os.walk("."):
  21. for file in files:
  22. if file.endswith(".py") and file not in excluded_files:
  23. file_path = os.path.join(root, file)
  24. if check_file_for_chinese_comments(file_path):
  25. has_chinese = True
  26. if has_chinese:
  27. raise Exception("Found Chinese characters in Python files. Please remove them.")
  28. if __name__ == "__main__":
  29. main()