@@ -632,8 +632,8 @@ class IndexingRunner:
return text
def format_split_text(self, text):
- regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q|$)"
+ regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q\d+:|$)"
- matches = re.findall(regex, text, re.MULTILINE)
+ matches = re.findall(regex, text, re.UNICODE)
return [
{