|  | @@ -18,6 +18,7 @@ from core.docstore.dataset_docstore import DatesetDocumentStore
 | 
											
												
													
														|  |  from core.index.keyword_table_index import KeywordTableIndex
 |  |  from core.index.keyword_table_index import KeywordTableIndex
 | 
											
												
													
														|  |  from core.index.readers.html_parser import HTMLParser
 |  |  from core.index.readers.html_parser import HTMLParser
 | 
											
												
													
														|  |  from core.index.readers.pdf_parser import PDFParser
 |  |  from core.index.readers.pdf_parser import PDFParser
 | 
											
												
													
														|  | 
 |  | +from core.index.spiltter.fixed_text_splitter import FixedRecursiveCharacterTextSplitter
 | 
											
												
													
														|  |  from core.index.vector_index import VectorIndex
 |  |  from core.index.vector_index import VectorIndex
 | 
											
												
													
														|  |  from core.llm.token_calculator import TokenCalculator
 |  |  from core.llm.token_calculator import TokenCalculator
 | 
											
												
													
														|  |  from extensions.ext_database import db
 |  |  from extensions.ext_database import db
 | 
											
										
											
												
													
														|  | @@ -267,16 +268,14 @@ class IndexingRunner:
 | 
											
												
													
														|  |                  raise ValueError("Custom segment length should be between 50 and 1000.")
 |  |                  raise ValueError("Custom segment length should be between 50 and 1000.")
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |              separator = segmentation["separator"]
 |  |              separator = segmentation["separator"]
 | 
											
												
													
														|  | -            if not separator:
 |  | 
 | 
											
												
													
														|  | -                separators = ["\n\n", "。", ".", " ", ""]
 |  | 
 | 
											
												
													
														|  | -            else:
 |  | 
 | 
											
												
													
														|  | 
 |  | +            if separator:
 | 
											
												
													
														|  |                  separator = separator.replace('\\n', '\n')
 |  |                  separator = separator.replace('\\n', '\n')
 | 
											
												
													
														|  | -                separators = [separator, ""]
 |  | 
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -            character_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
 |  | 
 | 
											
												
													
														|  | 
 |  | +            character_splitter = FixedRecursiveCharacterTextSplitter.from_tiktoken_encoder(
 | 
											
												
													
														|  |                  chunk_size=segmentation["max_tokens"],
 |  |                  chunk_size=segmentation["max_tokens"],
 | 
											
												
													
														|  |                  chunk_overlap=0,
 |  |                  chunk_overlap=0,
 | 
											
												
													
														|  | -                separators=separators
 |  | 
 | 
											
												
													
														|  | 
 |  | +                fixed_separator=separator,
 | 
											
												
													
														|  | 
 |  | +                separators=["\n\n", "。", ".", " ", ""]
 | 
											
												
													
														|  |              )
 |  |              )
 | 
											
												
													
														|  |          else:
 |  |          else:
 | 
											
												
													
														|  |              # Automatic segmentation
 |  |              # Automatic segmentation
 |