from abc import ABC, abstractmethod
from collections.abc import Sequence
from typing import Any, Optional

from pydantic import BaseModel, Field


class Document(BaseModel):
    """Class for storing a piece of text and associated metadata."""

    # The text content of the document.
    page_content: str

    # Optional list of floats attached to the content; None when not set.
    # NOTE(review): presumably an embedding of page_content -- confirm with callers.
    vector: Optional[list[float]] = None

    # Arbitrary metadata about the page content (e.g., source, relationships
    # to other documents, etc.). Each instance gets its own empty dict by
    # default because the default is built via ``default_factory``.
    metadata: Optional[dict] = Field(default_factory=dict)


class BaseDocumentTransformer(ABC):
    """Abstract base class for document transformation systems.

    A document transformation system takes a sequence of Documents and returns a
    sequence of transformed Documents.

    Subclasses must implement both ``transform_documents`` and
    ``atransform_documents``; the class cannot be instantiated otherwise.

    Example:
        .. code-block:: python

            class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
                embeddings: Embeddings
                similarity_fn: Callable = cosine_similarity
                similarity_threshold: float = 0.95

                class Config:
                    arbitrary_types_allowed = True

                def transform_documents(
                    self, documents: Sequence[Document], **kwargs: Any
                ) -> Sequence[Document]:
                    stateful_documents = get_stateful_documents(documents)
                    embedded_documents = _get_embeddings_from_stateful_docs(
                        self.embeddings, stateful_documents
                    )
                    included_idxs = _filter_similar_embeddings(
                        embedded_documents, self.similarity_fn, self.similarity_threshold
                    )
                    return [stateful_documents[i] for i in sorted(included_idxs)]

                async def atransform_documents(
                    self, documents: Sequence[Document], **kwargs: Any
                ) -> Sequence[Document]:
                    raise NotImplementedError

    """

    @abstractmethod
    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Transform a sequence of documents.

        Args:
            documents: A sequence of Documents to be transformed.
            **kwargs: Additional keyword arguments; their meaning is defined
                by the concrete implementation.

        Returns:
            A sequence of transformed Documents.
        """

    @abstractmethod
    async def atransform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Asynchronously transform a sequence of documents.

        Args:
            documents: A sequence of Documents to be transformed.
            **kwargs: Additional keyword arguments; their meaning is defined
                by the concrete implementation.

        Returns:
            A sequence of transformed Documents.
        """