from tqdm import tqdm

from deepsearcher.loader.splitter import Chunk


class BaseEmbedding:
    """
    Abstract base class for embedding model implementations.

    Subclasses must implement :meth:`embed_query` and :attr:`dimension`.
    :meth:`embed_documents` has a default one-query-at-a-time implementation
    that subclasses may override with a more efficient batched call, and
    :meth:`embed_chunks` builds on it to embed ``Chunk`` objects in batches.
    """

    def embed_query(self, text: str) -> list[float]:
        """
        Embed a single query text.

        Args:
            text: The query text to embed.

        Returns:
            A list of floats representing the embedding vector.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        # Fail loudly instead of silently returning None (the annotation
        # promises list[float]); a bare `pass` here would let the error
        # surface far from its cause, e.g. when chunk.embedding is read.
        raise NotImplementedError("Subclasses must implement embed_query")

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """
        Embed a list of document texts.

        This default implementation calls embed_query for each text, but
        implementations may override this with a more efficient batch method.

        Args:
            texts: A list of document texts to embed.

        Returns:
            A list of embedding vectors, one for each input text.
        """
        return [self.embed_query(text) for text in texts]

    def embed_chunks(self, chunks: list[Chunk], batch_size: int = 256) -> list[Chunk]:
        """
        Embed a list of Chunk objects in place.

        Extracts the text from each chunk, embeds the texts in batches of
        ``batch_size`` via :meth:`embed_documents`, and assigns each
        resulting vector to the corresponding chunk's ``embedding``
        attribute. An empty ``chunks`` list is returned unchanged.

        Args:
            chunks: A list of Chunk objects to embed.
            batch_size: The number of chunks to process in each batch.

        Returns:
            The input list of Chunk objects, updated with embeddings.
        """
        texts = [chunk.text for chunk in chunks]
        embeddings: list[list[float]] = []
        # Batch the texts so subclasses with a true batch API (via an
        # overridden embed_documents) get bounded request sizes.
        for start in tqdm(range(0, len(texts), batch_size), desc="Embedding chunks"):
            embeddings.extend(self.embed_documents(texts[start : start + batch_size]))
        for chunk, embedding in zip(chunks, embeddings):
            chunk.embedding = embedding
        return chunks

    @property
    def dimension(self) -> int:
        """
        Get the dimensionality of the embeddings.

        Returns:
            The number of dimensions in the embedding vectors.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        # Same rationale as embed_query: do not silently return None.
        raise NotImplementedError("Subclasses must implement dimension")