deep-searcher/tests/embedding/test_ollama_embedding.py


								import unittest

								import sys

								from unittest.mock import patch, MagicMock


								from deepsearcher.embedding import OllamaEmbedding


								class TestOllamaEmbedding(unittest.TestCase):

								    """Tests for the OllamaEmbedding class."""


								    def setUp(self):

								        """Set up test fixtures."""

								        # Create mock module for ollama

								        mock_ollama_module = MagicMock()


								        # Create mock Client class

								        self.mock_ollama_client = MagicMock()

								        mock_ollama_module.Client = self.mock_ollama_client


								        # Add the mock module to sys.modules

								        self.module_patcher = patch.dict('sys.modules', {'ollama': mock_ollama_module})

								        self.module_patcher.start()


								        # Set up mock client instance

								        self.mock_client = MagicMock()

								        self.mock_ollama_client.return_value = self.mock_client


								        # Configure mock embed method

								        self.mock_client.embed.return_value = {"embeddings": [[0.1] * 1024]}


								    def tearDown(self):

								        """Clean up test fixtures."""

								        self.module_patcher.stop()


								    @patch.dict('os.environ', {}, clear=True)

								    def test_init_default(self):

								        """Test initialization with default parameters."""

								        # Create instance to test

								        embedding = OllamaEmbedding(model="bge-m3")


								        # Check that Client was initialized correctly

								        self.mock_ollama_client.assert_called_once_with(host="http://localhost:11434/")


								        # Check instance attributes

								        self.assertEqual(embedding.model, "bge-m3")

								        self.assertEqual(embedding.dim, 1024)

								        self.assertEqual(embedding.batch_size, 32)


								    @patch.dict('os.environ', {}, clear=True)

								    def test_init_with_base_url(self):

								        """Test initialization with custom base URL."""

								        # Reset mock

								        self.mock_ollama_client.reset_mock()


								        # Create embedding with custom base URL

								        embedding = OllamaEmbedding(base_url="http://custom-ollama-server:11434/")


								        # Check that Client was initialized with custom base URL

								        self.mock_ollama_client.assert_called_with(host="http://custom-ollama-server:11434/")


								    @patch.dict('os.environ', {}, clear=True)

								    def test_init_with_model_name(self):

								        """Test initialization with model_name parameter."""

								        # Reset mock

								        self.mock_ollama_client.reset_mock()


								        # Create embedding with model_name

								        embedding = OllamaEmbedding(model_name="mxbai-embed-large")


								        # Check model attribute

								        self.assertEqual(embedding.model, "mxbai-embed-large")

								        # Check dimension is set correctly based on model

								        self.assertEqual(embedding.dim, 768)


								    @patch.dict('os.environ', {}, clear=True)

								    def test_init_with_dimension(self):

								        """Test initialization with custom dimension."""

								        # Reset mock

								        self.mock_ollama_client.reset_mock()


								        # Create embedding with custom dimension

								        embedding = OllamaEmbedding(dimension=512)


								        # Check dimension attribute

								        self.assertEqual(embedding.dim, 512)


								    @patch.dict('os.environ', {}, clear=True)

								    def test_embed_query(self):

								        """Test embedding a single query."""

								        # Create instance to test

								        embedding = OllamaEmbedding(model="bge-m3")


								        # Set up mock response

								        self.mock_client.embed.return_value = {"embeddings": [[0.1, 0.2, 0.3] * 341 + [0.4]]}  # 1024 dimensions


								        # Call the method

								        result = embedding.embed_query("test query")


								        # Verify embed was called correctly

								        self.mock_client.embed.assert_called_once_with(model="bge-m3", input="test query")


								        # Check the result

								        self.assertEqual(len(result), 1024)

								        self.assertEqual(result, [0.1, 0.2, 0.3] * 341 + [0.4])


								    @patch.dict('os.environ', {}, clear=True)

								    def test_embed_documents_small_batch(self):

								        """Test embedding documents with a small batch (less than batch size)."""

								        # Create instance to test

								        embedding = OllamaEmbedding(model="bge-m3")


								        # Set up mock response for multiple documents

								        mock_embeddings = [

								            [0.1, 0.2, 0.3] * 341 + [0.4],  # 1024 dimensions

								            [0.4, 0.5, 0.6] * 341 + [0.7],

								            [0.7, 0.8, 0.9] * 341 + [0.1]

								        ]

								        self.mock_client.embed.return_value = {"embeddings": mock_embeddings}


								        # Create test texts

								        texts = ["text 1", "text 2", "text 3"]


								        # Call the method

								        results = embedding.embed_documents(texts)


								        # Verify embed was called correctly

								        self.mock_client.embed.assert_called_once_with(model="bge-m3", input=texts)


								        # Check the results

								        self.assertEqual(len(results), 3)

								        for i, result in enumerate(results):

								            self.assertEqual(len(result), 1024)

								            self.assertEqual(result, mock_embeddings[i])


								    @patch.dict('os.environ', {}, clear=True)

								    def test_embed_documents_large_batch(self):

								        """Test embedding documents with a large batch (more than batch size)."""

								        # Create instance to test

								        embedding = OllamaEmbedding(model="bge-m3")


								        # Set a smaller batch size for testing

								        embedding.batch_size = 2


								        # Set up mock responses for batches

								        batch1_embeddings = [

								            [0.1, 0.2, 0.3] * 341 + [0.4],  # 1024 dimensions

								            [0.4, 0.5, 0.6] * 341 + [0.7]

								        ]

								        batch2_embeddings = [

								            [0.7, 0.8, 0.9] * 341 + [0.1]

								        ]


								        # Configure mock to return different responses for each call

								        self.mock_client.embed.side_effect = [

								            {"embeddings": batch1_embeddings},

								            {"embeddings": batch2_embeddings}

								        ]


								        # Create test texts

								        texts = ["text 1", "text 2", "text 3"]


								        # Call the method

								        results = embedding.embed_documents(texts)


								        # Verify embed was called twice with the right batches

								        self.assertEqual(self.mock_client.embed.call_count, 2)

								        self.mock_client.embed.assert_any_call(model="bge-m3", input=["text 1", "text 2"])

								        self.mock_client.embed.assert_any_call(model="bge-m3", input=["text 3"])


								        # Check the results

								        self.assertEqual(len(results), 3)

								        self.assertEqual(results[0], batch1_embeddings[0])

								        self.assertEqual(results[1], batch1_embeddings[1])

								        self.assertEqual(results[2], batch2_embeddings[0])


								    @patch.dict('os.environ', {}, clear=True)

								    def test_embed_documents_no_batching(self):

								        """Test embedding documents with batching disabled."""

								        # Create instance to test

								        embedding = OllamaEmbedding(model="bge-m3")


								        # Disable batching

								        embedding.batch_size = 0


								        # Mock the embed_query method

								        original_embed_query = embedding.embed_query

								        embed_query_calls = []


								        def mock_embed_query(text):

								            embed_query_calls.append(text)

								            return [0.1] * 1024  # Return a simple mock embedding


								        embedding.embed_query = mock_embed_query


								        # Create test texts

								        texts = ["text 1", "text 2", "text 3"]


								        # Call the method

								        results = embedding.embed_documents(texts)


								        # Check that embed_query was called for each text

								        self.assertEqual(len(embed_query_calls), 3)

								        self.assertEqual(embed_query_calls, texts)


								        # Check the results

								        self.assertEqual(len(results), 3)

								        for result in results:

								            self.assertEqual(len(result), 1024)

								            self.assertEqual(result, [0.1] * 1024)


								        # Restore original method

								        embedding.embed_query = original_embed_query


								    @patch.dict('os.environ', {}, clear=True)

								    def test_dimension_property(self):

								        """Test the dimension property."""

								        # Create instance to test

								        embedding = OllamaEmbedding(model="bge-m3")


								        # Check dimension for bge-m3

								        self.assertEqual(embedding.dimension, 1024)


								        # Test with different models

								        self.mock_ollama_client.reset_mock()

								        embedding = OllamaEmbedding(model="mxbai-embed-large")

								        self.assertEqual(embedding.dimension, 768)


								        self.mock_ollama_client.reset_mock()

								        embedding = OllamaEmbedding(model="nomic-embed-text")

								        self.assertEqual(embedding.dimension, 768)


								        # Test with custom dimension

								        self.mock_ollama_client.reset_mock()

								        embedding = OllamaEmbedding(dimension=512)

								        self.assertEqual(embedding.dimension, 512)


								if __name__ == "__main__":

								    unittest.main()