import unittest
import os
import tempfile
from unittest.mock import patch, MagicMock

from langchain_core.documents import Document

from deepsearcher.loader.file_loader import PDFLoader


class TestPDFLoader(unittest.TestCase):
    """Tests for the PDFLoader class.

    Every test runs against its own temporary directory that holds one text
    file and one markdown file. The PDF path is only reserved in ``setUp``;
    the file itself is written by the PDF test, which mocks
    ``pdfplumber.open`` instead of parsing a real PDF.
    """

    def setUp(self):
        """Create the temporary fixture files and the loader under test."""
        self.temp_dir = tempfile.TemporaryDirectory()
        # Register the cleanup immediately: unlike tearDown, cleanups are
        # still executed when a later statement in setUp raises, so the
        # directory never outlives a failed fixture.
        self.addCleanup(self.temp_dir.cleanup)

        # Create a text file for testing
        self.text_file_path = os.path.join(self.temp_dir.name, "test.txt")
        with open(self.text_file_path, "w", encoding="utf-8") as f:
            f.write("This is a test text file.")

        # Create a markdown file for testing
        self.md_file_path = os.path.join(self.temp_dir.name, "test.md")
        with open(self.md_file_path, "w", encoding="utf-8") as f:
            f.write("# Test Markdown\nThis is a test markdown file.")

        # PDF file path (the file is only created by the PDF test, so the
        # directory test sees exactly the two files above)
        self.pdf_file_path = os.path.join(self.temp_dir.name, "test.pdf")

        # Create the loader
        self.loader = PDFLoader()

    def test_supported_file_types(self):
        """Test the supported_file_types property."""
        file_types = self.loader.supported_file_types
        self.assertIsInstance(file_types, list)
        # subTest reports every missing extension instead of stopping at the
        # first failed assertion.
        for extension in ("pdf", "md", "txt"):
            with self.subTest(extension=extension):
                self.assertIn(extension, file_types)

    def test_load_text_file(self):
        """Test loading a text file."""
        documents = self.loader.load_file(self.text_file_path)

        # Check that we got one document
        self.assertEqual(len(documents), 1)

        # Check the document content
        document = documents[0]
        self.assertEqual(document.page_content, "This is a test text file.")

        # Check the metadata
        self.assertEqual(document.metadata["reference"], self.text_file_path)

    def test_load_markdown_file(self):
        """Test loading a markdown file."""
        documents = self.loader.load_file(self.md_file_path)

        # Check that we got one document
        self.assertEqual(len(documents), 1)

        # Check the document content
        document = documents[0]
        self.assertEqual(document.page_content, "# Test Markdown\nThis is a test markdown file.")

        # Check the metadata
        self.assertEqual(document.metadata["reference"], self.md_file_path)

    @patch("pdfplumber.open")
    def test_load_pdf_file(self, mock_pdf_open):
        """Test loading a PDF file.

        ``pdfplumber.open`` is patched, so no real PDF parsing happens; the
        mock yields two pages and the test expects their text joined by a
        blank line in a single document.
        """
        # Set up mock PDF pages
        mock_page1 = MagicMock()
        mock_page1.extract_text.return_value = "Page 1 content"

        mock_page2 = MagicMock()
        mock_page2.extract_text.return_value = "Page 2 content"

        # Set up mock PDF file; it returns itself from __enter__ so it can be
        # used as a context manager
        mock_pdf = MagicMock()
        mock_pdf.pages = [mock_page1, mock_page2]
        mock_pdf.__enter__.return_value = mock_pdf
        mock_pdf.__exit__.return_value = None

        # Configure the mock to return our mock PDF
        mock_pdf_open.return_value = mock_pdf

        # Create a dummy PDF file (explicit encoding, consistent with the
        # other fixture files)
        with open(self.pdf_file_path, "w", encoding="utf-8") as f:
            f.write("dummy pdf content")

        # Load the PDF file
        documents = self.loader.load_file(self.pdf_file_path)

        # Verify pdfplumber.open was called
        mock_pdf_open.assert_called_once_with(self.pdf_file_path)

        # Check that we got one document
        self.assertEqual(len(documents), 1)

        # Check the document content
        document = documents[0]
        self.assertEqual(document.page_content, "Page 1 content\n\nPage 2 content")

        # Check the metadata
        self.assertEqual(document.metadata["reference"], self.pdf_file_path)

    def test_load_directory(self):
        """Test loading a directory with mixed file types.

        ``load_file`` on the loader instance is replaced by a recording
        wrapper so the test can check which paths ``load_directory`` hands
        to it.
        """
        # setUp builds a fresh loader for every test, so patching the
        # instance attribute here cannot leak into other tests.
        loader = self.loader

        # Wrap the load_file method to track calls
        original_load_file = loader.load_file
        calls = []

        def mock_load_file(file_path):
            calls.append(file_path)
            return original_load_file(file_path)

        loader.load_file = mock_load_file

        # Load the directory
        documents = loader.load_directory(self.temp_dir.name)

        # Check that we processed exactly the text and markdown files, in any
        # order; assertCountEqual shows the recorded paths on failure.
        self.assertCountEqual(calls, [self.text_file_path, self.md_file_path])

        # Check that we got two documents
        self.assertEqual(len(documents), 2)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()