You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

82 lines
2.9 KiB

import unittest
import os
import tempfile
from deepsearcher.loader.file_loader import TextLoader
class TestTextLoader(unittest.TestCase):
"""Tests for the TextLoader class."""
def setUp(self):
"""Set up the test environment."""
self.loader = TextLoader()
# Create a temporary directory and file for testing
self.temp_dir = tempfile.TemporaryDirectory()
self.test_file_path = os.path.join(self.temp_dir.name, "test.txt")
self.test_content = "This is a test file content.\nWith multiple lines."
# Write test content to the file
with open(self.test_file_path, "w", encoding="utf-8") as f:
f.write(self.test_content)
def tearDown(self):
"""Clean up the test environment."""
self.temp_dir.cleanup()
def test_supported_file_types(self):
"""Test the supported_file_types property."""
supported_types = self.loader.supported_file_types
self.assertIsInstance(supported_types, list)
self.assertIn("txt", supported_types)
self.assertIn("md", supported_types)
def test_load_file(self):
"""Test loading a text file."""
documents = self.loader.load_file(self.test_file_path)
# Check that we got a list with one document
self.assertIsInstance(documents, list)
self.assertEqual(len(documents), 1)
# Check the document content
document = documents[0]
self.assertEqual(document.page_content, self.test_content)
# Check the metadata
self.assertIn("reference", document.metadata)
self.assertEqual(document.metadata["reference"], self.test_file_path)
def test_load_directory(self):
"""Test loading a directory with text files."""
# Create additional test files
md_file_path = os.path.join(self.temp_dir.name, "test.md")
with open(md_file_path, "w", encoding="utf-8") as f:
f.write("# Markdown Test\nThis is a markdown file.")
# Create a non-supported file
pdf_file_path = os.path.join(self.temp_dir.name, "test.pdf")
with open(pdf_file_path, "w", encoding="utf-8") as f:
f.write("PDF content")
# Load the directory
documents = self.loader.load_directory(self.temp_dir.name)
# Check that we got documents for supported files only
self.assertEqual(len(documents), 2)
# Get references
references = [doc.metadata["reference"] for doc in documents]
# Check that supported files were loaded
self.assertIn(self.test_file_path, references)
self.assertIn(md_file_path, references)
# Check that unsupported file was not loaded
for doc in documents:
self.assertNotEqual(doc.metadata["reference"], pdf_file_path)
if __name__ == "__main__":
unittest.main()