You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

53 lines
1.8 KiB

import unittest
from unittest.mock import patch, MagicMock
from deepsearcher.loader.web_crawler.base import BaseCrawler
class TestBaseCrawler(unittest.TestCase):
"""Tests for the BaseCrawler class."""
def test_abstract_methods(self):
"""Test that BaseCrawler defines abstract methods."""
# For abstract base classes, we can check if methods are defined
# but not implemented in the base class
self.assertTrue(hasattr(BaseCrawler, 'crawl_url'))
def test_crawl_urls(self):
"""Test the crawl_urls method."""
# Create a subclass of BaseCrawler for testing
class TestCrawler(BaseCrawler):
def crawl_url(self, url, **kwargs):
# Mock implementation that returns a list of documents
from langchain_core.documents import Document
return [Document(
page_content=f"Content from {url}",
metadata={"reference": url, "kwargs": kwargs}
)]
# Create test URLs
urls = [
"https://example.com",
"https://example.org",
"https://example.net"
]
# Test crawling multiple URLs
crawler = TestCrawler()
documents = crawler.crawl_urls(urls, param1="value1")
# Check the results
self.assertEqual(len(documents), 3) # One document per URL
# Verify each document
references = [doc.metadata["reference"] for doc in documents]
for url in urls:
self.assertIn(url, references)
# Check that kwargs were passed correctly
for doc in documents:
self.assertEqual(doc.metadata["kwargs"]["param1"], "value1")
if __name__ == "__main__":
unittest.main()