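"""Web search helper that wraps the metaso.cn search API and returns results
as deepsearcher ``RetrievalResult`` objects."""
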
import http.client
import json
import time

from deepsearcher.utils import log
from deepsearcher.vector_db import RetrievalResult


class WebSearch:
    """Web search class that calls the metaso.cn API to perform web searches."""

    def __init__(self, api_key: str = "mk-CCEA085159C048597435780530A55403"):
        """
        Initialize the web search client.

        Args:
            api_key (str): metaso.cn API key.
        """
        self.api_key = api_key
        self.base_url = "metaso.cn"
        self.endpoint = "/api/v1/search"

    def search(self, query: str, size: int = 4) -> list[RetrievalResult]:
        """
        Execute a web search.

        Args:
            query (str): Search query.
            size (int): Number of results to return; defaults to 4.

        Returns:
            list[RetrievalResult]: List of search results.
        """
        conn = None
        try:
            # Build the request payload; the API expects `size` as a string
            payload = json.dumps({
                "q": query,
                "scope": "webpage",
                "includeSummary": False,
                "size": str(size),
                "includeRawContent": True,
                "conciseSnippet": True,
            })
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Accept": "application/json",
                "Content-Type": "application/json",
            }

            # Send the request
            conn = http.client.HTTPSConnection(self.base_url)
            conn.request("POST", self.endpoint, payload, headers)
            res = conn.getresponse()
            data = res.read()

            if res.status != 200:
                log.error(f"Web search request failed: {res.status} - {data.decode('utf-8')}")
                return []

            response_data = json.loads(data.decode("utf-8"))

            # Parse search results
            results = []
            if "webpages" in response_data:
                for i, webpage in enumerate(response_data["webpages"]):
                    # Prefer the full `content` field; fall back to the snippet
                    content = webpage.get("content", "") or webpage.get("snippet", "")

                    result = RetrievalResult(
                        embedding=None,  # Web search results carry no vectors
                        text=content,
                        reference=webpage.get("link", ""),
                        score=None,  # Web search results carry no similarity scores
                        metadata={
                            "title": webpage.get("title", ""),
                            "date": webpage.get("date", ""),
                            "authors": webpage.get("authors", []),
                            "position": webpage.get("position", i + 1),
                            "source": "webpage",
                        },
                    )
                    results.append(result)

            log.info(f"Web search successful, found {len(results)} results")
            return results
        except Exception as e:
            log.error(f"Web search error: {str(e)}")
            return []
        finally:
            if conn is not None:
                conn.close()
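
    # Illustrative (unverified) sketch of the response body `search` expects;
    # the field names are taken from the parsing logic above, not from API docs:
    #
    #   {
    #     "webpages": [
    #       {"title": "...", "link": "https://...", "snippet": "...",
    #        "content": "...", "date": "...", "authors": [], "position": 1}
    #     ]
    #   }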

    def search_with_retry(self, query: str, size: int = 4, max_retries: int = 3) -> list[RetrievalResult]:
        """
        Web search with a retry mechanism.

        Args:
            query (str): Search query.
            size (int): Number of results to return.
            max_retries (int): Maximum number of attempts.

        Returns:
            list[RetrievalResult]: List of search results.
        """
        for attempt in range(max_retries):
            try:
                results = self.search(query, size)
                if results:
                    return results
            except Exception as e:
                log.warning(f"Web search attempt {attempt + 1} failed: {str(e)}")
            if attempt < max_retries - 1:
                time.sleep(1)  # Wait 1 second before retrying

        log.error(f"Web search failed after {max_retries} attempts")
        return []
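

# Minimal usage sketch (an assumption, not part of the original module: it
# presumes a valid metaso.cn API key and an importable deepsearcher package;
# the query string and size are illustrative):
if __name__ == "__main__":
    searcher = WebSearch()
    for result in searcher.search_with_retry("vector databases", size=2):
        print(result.metadata.get("title", ""), "->", result.reference)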