|
@ -6,12 +6,12 @@ from deepsearcher.utils import log |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class WebSearch: |
|
|
class WebSearch: |
|
|
"""网页搜索类,用于调用metaso.cn API进行网页搜索""" |
|
|
"""Web search class for calling metaso.cn API to perform web searches""" |
|
|
def __init__(self, api_key: str = "mk-CCEA085159C048597435780530A55403"): |
|
|
def __init__(self, api_key: str = "mk-CCEA085159C048597435780530A55403"): |
|
|
""" |
|
|
""" |
|
|
初始化网页搜索 |
|
|
Initialize web search |
|
|
Args: |
|
|
Args: |
|
|
api_key (str): metaso.cn API密钥 |
|
|
api_key (str): metaso.cn API key |
|
|
""" |
|
|
""" |
|
|
self.api_key = api_key |
|
|
self.api_key = api_key |
|
|
self.base_url = "metaso.cn" |
|
|
self.base_url = "metaso.cn" |
|
@ -19,15 +19,15 @@ class WebSearch: |
|
|
|
|
|
|
|
|
def search(self, query: str, size: int = 4) -> list[RetrievalResult]: |
|
|
def search(self, query: str, size: int = 4) -> list[RetrievalResult]: |
|
|
""" |
|
|
""" |
|
|
执行网页搜索 |
|
|
Execute web search |
|
|
Args: |
|
|
Args: |
|
|
query (str): 搜索查询 |
|
|
query (str): Search query |
|
|
size (int): 返回结果数量,默认为4 |
|
|
size (int): Number of results to return, default is 4 |
|
|
Returns: |
|
|
Returns: |
|
|
List[RetrievalResult]: 搜索结果列表 |
|
|
List[RetrievalResult]: List of search results |
|
|
""" |
|
|
""" |
|
|
try: |
|
|
try: |
|
|
# 构建请求数据 |
|
|
# Build request data |
|
|
payload = json.dumps({ |
|
|
payload = json.dumps({ |
|
|
"q": query, |
|
|
"q": query, |
|
|
"scope": "webpage", |
|
|
"scope": "webpage", |
|
@ -43,33 +43,33 @@ class WebSearch: |
|
|
'Content-Type': 'application/json' |
|
|
'Content-Type': 'application/json' |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
# 发送请求 |
|
|
# Send request |
|
|
conn = http.client.HTTPSConnection(self.base_url) |
|
|
conn = http.client.HTTPSConnection(self.base_url) |
|
|
conn.request("POST", self.endpoint, payload, headers) |
|
|
conn.request("POST", self.endpoint, payload, headers) |
|
|
res = conn.getresponse() |
|
|
res = conn.getresponse() |
|
|
data = res.read() |
|
|
data = res.read() |
|
|
|
|
|
|
|
|
if res.status != 200: |
|
|
if res.status != 200: |
|
|
log.error(f"网页搜索请求失败: {res.status} - {data.decode('utf-8')}") |
|
|
log.error(f"Web search request failed: {res.status} - {data.decode('utf-8')}") |
|
|
return [] |
|
|
return [] |
|
|
|
|
|
|
|
|
response_data = json.loads(data.decode("utf-8")) |
|
|
response_data = json.loads(data.decode("utf-8")) |
|
|
|
|
|
|
|
|
# 解析搜索结果 |
|
|
# Parse search results |
|
|
results = [] |
|
|
results = [] |
|
|
if "webpages" in response_data: |
|
|
if "webpages" in response_data: |
|
|
for i, webpage in enumerate(response_data["webpages"]): |
|
|
for i, webpage in enumerate(response_data["webpages"]): |
|
|
# 使用content字段作为主要文本内容 |
|
|
# Use content field as primary text content |
|
|
content = webpage.get("content", "") |
|
|
content = webpage.get("content", "") |
|
|
if not content: |
|
|
if not content: |
|
|
content = webpage.get("snippet", "") |
|
|
content = webpage.get("snippet", "") |
|
|
|
|
|
|
|
|
# 创建RetrievalResult对象 |
|
|
# Create RetrievalResult object |
|
|
result = RetrievalResult( |
|
|
result = RetrievalResult( |
|
|
embedding=None, # 网页搜索结果没有向量 |
|
|
embedding=None, # Web search results don't have vectors |
|
|
text=content, |
|
|
text=content, |
|
|
reference=webpage.get("link", ""), |
|
|
reference=webpage.get("link", ""), |
|
|
score=1.0 - (i * (1 / size)), # 根据位置计算分数 |
|
|
score=None, # Web search results don't have scores |
|
|
metadata={ |
|
|
metadata={ |
|
|
"title": webpage.get("title", ""), |
|
|
"title": webpage.get("title", ""), |
|
|
"date": webpage.get("date", ""), |
|
|
"date": webpage.get("date", ""), |
|
@ -80,11 +80,11 @@ class WebSearch: |
|
|
) |
|
|
) |
|
|
results.append(result) |
|
|
results.append(result) |
|
|
|
|
|
|
|
|
log.info(f"网页搜索成功,找到 {len(results)} 个结果") |
|
|
log.info(f"Web search successful, found {len(results)} results") |
|
|
return results |
|
|
return results |
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
except Exception as e: |
|
|
log.error(f"网页搜索出错: {str(e)}") |
|
|
log.error(f"Web search error: {str(e)}") |
|
|
return [] |
|
|
return [] |
|
|
finally: |
|
|
finally: |
|
|
if 'conn' in locals(): |
|
|
if 'conn' in locals(): |
|
@ -92,13 +92,13 @@ class WebSearch: |
|
|
|
|
|
|
|
|
def search_with_retry(self, query: str, size: int = 4, max_retries: int = 3) -> list[RetrievalResult]: |
|
|
def search_with_retry(self, query: str, size: int = 4, max_retries: int = 3) -> list[RetrievalResult]: |
|
|
""" |
|
|
""" |
|
|
带重试机制的网页搜索 |
|
|
Web search with retry mechanism |
|
|
Args: |
|
|
Args: |
|
|
query (str): 搜索查询 |
|
|
query (str): Search query |
|
|
size (int): 返回结果数量 |
|
|
size (int): Number of results to return |
|
|
max_retries (int): 最大重试次数 |
|
|
max_retries (int): Maximum number of retries |
|
|
Returns: |
|
|
Returns: |
|
|
List[RetrievalResult]: 搜索结果列表 |
|
|
List[RetrievalResult]: List of search results |
|
|
""" |
|
|
""" |
|
|
for attempt in range(max_retries): |
|
|
for attempt in range(max_retries): |
|
|
try: |
|
|
try: |
|
@ -106,8 +106,8 @@ class WebSearch: |
|
|
if results: |
|
|
if results: |
|
|
return results |
|
|
return results |
|
|
except Exception as e: |
|
|
except Exception as e: |
|
|
log.warning(f"网页搜索第 {attempt + 1} 次尝试失败: {str(e)}") |
|
|
log.warning(f"Web search attempt {attempt + 1} failed: {str(e)}") |
|
|
if attempt < max_retries - 1: |
|
|
if attempt < max_retries - 1: |
|
|
time.sleep(1) # 等待1秒后重试 |
|
|
time.sleep(1) # Wait 1 second before retrying |
|
|
log.error(f"网页搜索在 {max_retries} 次尝试后仍然失败") |
|
|
log.error(f"Web search failed after {max_retries} attempts") |
|
|
return [] |
|
|
return [] |
|
|