import http.client
import json
import time

from deepsearcher.vector_db import RetrievalResult
from deepsearcher.utils import log


class WebSearch:
    """Web search client that queries the metaso.cn search API."""

    # NOTE(security): an API key should not be hard-coded in source. The
    # default is kept only for backward compatibility — prefer passing the
    # key explicitly (e.g. from an environment variable).
    def __init__(self, api_key: str = "mk-CCEA085159C048597435780530A55403"):
        """
        Initialize the web search client.

        Args:
            api_key (str): metaso.cn API key.
        """
        self.api_key = api_key
        self.base_url = "metaso.cn"
        self.endpoint = "/api/v1/search"

    def search(self, query: str, size: int = 4) -> list[RetrievalResult]:
        """
        Perform one web search request.

        Args:
            query (str): Search query.
            size (int): Number of results to request (default 4).

        Returns:
            list[RetrievalResult]: Parsed results; an empty list on any
            HTTP or parsing failure (errors are logged, never raised).
        """
        conn = None  # created lazily so the finally clause can close it safely
        try:
            payload = json.dumps({
                "q": query,
                "scope": "webpage",
                "includeSummary": False,
                "size": str(size),  # API expects the size as a string
                "includeRawContent": True,
                "conciseSnippet": True
            })
            headers = {
                'Authorization': f'Bearer {self.api_key}',
                'Accept': 'application/json',
                'Content-Type': 'application/json'
            }

            conn = http.client.HTTPSConnection(self.base_url)
            conn.request("POST", self.endpoint, payload, headers)
            res = conn.getresponse()
            data = res.read()

            if res.status != 200:
                log.error(f"网页搜索请求失败: {res.status} - {data.decode('utf-8')}")
                return []

            response_data = json.loads(data.decode("utf-8"))

            results = []
            for i, webpage in enumerate(response_data.get("webpages", [])):
                # Prefer the full page content; fall back to the short snippet.
                content = webpage.get("content", "") or webpage.get("snippet", "")

                result = RetrievalResult(
                    embedding=None,  # web results carry no vector
                    text=content,
                    reference=webpage.get("link", ""),
                    # Rank-based score, clamped so positions past `size`
                    # cannot yield negative values.
                    score=max(0.0, 1.0 - i * (1 / size)),
                    metadata={
                        "title": webpage.get("title", ""),
                        "date": webpage.get("date", ""),
                        "authors": webpage.get("authors", []),
                        "position": webpage.get("position", i + 1),
                        "source": "webpage"
                    }
                )
                results.append(result)

            log.info(f"网页搜索成功,找到 {len(results)} 个结果")
            return results

        except Exception as e:
            log.error(f"网页搜索出错: {str(e)}")
            return []
        finally:
            if conn is not None:
                conn.close()

    def search_with_retry(self, query: str, size: int = 4,
                          max_retries: int = 3) -> list[RetrievalResult]:
        """
        Web search with a simple retry loop.

        Note: ``search()`` swallows its own exceptions and returns ``[]``,
        so this loop effectively retries on *empty* results as well as on
        errors; the except clause is kept as a defensive boundary.

        Args:
            query (str): Search query.
            size (int): Number of results to request.
            max_retries (int): Maximum number of attempts.

        Returns:
            list[RetrievalResult]: Results from the first non-empty attempt,
            or an empty list if every attempt fails.
        """
        for attempt in range(max_retries):
            try:
                results = self.search(query, size)
                if results:
                    return results
            except Exception as e:
                log.warning(f"网页搜索第 {attempt + 1} 次尝试失败: {str(e)}")
            if attempt < max_retries - 1:
                time.sleep(1)  # brief pause before the next attempt

        log.error(f"网页搜索在 {max_retries} 次尝试后仍然失败")
        return []