
feat: add LaTeX theme fonts

refactor: streamline the parameter passing chain from the frontend to the backend
main
tanxing 7 hours ago
commit fc7ac11c5e
  1. deepsearcher/agent/deep_search.py (26)
  2. deepsearcher/config.yaml (12)
  3. deepsearcher/online_query.py (21)
  4. deepsearcher/templates/static/themes/Readme.md (4)
  5. deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-105-Heavy.ttf (BIN)
  6. deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-115-Black.ttf (BIN)
  7. deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-35-Thin.ttf (BIN)
  8. deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-45-Light.ttf (BIN)
  9. deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-55-Regular.ttf (BIN)
  10. deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-65-Medium.ttf (BIN)
  11. deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-75-SemiBold.ttf (BIN)
  12. deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-85-Bold.ttf (BIN)
  13. deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-95-ExtraBold.ttf (BIN)
  14. deepsearcher/templates/static/themes/latex/STSongti-SC-Black.ttf (BIN)
  15. deepsearcher/templates/static/themes/latex/STSongti-SC-Regular.ttf (BIN)
  16. deepsearcher/web_search.py (50)
  17. main.py (38)

26
deepsearcher/agent/deep_search.py

@@ -20,7 +20,7 @@ COLLECTION_ROUTE_PROMPT = """
"Collection info": {collection_info}
Use the same language as the question.
The format you need to return is a python list of str without any additional content:
The format you need to return is a python list[str] without any additional content:
"""
@@ -29,7 +29,12 @@ SUB_QUERY_PROMPT = """
Think about how to split the question from both a top-down and a bottom-up direction.
The number of sub-questions must be neither too large nor too small, but should ensure the answer is comprehensive; decide the number of sub-questions based on the complexity of the question.
If the original question is itself very simple and there is no need to split it, keep and output the original question as-is.
Make sure every sub-question is specific, clear, and indivisible (atomic), i.e. it cannot contain further sub-questions; sub-questions must not contain imperative phrases such as "please answer", "please summarize", or "please analyze".
Make sure every sub-question is specific, clear, and indivisible (atomic), i.e. it cannot contain further sub-questions, but it also must not be too narrow or one-sided, otherwise the comprehensiveness of the answer will suffer.
For example, in machine learning, if the question is "What is XGBoost?", then for sub-questions:
"What is the difference between XGBoost and other Boosting algorithms?" (correct)
"What is the difference between XGBoost and Gradient Boost?" (wrong)
"What is the difference between XGBoost and other Boosting algorithms (e.g. AdaBoost, Gradient Boost)?" (wrong)
Also, sub-questions must not contain imperative phrases such as "please answer", "please summarize", or "please analyze".
You must finally return a list of strings.
Original question: {original_query}
@@ -43,14 +48,14 @@ SUB_QUERY_PROMPT = """
"What is machine learning?",
"What is machine learning used for?",
"Common machine learning algorithms",
"The historical evolution of machine learning",
"What is the difference between machine learning and deep learning?"
"The evolution of machine learning algorithms",
"What is the difference between machine learning and the currently popular deep learning?"
]
</EXAMPLE>
Use the same language as the original question.
What you need to return is a python list of str without any additional content:
What you need to return is a python list[str] without any additional content:
"""
@@ -66,7 +71,7 @@ RERANK_PROMPT = """
For example, if 4 chunks are given (the number of actually retrieved document chunks may differ), return 4 "True" or "False" values. Note that this is only an example and does not represent an actual judgment: ["True", "False", "True", "True"]
Use the same language as the question.
What you need to return is a python list of str(bool) without any additional content:
What you need to return is a python list[str(bool)] without any additional content:
"""
@@ -85,7 +90,7 @@ REFLECT_PROMPT = """
{chunks}
Use the same language as the original question.
What you need to return is a python list of str without any additional content:
What you need to return is a python list[str] without any additional content:
"""
@@ -93,6 +98,7 @@ SUMMARY_PROMPT = """
You are a content analysis expert.
Combining the questions already raised and the retrieved information, generate an answer centered on the original question that is detailed, accurate, clearly structured (multi-level headings starting from level one), and as long as possible.
If the retrieved information is not sufficient to answer the question, you should extend and supplement it with your own knowledge.
If the retrieved document chunks contain other useful information that was not raised in the earlier questions, you should also add it to the final answer.
Note: do not answer the questions one by one; instead, combine all the questions and information to generate one complete answer.
At the same time, you should generate in-text citations "[^index]" (markdown footnote citations) based on the provided information.
Citation indices [^index] for content from <chunk> and <reference> start at index=1; the source must match the "id" in the preceding <reference>.
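All of these prompts ask the model to reply with a bare Python list literal. Below is a minimal sketch of how such a reply could be parsed defensively; the helper is illustrative and not part of this commit:

```python
import ast

def parse_list_reply(reply: str) -> list[str]:
    """Parse an LLM reply that is expected to be a bare Python list literal."""
    start, end = reply.find("["), reply.rfind("]")
    if start == -1 or end == -1:
        return []  # no list literal found in the reply
    try:
        value = ast.literal_eval(reply[start:end + 1])
    except (ValueError, SyntaxError):
        return []
    # Coerce elements to str so list[str(bool)] replies ("True"/"False") also work.
    return [str(item) for item in value] if isinstance(value, list) else []

# parse_list_reply('["True", "False", "True", "True"]') -> ['True', 'False', 'True', 'True']
```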
@@ -248,9 +254,9 @@ class DeepSearch(BaseAgent):
send_info(f"Local vector search found {len(vector_results)} results")
# Web search
self.web_search = WebSearch() if kwargs.get('web_search', False) else None
self.web_search = True if kwargs.get('web_search', False) else None
if self.web_search:
web_results = self.web_search.search_with_retry(query, size=2)
web_results = WebSearch().search_with_retry(query, size=4)
if web_results:
send_info(f"Web search found {len(web_results)} results")
else:
@@ -473,7 +479,7 @@ class DeepSearch(BaseAgent):
absolute_path = str(Path(reference).resolve())
encoded_path = urllib.parse.quote(absolute_path, safe='')
# Use a relative path so this works under different server configurations
formated_refs.append(f"[^{i + 1}]: [/file/{encoded_path}](/file/{encoded_path})\n")
formated_refs.append(f"[^{i + 1}]: [{absolute_path}](/file/{encoded_path})\n")
except Exception as _:
formated_refs.append(f"[^{i + 1}]: {reference}\n")
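The footnote change above keeps the URL-encoded /file/ route as the link target but now shows the absolute path as the link text. A standalone sketch of the same formatting, outside the class and for illustration only:

```python
import urllib.parse
from pathlib import Path

def format_reference(index: int, reference: str) -> str:
    """Render one footnote line the way the updated code does."""
    absolute_path = str(Path(reference).resolve())
    encoded_path = urllib.parse.quote(absolute_path, safe='')
    return f"[^{index + 1}]: [{absolute_path}](/file/{encoded_path})\n"

# format_reference(0, "docs/intro.md") might yield something like
# "[^1]: [/home/user/docs/intro.md](/file/%2Fhome%2Fuser%2Fdocs%2Fintro.md)\n"
```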

12
deepsearcher/config.yaml

@@ -2,12 +2,12 @@ provide_settings:
llm:
provider: "OpenAILLM"
config:
# model: "Qwen/Qwen3-32B"
# api_key: "sk-fpzwvagjkhwysjsozfybvtjzongatcwqdihdxzuijnfdrjzt"
#base_url: "https://api.siliconflow.cn/v1"
model: qwen3-32b
api_key: sk-14f39f0c530d4aa0b5588454bff859d6
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
model: "Qwen/Qwen3-32B"
api_key: "sk-fpzwvagjkhwysjsozfybvtjzongatcwqdihdxzuijnfdrjzt"
base_url: "https://api.siliconflow.cn/v1"
# model: qwen3-32b
# api_key: sk-14f39f0c530d4aa0b5588454bff859d6
# base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
embedding:
provider: "OpenAIEmbedding"

21
deepsearcher/online_query.py

@@ -23,24 +23,3 @@ def query(original_query: str, **kwargs) -> tuple[str, list[RetrievalResult]]:
max_iter = kwargs.get("max_iter", 3)
web_search = kwargs.get("web_search", False)
return default_searcher.query(original_query, max_iter=max_iter, web_search=web_search)
def retrieve(original_query: str, max_iter: int | None = None) -> tuple[list[RetrievalResult], list[str]]:
"""
Retrieve relevant information from the knowledge base without generating an answer.
This function uses the default searcher to retrieve information from the knowledge base
that is relevant to the query.
Args:
original_query: The question or query to search for.
max_iter: Maximum number of iterations for the search process.
Returns:
A tuple containing:
- A list of retrieval results
- A list of strings representing consumed tokens
"""
default_searcher = configuration.default_searcher
retrieved_results, metadata = default_searcher.retrieve(original_query, max_iter=max_iter)
return retrieved_results
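With the standalone retrieve() helper removed, query() is the single entry point, and max_iter / web_search are forwarded as keyword arguments. A hedged usage sketch (the question and values are illustrative):

```python
from deepsearcher.online_query import query

# Both kwargs are forwarded to default_searcher.query as shown above.
answer, retrieved_results = query(
    "What is XGBoost?",
    max_iter=3,
    web_search=True,
)
print(answer)
print(f"{len(retrieved_results)} supporting results")
```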

4
deepsearcher/templates/static/themes/Readme.md

@@ -1,4 +0,0 @@
The built-in CSS will be replaced after update / reinstall, DO NOT MODIFY THEM.
Refer https://support.typora.io/Add-Custom-CSS/ when you want to modify those CSS.
Refer https://support.typora.io/About-Themes/ if you want to create / install new themes.

BIN
deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-105-Heavy.ttf

Binary file not shown.

BIN
deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-115-Black.ttf

Binary file not shown.

BIN
deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-35-Thin.ttf

Binary file not shown.

BIN
deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-45-Light.ttf

Binary file not shown.

BIN
deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-55-Regular.ttf

Binary file not shown.

BIN
deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-65-Medium.ttf

Binary file not shown.

BIN
deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-75-SemiBold.ttf

Binary file not shown.

BIN
deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-85-Bold.ttf

Binary file not shown.

BIN
deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-95-ExtraBold.ttf

Binary file not shown.

BIN
deepsearcher/templates/static/themes/latex/STSongti-SC-Black.ttf

Binary file not shown.

BIN
deepsearcher/templates/static/themes/latex/STSongti-SC-Regular.ttf

Binary file not shown.

50
deepsearcher/web_search.py

@@ -6,12 +6,12 @@ from deepsearcher.utils import log
class WebSearch:
"""网页搜索类,用于调用metaso.cn API进行网页搜索"""
"""Web search class for calling metaso.cn API to perform web searches"""
def __init__(self, api_key: str = "mk-CCEA085159C048597435780530A55403"):
"""
初始化网页搜索
Initialize web search
Args:
api_key (str): metaso.cn API密钥
api_key (str): metaso.cn API key
"""
self.api_key = api_key
self.base_url = "metaso.cn"
@@ -19,15 +19,15 @@
def search(self, query: str, size: int = 4) -> list[RetrievalResult]:
"""
执行网页搜索
Execute web search
Args:
query (str): 搜索查询
size (int): 返回结果数量默认为4
query (str): Search query
size (int): Number of results to return, default is 4
Returns:
List[RetrievalResult]: 搜索结果列表
List[RetrievalResult]: List of search results
"""
try:
# 构建请求数据
# Build request data
payload = json.dumps({
"q": query,
"scope": "webpage",
@@ -43,33 +43,33 @@
'Content-Type': 'application/json'
}
# 发送请求
# Send request
conn = http.client.HTTPSConnection(self.base_url)
conn.request("POST", self.endpoint, payload, headers)
res = conn.getresponse()
data = res.read()
if res.status != 200:
log.error(f"网页搜索请求失败: {res.status} - {data.decode('utf-8')}")
log.error(f"Web search request failed: {res.status} - {data.decode('utf-8')}")
return []
response_data = json.loads(data.decode("utf-8"))
# 解析搜索结果
# Parse search results
results = []
if "webpages" in response_data:
for i, webpage in enumerate(response_data["webpages"]):
# 使用content字段作为主要文本内容
# Use content field as primary text content
content = webpage.get("content", "")
if not content:
content = webpage.get("snippet", "")
# 创建RetrievalResult对象
# Create RetrievalResult object
result = RetrievalResult(
embedding=None, # 网页搜索结果没有向量
embedding=None, # Web search results don't have vectors
text=content,
reference=webpage.get("link", ""),
score=1.0 - (i * (1 / size)), # 根据位置计算分数
score=None, # Web search results don't have scores
metadata={
"title": webpage.get("title", ""),
"date": webpage.get("date", ""),
@@ -80,11 +80,11 @@ class WebSearch:
)
results.append(result)
log.info(f"网页搜索成功,找到 {len(results)} 个结果")
log.info(f"Web search successful, found {len(results)} results")
return results
except Exception as e:
log.error(f"网页搜索出错: {str(e)}")
log.error(f"Web search error: {str(e)}")
return []
finally:
if 'conn' in locals():
@@ -92,13 +92,13 @@
def search_with_retry(self, query: str, size: int = 4, max_retries: int = 3) -> list[RetrievalResult]:
"""
带重试机制的网页搜索
Web search with retry mechanism
Args:
query (str): 搜索查询
size (int): 返回结果数量
max_retries (int): 最大重试次数
query (str): Search query
size (int): Number of results to return
max_retries (int): Maximum number of retries
Returns:
List[RetrievalResult]: 搜索结果列表
List[RetrievalResult]: List of search results
"""
for attempt in range(max_retries):
try:
@@ -106,8 +106,8 @@
if results:
return results
except Exception as e:
log.warning(f"网页搜索第 {attempt + 1} 次尝试失败: {str(e)}")
log.warning(f"Web search attempt {attempt + 1} failed: {str(e)}")
if attempt < max_retries - 1:
time.sleep(1) # 等待1秒后重试
log.error(f"网页搜索在 {max_retries} 次尝试后仍然失败")
time.sleep(1) # Wait 1 second before retrying
log.error(f"Web search failed after {max_retries} attempts")
return []
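A minimal usage sketch of the class above; attribute names on the results follow the RetrievalResult constructor arguments shown in the diff, and the query string is illustrative:

```python
from deepsearcher.web_search import WebSearch

searcher = WebSearch()  # falls back to the default metaso.cn API key from __init__
results = searcher.search_with_retry("vector databases", size=4, max_retries=3)
for r in results:
    # title and date live in metadata, the page URL in reference
    print(r.metadata.get("title", ""), r.reference)
```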

38
main.py

@@ -141,7 +141,7 @@ def load_files(
batch_size=batch_size if batch_size is not None else 8,
force_rebuild=force_rebuild,
)
return {"message": "成功加载"}
return {"message": "加载完成"}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@@ -432,12 +432,12 @@ def serve_file(file_path: str, download: bool = Query(False, description="Whethe
# Try to read the file content
try:
with open(file_path_obj, 'r', encoding='utf-8') as f:
with open(file_path_obj, encoding='utf-8') as f:
content = f.read()
except UnicodeDecodeError:
# If UTF-8 decoding fails, try another encoding
try:
with open(file_path_obj, 'r', encoding='latin-1') as f:
with open(file_path_obj, encoding='latin-1') as f:
content = f.read()
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error reading file: {str(e)}")
@@ -454,9 +454,9 @@ def serve_file(file_path: str, download: bool = Query(False, description="Whethe
else:
# For other file types, create an HTML page to display them
html_content = f"""
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>File Viewer - {file_path_obj.name}</title>
@@ -529,8 +529,8 @@ def serve_file(file_path: str, download: bool = Query(False, description="Whethe
color: #856404;
}}
</style>
</head>
<body>
</head>
<body>
<div class="container">
<div class="header">
<div>
@@ -543,7 +543,7 @@ def serve_file(file_path: str, download: bool = Query(False, description="Whethe
<a href="/file/{file_path}?download=true" class="download-btn">Download File</a>
</div>
<div class="content">
"""
"""
# Check whether the file is binary
try:
@@ -552,37 +552,37 @@ def serve_file(file_path: str, download: bool = Query(False, description="Whethe
sample = f.read(1024)
# Check for null bytes, a telltale sign of a binary file
if b'\x00' in sample:
html_content += f"""
html_content += """
<div class="binary-notice">
<strong>Note:</strong> This is a binary file and its content cannot be displayed directly in the browser.
</div>
"""
"""
else:
# Try to display it as text
try:
text_content = sample.decode('utf-8')
html_content += f"""
<pre>{text_content}</pre>
"""
"""
except UnicodeDecodeError:
html_content += f"""
html_content += """
<div class="binary-notice">
<strong>Note:</strong> This file contains non-text content and cannot be displayed directly in the browser.
</div>
"""
"""
except Exception:
html_content += f"""
html_content += """
<div class="binary-notice">
<strong>Note:</strong> The file content could not be read.
</div>
"""
"""
html_content += """
</div>
</div>
</body>
</html>
"""
</body>
</html>
"""
return HTMLResponse(content=html_content)
except HTTPException:
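The binary check above relies on a single heuristic: a NUL byte in the first 1 KiB of the file. The same idea isolated as a small helper, offered as a sketch rather than code from this commit:

```python
from pathlib import Path

def looks_binary(path: Path, sample_size: int = 1024) -> bool:
    """Return True if the sampled bytes contain a NUL byte, the same
    heuristic serve_file uses to decide against rendering text."""
    with open(path, "rb") as f:
        return b"\x00" in f.read(sample_size)
```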
