feat: 添加latex主题文字

refactor: 优化前端到后端的参数传递链
10 hours ago · fc7ac11c5e
17 changed files with 180 additions and 199 deletions
--- a/deepsearcher/agent/deep_search.py
+++ b/deepsearcher/agent/deep_search.py
@ -20,7 +20,7 @@ COLLECTION_ROUTE_PROMPT = """
 "合集信息": {collection_info}
 使用的语言与问题相同
-你需要返回的格式是 a python list of str without any addtional content:
+你需要返回的格式是 a python list[str] without any additional content:
 """
@ -29,7 +29,12 @@ SUB_QUERY_PROMPT = """
 请你使用自顶向下和自底向上两种方向来思考如何拆分问题
 子问题的数量不可以太多，但是也不可以太少，应当保证问题的回答全面性，请根据问题复杂程度来决定子问题的数量
 如果原问题本身非常简单，没有必要进行拆分，则保留输出原问题本身
-需要保证每个子问题都具体、清晰、不可分（原子性，即不可以再包含更细分的子问题），子问题中不要包含"请你回答"、"请你总结"、"请你分析"等祈使类型词语
+需要保证每个子问题都具体、清晰、不可分（原子性，即不可以再包含更细分的子问题），但是也不可以过于片面狭窄，否则会降低回答的全面性
 例如对于机器学习，如果问题是"XGBoost是什么"，那么子问题:
 "XGBoost和其他Boosting算法有什么区别"(正确)
 "XGBoost和Gradient Boost有什么区别"（错误）
 "XGBoost和其他Boosting算法有什么区别（如AdaBoost、Gradient Boost）"（错误）
 同时，子问题中不要包含"请你回答"、"请你总结"、"请你分析"等祈使类型词语
 你需要最终返回一个字符串列表
 原问题: {original_query}
@ -43,14 +48,14 @@ SUB_QUERY_PROMPT = """
    "什么是机器学习?",
    "机器学习的使用目的",
    "机器学习的常用算法",
-    "机器学习的历史演进过程",
+    "机器学习算法的演进过程",
-    "机器学习和深度学习的区别是什么？"
+    "机器学习和现在流行的深度学习的区别是什么？"
 ]
 </EXAMPLE>
 使用的语言与原问题相同
-你需要返回的是 a python list of str without any addtional content:
+你需要返回的是 a python list[str] without any additional content:
 """
@ -66,7 +71,7 @@ RERANK_PROMPT = """
 例如，假如给出4个chunks（实际检索到的文档片段不一定是这么多），返回4个"True"或者"False"（注意这只是一个示例，不代表实际判断）: ["True", "False", "True", "True"]
 使用的语言与问题相同
-你需要返回的是 a python list of str(bool) without any addtional content:
+你需要返回的是 a python list[str(bool)] without any additional content:
 """
@ -85,7 +90,7 @@ REFLECT_PROMPT = """
 {chunks}
 使用的语言与原问题相同
-你需要返回的是 a python list of str without any addtional content:
+你需要返回的是 a python list[str] without any additional content:
 """
@ -93,6 +98,7 @@ SUMMARY_PROMPT = """
 你是一个内容分析专家
 请你综合已经提出的问题和检索到的信息，以原问题为中心，生成详细准确、层次分明（多级标题，从一级开始）、尽可能长的回答。
 如果检索到的信息不足以回答问题，你应该使用你的知识来进行扩展补充。
 如果检索到的文档片段中有其他有用的信息，但是没有在之前的问题中被提出，你也应该添加进最终回答中。
 注意，不要逐个回答问题，而是应该综合所有问题和信息，生成一个完整的回答。
 同时，你应该根据提供的信息生成文内引用"[^index]"(markdown文内引用)。
 来自<chunk><reference>的引用序号从[^index]从index=1开始，来源需要与前文<reference>中的"id"一致。
@ -248,9 +254,9 @@ class DeepSearch(BaseAgent):
            send_info(f"本地向量搜索找到 {len(vector_results)} 个结果")
        # 网页搜索
-        self.web_search = WebSearch() if kwargs.get('web_search', False) else None
+        self.web_search = True if kwargs.get('web_search', False) else None
        if self.web_search:
-            web_results = self.web_search.search_with_retry(query, size=2)
+            web_results = WebSearch().search_with_retry(query, size=4)
            if web_results:
                send_info(f"网页搜索找到 {len(web_results)} 个结果")
            else:
@ -473,7 +479,7 @@ class DeepSearch(BaseAgent):
                    absolute_path = str(Path(reference).resolve())
                    encoded_path = urllib.parse.quote(absolute_path, safe='')
                    # 使用相对路径，这样可以在不同的服务器配置下工作
-                    formated_refs.append(f"[^{i + 1}]: [/file/{encoded_path}](/file/{encoded_path})\n")
+                    formated_refs.append(f"[^{i + 1}]: [{absolute_path}](/file/{encoded_path})\n")
                except Exception as _:
                    formated_refs.append(f"[^{i + 1}]: {reference}\n")
--- a/deepsearcher/config.yaml
+++ b/deepsearcher/config.yaml
@ -2,12 +2,12 @@ provide_settings:
  llm:
    provider: "OpenAILLM"
    config:
-      # model: "Qwen/Qwen3-32B"
+      model: "Qwen/Qwen3-32B"
-      # api_key: "sk-fpzwvagjkhwysjsozfybvtjzongatcwqdihdxzuijnfdrjzt"
+      api_key: "sk-fpzwvagjkhwysjsozfybvtjzongatcwqdihdxzuijnfdrjzt"
-      #base_url: "https://api.siliconflow.cn/v1"
+      base_url: "https://api.siliconflow.cn/v1"
-      model: qwen3-32b
+      # model: qwen3-32b
-      api_key: sk-14f39f0c530d4aa0b5588454bff859d6
+      # api_key: sk-14f39f0c530d4aa0b5588454bff859d6
-      base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
+      # base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
  embedding:
    provider: "OpenAIEmbedding"
--- a/deepsearcher/online_query.py
+++ b/deepsearcher/online_query.py
@ -23,24 +23,3 @@ def query(original_query: str, **kwargs) -> tuple[str, list[RetrievalResult]]:
    max_iter = kwargs.get("max_iter", 3)
    web_search = kwargs.get("web_search", False)
    return default_searcher.query(original_query, max_iter=max_iter, web_search=web_search)
 def retrieve(original_query: str, max_iter: int | None = None) -> tuple[list[RetrievalResult], list[str]]:
    """
    Retrieve information relevant to a query without generating an answer.
    Delegates to ``configuration.default_searcher.retrieve`` and returns only
    the first of the two values that call yields.
    Args:
        original_query: The question or query to search for.
        max_iter: Forwarded unchanged to the searcher's ``retrieve`` call as
            ``max_iter`` (``None`` by default).
    Returns:
        The retrieved results produced by the default searcher. The second
        value returned by the searcher (``metadata``) is discarded.
    NOTE(review): the return annotation declares a 2-tuple, but the body
    returns a single value -- confirm which contract callers rely on.
    """
    default_searcher = configuration.default_searcher
    # The searcher call is unpacked into (results, metadata); metadata is unused here.
    retrieved_results, metadata = default_searcher.retrieve(original_query, max_iter=max_iter)
    return retrieved_results
--- a/deepsearcher/templates/static/themes/Readme.md
+++ b/deepsearcher/templates/static/themes/Readme.md
@ -1,4 +0,0 @@
 The built-in CSS will be replaced after update / reinstall, DO NOT MODIFY THEM.
 Refer https://support.typora.io/Add-Custom-CSS/ when you want to modify those CSS.
 Refer https://support.typora.io/About-Themes/ if you want to create / install new themes. 
--- a/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-105-Heavy.ttf
+++ b/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-105-Heavy.ttf
--- a/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-115-Black.ttf
+++ b/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-115-Black.ttf
--- a/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-35-Thin.ttf
+++ b/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-35-Thin.ttf
--- a/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-45-Light.ttf
+++ b/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-45-Light.ttf
--- a/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-55-Regular.ttf
+++ b/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-55-Regular.ttf
--- a/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-65-Medium.ttf
+++ b/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-65-Medium.ttf
--- a/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-75-SemiBold.ttf
+++ b/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-75-SemiBold.ttf
--- a/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-85-Bold.ttf
+++ b/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-85-Bold.ttf
--- a/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-95-ExtraBold.ttf
+++ b/deepsearcher/templates/static/themes/latex/AlibabaPuHuiTi-2-95-ExtraBold.ttf
--- a/deepsearcher/templates/static/themes/latex/STSongti-SC-Black.ttf
+++ b/deepsearcher/templates/static/themes/latex/STSongti-SC-Black.ttf
--- a/deepsearcher/templates/static/themes/latex/STSongti-SC-Regular.ttf
+++ b/deepsearcher/templates/static/themes/latex/STSongti-SC-Regular.ttf
--- a/deepsearcher/web_search.py
+++ b/deepsearcher/web_search.py
@ -6,12 +6,12 @@ from deepsearcher.utils import log
 class WebSearch:
-    """网页搜索类，用于调用metaso.cn API进行网页搜索"""
+    """Web search class for calling metaso.cn API to perform web searches"""
    def __init__(self, api_key: str = "mk-CCEA085159C048597435780530A55403"):
        """
-        初始化网页搜索
+        Initialize web search
        Args:
-            api_key (str): metaso.cn API密钥
+            api_key (str): metaso.cn API key
        """
        self.api_key = api_key
        self.base_url = "metaso.cn"
@ -19,15 +19,15 @@ class WebSearch:
    def search(self, query: str, size: int = 4) -> list[RetrievalResult]:
        """
-        执行网页搜索
+        Execute web search
        Args:
-            query (str): 搜索查询
+            query (str): Search query
-            size (int): 返回结果数量，默认为4
+            size (int): Number of results to return, default is 4
        Returns:
-            List[RetrievalResult]: 搜索结果列表
+            List[RetrievalResult]: List of search results
        """
        try:
-            # 构建请求数据
+            # Build request data
            payload = json.dumps({
                "q": query,
                "scope": "webpage",
@ -43,33 +43,33 @@ class WebSearch:
                'Content-Type': 'application/json'
            }
-            # 发送请求
+            # Send request
            conn = http.client.HTTPSConnection(self.base_url)
            conn.request("POST", self.endpoint, payload, headers)
            res = conn.getresponse()
            data = res.read()
            if res.status != 200:
-                log.error(f"网页搜索请求失败: {res.status} - {data.decode('utf-8')}")
+                log.error(f"Web search request failed: {res.status} - {data.decode('utf-8')}")
                return []
            response_data = json.loads(data.decode("utf-8"))
-            # 解析搜索结果
+            # Parse search results
            results = []
            if "webpages" in response_data:
                for i, webpage in enumerate(response_data["webpages"]):
-                    # 使用content字段作为主要文本内容
+                    # Use content field as primary text content
                    content = webpage.get("content", "")
                    if not content:
                        content = webpage.get("snippet", "")
-                    # 创建RetrievalResult对象
+                    # Create RetrievalResult object
                    result = RetrievalResult(
-                        embedding=None,  # 网页搜索结果没有向量
+                        embedding=None,  # Web search results don't have vectors
                        text=content,
                        reference=webpage.get("link", ""),
-                        score=1.0 - (i * (1 / size)),  # 根据位置计算分数
+                        score=None,  # Web search results don't have scores
                        metadata={
                            "title": webpage.get("title", ""),
                            "date": webpage.get("date", ""),
@ -80,11 +80,11 @@ class WebSearch:
                    )
                    results.append(result)
-            log.info(f"网页搜索成功，找到 {len(results)} 个结果")
+            log.info(f"Web search successful, found {len(results)} results")
            return results
        except Exception as e:
-            log.error(f"网页搜索出错: {str(e)}")
+            log.error(f"Web search error: {str(e)}")
            return []
        finally:
            if 'conn' in locals():
@ -92,13 +92,13 @@ class WebSearch:
    def search_with_retry(self, query: str, size: int = 4, max_retries: int = 3) -> list[RetrievalResult]:
        """
-        带重试机制的网页搜索
+        Web search with retry mechanism
        Args:
-            query (str): 搜索查询
+            query (str): Search query
-            size (int): 返回结果数量
+            size (int): Number of results to return
-            max_retries (int): 最大重试次数
+            max_retries (int): Maximum number of retries
        Returns:
-            List[RetrievalResult]: 搜索结果列表
+            List[RetrievalResult]: List of search results
        """
        for attempt in range(max_retries):
            try:
@ -106,8 +106,8 @@ class WebSearch:
                if results:
                    return results
            except Exception as e:
-                log.warning(f"网页搜索第 {attempt + 1} 次尝试失败: {str(e)}")
+                log.warning(f"Web search attempt {attempt + 1} failed: {str(e)}")
                if attempt < max_retries - 1:
-                    time.sleep(1)  # 等待1秒后重试
+                    time.sleep(1)  # Wait 1 second before retrying
-        log.error(f"网页搜索在 {max_retries} 次尝试后仍然失败")
+        log.error(f"Web search failed after {max_retries} attempts")
        return []
--- a/main.py
+++ b/main.py
@ -141,7 +141,7 @@ def load_files(
            batch_size=batch_size if batch_size is not None else 8,
            force_rebuild=force_rebuild,
        )
-        return {"message": "成功加载"}
+        return {"message": "加载完成"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@ -432,12 +432,12 @@ def serve_file(file_path: str, download: bool = Query(False, description="Whethe
        # 尝试读取文件内容
        try:
-            with open(file_path_obj, 'r', encoding='utf-8') as f:
+            with open(file_path_obj, encoding='utf-8') as f:
                content = f.read()
        except UnicodeDecodeError:
            # 如果UTF-8解码失败，尝试其他编码
            try:
-                with open(file_path_obj, 'r', encoding='latin-1') as f:
+                with open(file_path_obj, encoding='latin-1') as f:
                    content = f.read()
            except Exception as e:
                raise HTTPException(status_code=500, detail=f"Error reading file: {str(e)}")
@ -552,7 +552,7 @@ def serve_file(file_path: str, download: bool = Query(False, description="Whethe
                    sample = f.read(1024)
                    # 检查是否包含null字节，这是二进制文件的特征
                    if b'\x00' in sample:
-                        html_content += f"""
+                        html_content += """
                            <div class="binary-notice">
                                <strong>注意：</strong>这是一个二进制文件，无法在浏览器中直接显示内容。
                            </div>
@ -565,13 +565,13 @@ def serve_file(file_path: str, download: bool = Query(False, description="Whethe
                                <pre>{text_content}</pre>
                            """
                        except UnicodeDecodeError:
-                            html_content += f"""
+                            html_content += """
                                <div class="binary-notice">
                                    <strong>注意：</strong>此文件包含非文本内容，无法在浏览器中直接显示。
                                </div>
                            """
            except Exception:
-                html_content += f"""
+                html_content += """
                            <div class="binary-notice">
                                <strong>注意：</strong>无法读取文件内容。
                            </div>