fix: 添加markdown引用使本地文件的脚注可以正常跳转

5 days ago · 7306c9b37d
2 changed files with 164 additions and 8 deletions
--- a/deepsearcher/agent/deep_search.py
+++ b/deepsearcher/agent/deep_search.py
@ -136,7 +136,7 @@ class DeepSearch(BaseAgent):
        max_iter: int,
        route_collection: bool = False,
        text_window_splitter: bool = True,
-        web_search: bool = True,
+        web_search: bool = False,
        **kwargs,
    ):
        """
@ -255,7 +255,9 @@ class DeepSearch(BaseAgent):
            else:
                send_info("网页搜索未找到相关结果")
-        retrieved_results = vector_results + web_results
+            retrieved_results = vector_results + web_results
        else:
            retrieved_results = vector_results
        # Format all chunks for batch processing
        chunks, _ = self._format_chunks(retrieved_results)
@ -470,7 +472,7 @@ class DeepSearch(BaseAgent):
                    absolute_path = str(Path(reference).resolve())
                    encoded_path = urllib.parse.quote(absolute_path, safe='')
                    # 使用相对路径，这样可以在不同的服务器配置下工作
-                    formated_refs.append(f"[^{i + 1}]: /file/{encoded_path}\n")
+                    formated_refs.append(f"[^{i + 1}]: [/file/{encoded_path}](/file/{encoded_path})\n")
                except Exception as _:
                    formated_refs.append(f"[^{i + 1}]: {reference}\n")
--- a/main.py
+++ b/main.py
@ -3,7 +3,7 @@ import argparse
 import uvicorn
 from fastapi import Body, FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import HTMLResponse, StreamingResponse, FileResponse
+from fastapi.responses import HTMLResponse, StreamingResponse, FileResponse, PlainTextResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
 import os
@ -376,7 +376,7 @@ def clear_messages():
@app.get("/file/{file_path:path}")
-def serve_file(file_path: str):
+def serve_file(file_path: str, download: bool = Query(False, description="Whether to download the file")):
    """
    Serve local files for file:// URIs in generated reports.
@ -388,12 +388,13 @@ def serve_file(file_path: str):
        file_path (str): The URL-encoded file path
    Returns:
-        FileResponse: The file content or an error response
+        FileResponse, PlainTextResponse or HTMLResponse: The file as a download when `download` is true, otherwise its content displayed in browser
    Raises:
        HTTPException: If the file is not found or access is denied
    """
    import urllib.parse
    import mimetypes
    from pathlib import Path
    try:
@ -415,15 +416,168 @@ def serve_file(file_path: str):
        if not file_path_obj.is_file():
            raise HTTPException(status_code=400, detail=f"Path is not a file: {decoded_path}")
-        # 尝试读取文件并返回
+        # 如果请求下载，直接返回文件
-        try:
+        if download:
            return FileResponse(
                path=str(file_path_obj),
                filename=file_path_obj.name,
                media_type='application/octet-stream'
            )
        # 尝试读取文件内容
        try:
            with open(file_path_obj, 'r', encoding='utf-8') as f:
                content = f.read()
        except UnicodeDecodeError:
            # 如果UTF-8解码失败，尝试其他编码
            try:
                with open(file_path_obj, 'r', encoding='latin-1') as f:
                    content = f.read()
            except Exception as e:
                raise HTTPException(status_code=500, detail=f"Error reading file: {str(e)}")
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error reading file: {str(e)}")
        # 获取文件类型
        mime_type, _ = mimetypes.guess_type(str(file_path_obj))
        # 根据文件类型决定如何显示
        if mime_type and mime_type.startswith('text/'):
            # 文本文件直接在浏览器中显示
            return PlainTextResponse(content=content, media_type=mime_type)
        else:
            # 其他文件类型创建HTML页面显示
            html_content = f"""
 <!DOCTYPE html>
 <html lang="zh-CN">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>文件查看器 - {file_path_obj.name}</title>
    <style>
        body {{
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
        }}
        .container {{
            max-width: 1200px;
            margin: 0 auto;
            background-color: white;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            overflow: hidden;
        }}
        .header {{
            background-color: #2c3e50;
            color: white;
            padding: 15px 20px;
            display: flex;
            justify-content: space-between;
            align-items: center;
        }}
        .header h1 {{
            margin: 0;
            font-size: 1.5em;
        }}
        .file-info {{
            font-size: 0.9em;
            opacity: 0.8;
        }}
        .content {{
            padding: 20px;
        }}
        .download-btn {{
            background-color: #3498db;
            color: white;
            border: none;
            padding: 8px 16px;
            border-radius: 4px;
            cursor: pointer;
            text-decoration: none;
            display: inline-block;
            margin-left: 10px;
        }}
        .download-btn:hover {{
            background-color: #2980b9;
        }}
        pre {{
            background-color: #f8f9fa;
            border: 1px solid #e9ecef;
            border-radius: 4px;
            padding: 15px;
            overflow-x: auto;
            white-space: pre-wrap;
            word-wrap: break-word;
            font-family: 'Courier New', monospace;
            font-size: 14px;
            line-height: 1.5;
        }}
        .binary-notice {{
            background-color: #fff3cd;
            border: 1px solid #ffeaa7;
            border-radius: 4px;
            padding: 15px;
            margin-bottom: 20px;
            color: #856404;
        }}
    </style>
 </head>
 <body>
    <div class="container">
        <div class="header">
            <div>
                <h1>{file_path_obj.name}</h1>
                <div class="file-info">
                    路径: {decoded_path}<br>
                    大小: {file_path_obj.stat().st_size:,} 字节
                </div>
            </div>
            <a href="/file/{file_path}?download=true" class="download-btn">下载文件</a>
        </div>
        <div class="content">
 """
            # 检查是否为二进制文件
            try:
                # 尝试读取前1024字节来检测是否为二进制文件
                with open(file_path_obj, 'rb') as f:
                    sample = f.read(1024)
                    # 检查是否包含null字节，这是二进制文件的特征
                    if b'\x00' in sample:
                        html_content += f"""
            <div class="binary-notice">
                <strong>注意：</strong>这是一个二进制文件，无法在浏览器中直接显示内容。
            </div>
 """
                    else:
                        # 尝试以文本形式显示
                        try:
                            text_content = sample.decode('utf-8')
                            html_content += f"""
            <pre>{text_content}</pre>
 """
                        except UnicodeDecodeError:
                            html_content += f"""
            <div class="binary-notice">
                <strong>注意：</strong>此文件包含非文本内容，无法在浏览器中直接显示。
            </div>
 """
            except Exception:
                html_content += f"""
            <div class="binary-notice">
                <strong>注意：</strong>无法读取文件内容。
            </div>
 """
            html_content += """
        </div>
    </div>
 </body>
 </html>
 """
            return HTMLResponse(content=html_content)
    except HTTPException:
        # 重新抛出HTTP异常