Browse Source

fix: 添加 Markdown 引用,使本地文件的脚注可以正常跳转

main
tanxing 5 days ago
parent
commit
7306c9b37d
  1. 8
      deepsearcher/agent/deep_search.py
  2. 164
      main.py

8
deepsearcher/agent/deep_search.py

@ -136,7 +136,7 @@ class DeepSearch(BaseAgent):
max_iter: int, max_iter: int,
route_collection: bool = False, route_collection: bool = False,
text_window_splitter: bool = True, text_window_splitter: bool = True,
web_search: bool = True, web_search: bool = False,
**kwargs, **kwargs,
): ):
""" """
@ -255,7 +255,9 @@ class DeepSearch(BaseAgent):
else: else:
send_info("网页搜索未找到相关结果") send_info("网页搜索未找到相关结果")
retrieved_results = vector_results + web_results retrieved_results = vector_results + web_results
else:
retrieved_results = vector_results
# Format all chunks for batch processing # Format all chunks for batch processing
chunks, _ = self._format_chunks(retrieved_results) chunks, _ = self._format_chunks(retrieved_results)
@ -470,7 +472,7 @@ class DeepSearch(BaseAgent):
absolute_path = str(Path(reference).resolve()) absolute_path = str(Path(reference).resolve())
encoded_path = urllib.parse.quote(absolute_path, safe='') encoded_path = urllib.parse.quote(absolute_path, safe='')
# 使用相对路径,这样可以在不同的服务器配置下工作 # 使用相对路径,这样可以在不同的服务器配置下工作
formated_refs.append(f"[^{i + 1}]: /file/{encoded_path}\n") formated_refs.append(f"[^{i + 1}]: [/file/{encoded_path}](/file/{encoded_path})\n")
except Exception as _: except Exception as _:
formated_refs.append(f"[^{i + 1}]: {reference}\n") formated_refs.append(f"[^{i + 1}]: {reference}\n")

164
main.py

@ -3,7 +3,7 @@ import argparse
import uvicorn import uvicorn
from fastapi import Body, FastAPI, HTTPException, Query from fastapi import Body, FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, StreamingResponse, FileResponse from fastapi.responses import HTMLResponse, StreamingResponse, FileResponse, PlainTextResponse
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel from pydantic import BaseModel
import os import os
@ -376,7 +376,7 @@ def clear_messages():
@app.get("/file/{file_path:path}") @app.get("/file/{file_path:path}")
def serve_file(file_path: str): def serve_file(file_path: str, download: bool = Query(False, description="Whether to download the file")):
""" """
Serve local files for file:// URIs in generated reports. Serve local files for file:// URIs in generated reports.
@ -388,12 +388,13 @@ def serve_file(file_path: str):
file_path (str): The URL-encoded file path file_path (str): The URL-encoded file path
Returns: Returns:
FileResponse: The file content or an error response HTMLResponse or PlainTextResponse: The file content displayed in browser
Raises: Raises:
HTTPException: If the file is not found or access is denied HTTPException: If the file is not found or access is denied
""" """
import urllib.parse import urllib.parse
import mimetypes
from pathlib import Path from pathlib import Path
try: try:
@ -415,15 +416,168 @@ def serve_file(file_path: str):
if not file_path_obj.is_file(): if not file_path_obj.is_file():
raise HTTPException(status_code=400, detail=f"Path is not a file: {decoded_path}") raise HTTPException(status_code=400, detail=f"Path is not a file: {decoded_path}")
# 尝试读取文件并返回 # 如果请求下载,直接返回文件
try: if download:
return FileResponse( return FileResponse(
path=str(file_path_obj), path=str(file_path_obj),
filename=file_path_obj.name, filename=file_path_obj.name,
media_type='application/octet-stream' media_type='application/octet-stream'
) )
# 尝试读取文件内容
try:
with open(file_path_obj, 'r', encoding='utf-8') as f:
content = f.read()
except UnicodeDecodeError:
# 如果UTF-8解码失败,尝试其他编码
try:
with open(file_path_obj, 'r', encoding='latin-1') as f:
content = f.read()
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error reading file: {str(e)}")
except Exception as e: except Exception as e:
raise HTTPException(status_code=500, detail=f"Error reading file: {str(e)}") raise HTTPException(status_code=500, detail=f"Error reading file: {str(e)}")
# 获取文件类型
mime_type, _ = mimetypes.guess_type(str(file_path_obj))
# 根据文件类型决定如何显示
if mime_type and mime_type.startswith('text/'):
# 文本文件直接在浏览器中显示
return PlainTextResponse(content=content, media_type=mime_type)
else:
# 其他文件类型创建HTML页面显示
html_content = f"""
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>文件查看器 - {file_path_obj.name}</title>
<style>
body {{
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
margin: 0;
padding: 20px;
background-color: #f5f5f5;
}}
.container {{
max-width: 1200px;
margin: 0 auto;
background-color: white;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
overflow: hidden;
}}
.header {{
background-color: #2c3e50;
color: white;
padding: 15px 20px;
display: flex;
justify-content: space-between;
align-items: center;
}}
.header h1 {{
margin: 0;
font-size: 1.5em;
}}
.file-info {{
font-size: 0.9em;
opacity: 0.8;
}}
.content {{
padding: 20px;
}}
.download-btn {{
background-color: #3498db;
color: white;
border: none;
padding: 8px 16px;
border-radius: 4px;
cursor: pointer;
text-decoration: none;
display: inline-block;
margin-left: 10px;
}}
.download-btn:hover {{
background-color: #2980b9;
}}
pre {{
background-color: #f8f9fa;
border: 1px solid #e9ecef;
border-radius: 4px;
padding: 15px;
overflow-x: auto;
white-space: pre-wrap;
word-wrap: break-word;
font-family: 'Courier New', monospace;
font-size: 14px;
line-height: 1.5;
}}
.binary-notice {{
background-color: #fff3cd;
border: 1px solid #ffeaa7;
border-radius: 4px;
padding: 15px;
margin-bottom: 20px;
color: #856404;
}}
</style>
</head>
<body>
<div class="container">
<div class="header">
<div>
<h1>{file_path_obj.name}</h1>
<div class="file-info">
路径: {decoded_path}<br>
大小: {file_path_obj.stat().st_size:,} 字节
</div>
</div>
<a href="/file/{file_path}?download=true" class="download-btn">下载文件</a>
</div>
<div class="content">
"""
# 检查是否为二进制文件
try:
# 尝试读取前1024字节来检测是否为二进制文件
with open(file_path_obj, 'rb') as f:
sample = f.read(1024)
# 检查是否包含null字节,这是二进制文件的特征
if b'\x00' in sample:
html_content += f"""
<div class="binary-notice">
<strong>注意</strong>这是一个二进制文件无法在浏览器中直接显示内容
</div>
"""
else:
# 尝试以文本形式显示
try:
text_content = sample.decode('utf-8')
html_content += f"""
<pre>{text_content}</pre>
"""
except UnicodeDecodeError:
html_content += f"""
<div class="binary-notice">
<strong>注意</strong>此文件包含非文本内容无法在浏览器中直接显示
</div>
"""
except Exception:
html_content += f"""
<div class="binary-notice">
<strong>注意</strong>无法读取文件内容
</div>
"""
html_content += """
</div>
</div>
</body>
</html>
"""
return HTMLResponse(content=html_content)
except HTTPException: except HTTPException:
# 重新抛出HTTP异常 # 重新抛出HTTP异常

Loading…
Cancel
Save