From 4b98e94942e1a2e76204ac9468faeaf77c2b2f70 Mon Sep 17 00:00:00 2001
From: liangweihao <734499798@qq.com>
Date: Thu, 12 Feb 2026 17:41:02 +0800
Subject: [PATCH] =?UTF-8?q?=E8=AF=BB=E5=8F=96=E6=96=87=E4=BB=B6url?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md          |  34 +++++++-
 mcp_docx_server.py | 211 ++++++++++++++++++++++++++++++++++-----------
 requirements.txt   |   1 +
 3 files changed, 191 insertions(+), 55 deletions(-)

diff --git a/README.md b/README.md
index 6eda8bd..1cc7ecf 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,18 @@
-## DOCX 转换工具 MCP 服务器
+## DOCX 转换 / 编辑 MCP 服务器
 
-这是一个基于 MCP (Model Context Protocol) 的服务器，目前**只提供 HTML → DOCX** 的转换能力，底层通过 Pandoc 实现高质量排版。
+这是一个基于 MCP (Model Context Protocol) 的服务器，目前提供两类能力：
+
+- **DOCX 编辑工具**：基于 `mcp_docx.py`，支持列出图片、文本替换、关键字上色、图片替换（可直接使用 URL 作为输入）。
+- **HTML → DOCX 转换工具**：基于 Pandoc，实现高质量排版。
 
 ### 功能
 
-- **html_to_docx_pandoc**：将包含 HTML 标签的文本转换为 DOCX 文件，支持引用模板、Lua 过滤器等高级格式控制。
+- `list_docx_images`：列出 DOCX 中的图片信息，支持 `docx_path` 为本地路径或 HTTP/HTTPS URL。
+- `edit_docx`：对 DOCX 进行编辑，支持：
+  - `input_docx` 为本地路径或 HTTP/HTTPS URL；
+  - `image_replacements[*].file` 为本地路径或 HTTP/HTTPS URL；
+  - 返回结果中包含 `output_path` 和可选的 `output_url`（见下文）。
+- `html_to_docx_pandoc`：将包含 HTML 标签的文本转换为 DOCX 文件，支持引用模板、Lua 过滤器等高级格式控制。
 
 ### 安装依赖（本机运行）
 
@@ -24,7 +32,25 @@ pip install -r requirements.txt
 python mcp_docx_server.py
 ```
 
-在 MCP 客户端中连接该服务器后，会看到一个名为 `html_to_docx_pandoc` 的工具。
+在 MCP 客户端中连接该服务器后，会看到上述三个工具。
+
+#### 输出 URL（output_url）
+
+如果你希望 `edit_docx` 返回一个可直接访问的 URL，需要：
+
+- 在运行服务器前设置环境变量 `MCP_OUTPUT_BASE_URL`，例如在 Windows（cmd）中：
+
+```bat
+set MCP_OUTPUT_BASE_URL=http://localhost:8000/files/
+```
+
+或在类 Unix 系统中：
+
+```bash
+export MCP_OUTPUT_BASE_URL="http://localhost:8000/files/"
+```
+
+然后确保你的 HTTP 服务器能在该前缀下提供生成的 DOCX 文件（默认逻辑是：`output_url = MCP_OUTPUT_BASE_URL（去掉末尾的 "/"）+ "/" + 文件名`，其中文件名只取 `output_docx` 的文件名部分，不包含目录）。
 
 #### 方式二：使用 Docker 封装运行
 
diff --git a/mcp_docx_server.py b/mcp_docx_server.py
index 48da0f7..c146f65 100644
--- a/mcp_docx_server.py
+++ b/mcp_docx_server.py
@@ -26,8 +26,11 @@
 
 import argparse
 import os
+import tempfile
+import urllib.parse
 from typing import Any, Dict, List, Optional
 
+import requests
 from mcp.server.fastmcp import FastMCP
 from mcp.server.transport_security import TransportSecuritySettings
 
@@ -58,13 +61,60 @@ mcp = FastMCP(
 )
 
 
+def _is_url(path: str) -> bool:
+    """Return True if path begins with http:// or https:// (scheme case-insensitive)."""
+    return path.lower().startswith(("http://", "https://"))
+
+
+def _download_to_temp(url: str, suffix: str = ".tmp") -> str:
+    """Download *url* to a new temporary file and return its path.
+
+    The caller must delete the file. Raises requests.HTTPError on 4xx/5xx.
+    """
+    resp = requests.get(url, stream=True, timeout=30)
+    try:
+        resp.raise_for_status()
+        fd, tmp_path = tempfile.mkstemp(suffix=suffix)
+        try:
+            with os.fdopen(fd, "wb") as f:
+                for chunk in resp.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+        except Exception:
+            try:
+                os.remove(tmp_path)
+            except OSError:
+                pass
+            raise
+    finally:
+        # stream=True keeps the connection open until closed explicitly.
+        resp.close()
+    return tmp_path
+
+
+def _build_output_url(abs_output_path: str) -> Optional[str]:
+    """
+    Build the public URL of an output file from MCP_OUTPUT_BASE_URL.
+
+    Returns None when the variable is unset or empty. Otherwise the URL is
+    MCP_OUTPUT_BASE_URL.rstrip('/') + '/' + the percent-encoded file name
+    (only the base name of abs_output_path is used, not its directory).
+    """
+    base = os.getenv("MCP_OUTPUT_BASE_URL")
+    if not base:
+        return None
+    # Quote so spaces, '#', '?' and non-ASCII names yield a valid URL.
+    filename = urllib.parse.quote(os.path.basename(abs_output_path))
+    return base.rstrip("/") + "/" + filename
+
+
 @mcp.tool()
 async def list_docx_images(docx_path: str) -> List[Dict[str, Any]]:
     """
     列出指定 DOCX 文件中的所有图片信息。
 
     参数:
-      - docx_path: DOCX 文件的路径（相对或绝对）
+      - docx_path: DOCX 文件的路径（相对或绝对），也可以是 HTTP/HTTPS URL。
 
     返回:
       - 图片信息列表，每一项包含:
@@ -74,14 +124,29 @@ async def list_docx_images(docx_path: str) -> List[Dict[str, Any]]:
         - docpr_name: Word 内部的图片名称
         - width_cm / height_cm: 近似尺寸（厘米），可能为 None
     """
-    if not os.path.exists(docx_path):
-        raise FileNotFoundError(f"DOCX 文件不存在: {docx_path}")
+    tmp_file: Optional[str] = None
+    try:
+        local_path = docx_path
+        if _is_url(docx_path):
+            parsed = urllib.parse.urlparse(docx_path)
+            ext = os.path.splitext(parsed.path)[1] or ".docx"
+            tmp_file = _download_to_temp(docx_path, suffix=ext)
+            local_path = tmp_file
 
-    imgs = get_images_info(docx_path)
-    # 为了避免泄露容器内部路径，屏蔽 abs_path 字段
-    for img in imgs:
-        img.pop("abs_path", None)
-    return imgs
+        if not os.path.exists(local_path):
+            raise FileNotFoundError(f"DOCX 文件不存在: {docx_path}")
+
+        imgs = get_images_info(local_path)
+        # 为了避免泄露容器内部路径，屏蔽 abs_path 字段
+        for img in imgs:
+            img.pop("abs_path", None)
+        return imgs
+    finally:
+        if tmp_file and os.path.exists(tmp_file):
+            try:
+                os.remove(tmp_file)
+            except OSError:
+                pass
 
 
 @mcp.tool()
@@ -100,7 +165,7 @@ async def edit_docx(
       - 替换指定序号的图片
 
     参数:
-      - input_docx: 输入 DOCX 文件路径
+      - input_docx: 输入 DOCX 文件路径，或 HTTP/HTTPS URL
       - output_docx: 输出 DOCX 文件路径
       - replacements: 文本替换规则列表，例如:
             [
@@ -112,56 +177,100 @@ async def edit_docx(
               {\"index\": 1, \"file\": \"new_chart.png\"},
               {\"index\": 2, \"file\": \"new_photo.jpg\"}
             ]
+            其中 file 字段同样可以是本地路径或 HTTP/HTTPS URL。
 
     返回:
-      - {\"output_path\": 生成的 DOCX 绝对路径}
+      - {
+          \"output_path\": 生成的 DOCX 绝对路径,
+          \"output_url\":  如果配置了 MCP_OUTPUT_BASE_URL，则为可访问该文件的 URL，否则为 null
+        }
     """
-    if not os.path.exists(input_docx):
-        raise FileNotFoundError(f"输入 DOCX 文件不存在: {input_docx}")
+    tmp_input: Optional[str] = None
+    tmp_images: List[str] = []
 
-    if replacements is None:
-        replacements = []
-    if image_replacements is None:
-        image_replacements = []
+    try:
+        local_input = input_docx
+        if _is_url(input_docx):
+            parsed = urllib.parse.urlparse(input_docx)
+            ext = os.path.splitext(parsed.path)[1] or ".docx"
+            tmp_input = _download_to_temp(input_docx, suffix=ext)
+            local_input = tmp_input
 
-    # 解析文本替换与颜色关键字（复用 CLI 逻辑）
-    rep_pairs = []
-    color_keywords = []
-    for item in replacements:
-        old = item.get("old")
-        new_raw = item.get("new")
-        if not old:
-            continue
-        if new_raw is None:
-            new_raw = ""
-        new_plain, spans = _parse_span_replacement(new_raw)
-        rep_pairs.append((old, new_plain))
-        color_keywords.extend(spans)
+        if not os.path.exists(local_input):
+            raise FileNotFoundError(f"输入 DOCX 文件不存在: {input_docx}")
 
-    # 处理图片替换参数
-    img_pairs = []
-    for item in image_replacements:
-        try:
-            idx = int(item.get("index"))
-        except (TypeError, ValueError):
-            continue
-        path = item.get("file")
-        if not path:
-            continue
-        if not os.path.exists(path):
-            raise FileNotFoundError(f"图片文件不存在: {path}")
-        img_pairs.append((idx, path))
+        if replacements is None:
+            replacements = []
+        if image_replacements is None:
+            image_replacements = []
 
-    # 复用原始处理函数
-    process(
-        input_docx=input_docx,
-        output_docx=output_docx,
-        replacements=rep_pairs,
-        image_replacements=img_pairs,
-        color_keywords=color_keywords,
-    )
+        # 解析文本替换与颜色关键字（复用 CLI 逻辑）
+        rep_pairs = []
+        color_keywords = []
+        for item in replacements:
+            old = item.get("old")
+            new_raw = item.get("new")
+            if not old:
+                continue
+            if new_raw is None:
+                new_raw = ""
+            new_plain, spans = _parse_span_replacement(new_raw)
+            rep_pairs.append((old, new_plain))
+            color_keywords.extend(spans)
 
-    return {"output_path": os.path.abspath(output_docx)}
+        # 处理图片替换参数（支持本地路径或 URL）
+        img_pairs = []
+        for item in image_replacements:
+            try:
+                idx = int(item.get("index"))
+            except (TypeError, ValueError):
+                continue
+
+            path = item.get("file")
+            if not path:
+                continue
+
+            local_img = path
+            if _is_url(path):
+                parsed = urllib.parse.urlparse(path)
+                ext = os.path.splitext(parsed.path)[1] or ""
+                suffix = ext if ext else ".img"
+                tmp_img = _download_to_temp(path, suffix=suffix)
+                tmp_images.append(tmp_img)
+                local_img = tmp_img
+
+            if not os.path.exists(local_img):
+                raise FileNotFoundError(f"图片文件不存在: {path}")
+
+            img_pairs.append((idx, local_img))
+
+        # 复用原始处理函数
+        process(
+            input_docx=local_input,
+            output_docx=output_docx,
+            replacements=rep_pairs,
+            image_replacements=img_pairs,
+            color_keywords=color_keywords,
+        )
+
+        abs_out = os.path.abspath(output_docx)
+        return {
+            "output_path": abs_out,
+            "output_url": _build_output_url(abs_out),
+        }
+    finally:
+        if tmp_input and os.path.exists(tmp_input):
+            try:
+                os.remove(tmp_input)
+            except OSError:
+                pass
+
+        for p in tmp_images:
+            if os.path.exists(p):
+                try:
+                    os.remove(p)
+                except OSError:
+                    pass
 
 
 if __name__ == "__main__":
diff --git a/requirements.txt b/requirements.txt
index 29217ca..615933e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,5 @@ mcp>=1.0.0
 python-docx>=1.1.0
 lxml>=5.0.0
 Pillow>=10.0.0
+requests>=2.0.0