first commit

2026-02-12 16:24:41 +08:00
commit 1b4f81a9bc
6 changed files with 674 additions and 0 deletions
--- a/24
+++ b/24
@@ -0,0 +1,24 @@
+FROM python:3.11-slim
+
+# 禁止生成 .pyc 字节码文件，并关闭 stdout/stderr 缓冲，保证输出即时写出
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+# 安装系统依赖（Pandoc）
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends pandoc \
+    && rm -rf /var/lib/apt/lists/*
+
+# 安装 Python 依赖
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# 拷贝代码和相关资源（包括 ref.docx、color.lua 等）
+COPY . .
+
+# 默认启动 MCP 服务器
+CMD ["python", "mcp_docx_server.py"]
+
+
--- a/README.md
+++ b/README.md
@@ -0,0 +1,107 @@
+## DOCX 转换工具 MCP 服务器
+
+这是一个基于 MCP (Model Context Protocol) 的服务器，目前**只提供 HTML → DOCX** 的转换能力，底层通过 Pandoc 实现高质量排版。
+
+### 功能
+
+- **html_to_docx_pandoc**：将包含 HTML 标签的文本转换为 DOCX 文件，支持引用模板、Lua 过滤器等高级格式控制。
+
+### 安装依赖（本机运行）
+
+```bash
+pip install -r requirements.txt
+```
+
+请确保系统已安装 Pandoc（`pandoc --version` 可正常执行）。
+
+### 使用方法
+
+#### 方式一：本机直接运行
+
+运行服务器：
+
+```bash
+python mcp_docx_server.py
+```
+
+在 MCP 客户端中连接该服务器后，即可看到服务器注册的工具。注意：本文档描述的是 `html_to_docx_pandoc`，而本次提交的 `mcp_docx_server.py` 实际注册的是 `list_docx_images` 和 `edit_docx` 两个工具，请以代码为准。
+
+#### 方式二：使用 Docker 封装运行
+
+本项目已提供 `Dockerfile`，可以直接构建镜像并运行：
+
+```bash
+# 构建镜像
+docker build -t mcp-docx-server .
+
+# 运行容器（前台运行）
+docker run --rm -it mcp-docx-server
+```
+
+如果你希望在容器外部自定义 `ref.docx`、`color.lua` 或输出目录，可以通过挂载卷的方式（以下为 Windows CMD 写法，`^` 为续行符）：
+
+```bash
+docker run --rm -it ^
+  -v %cd%/ref.docx:/app/ref.docx ^
+  -v %cd%/color.lua:/app/color.lua ^
+  -v %cd%/output:/app/output ^
+  mcp-docx-server
+```
+
+在类 Unix 系统（如 macOS / Linux）中可改为：
+
+```bash
+docker run --rm -it \
+  -v "$(pwd)/ref.docx:/app/ref.docx" \
+  -v "$(pwd)/color.lua:/app/color.lua" \
+  -v "$(pwd)/output:/app/output" \
+  mcp-docx-server
+```
+
+#### 方式三：使用 docker-compose 运行
+
+已提供 `docker-compose.yml`，可以一条命令完成构建与运行：
+
+```bash
+# 构建并启动（前台）
+docker-compose up --build
+```
+
+默认会：
+
+- **构建镜像**：使用当前目录下的 `Dockerfile`
+- **挂载当前目录到容器 `/app`**：方便直接访问 `ref.docx`、`color.lua` 和输出文件
+- **在容器内执行**：`python mcp_docx_server.py`
+
+如需在后台运行，可使用：
+
+```bash
+docker-compose up -d --build
+```
+
+### 工具说明：html_to_docx_pandoc
+
+**作用：** 使用 Pandoc 将 HTML 文本转换为 DOCX 文件，尽可能保留原始样式，并支持：
+- 使用 `ref.docx` 作为参考模板（如果文件存在）
+- 使用 `color.lua` 作为 Lua 过滤器（如果文件存在）
+- 独立 HTML 模式、图片提取、自定义 CSS 等选项
+
+**参数：**
+- `html_text`（必需）：需要转换的 HTML 文本内容
+- `output_path`（必需）：输出 DOCX 文件的完整路径
+- `standalone`（可选，默认 `true`）：是否以独立 HTML 模式调用 Pandoc
+- `extract_media`（可选）：图片提取目录（如 `./media`），不需要提取可不传
+- `css_file`（可选）：CSS 样式文件路径
+
+### 依赖项
+
+- `mcp`: MCP Python SDK
+- `python-docx`、`lxml`：内部保留的 DOCX/HTML 处理能力（当前未通过 MCP 暴露）
+- `Pillow`：替换图片且新旧图片格式不同时，用于图片格式转换
+- **外部工具**：Pandoc（必须预先在系统中安装）
+
+### 注意事项
+
+- 必须安装 MCP SDK 才能运行服务器
+- 确保有足够的权限读取输入文件和写入输出文件
+- 大文件转换可能需要较长时间
+
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,18 @@
+version: "3.9"
+
+services:
+  mcp-docx-server:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: mcp-docx-server
+    working_dir: /app
+    # MCP 通常通过标准输入/输出与客户端通信，因此不需要暴露端口
+    stdin_open: true
+    tty: true
+    volumes:
+      # 可选：将当前目录挂载到容器内，便于共享 ref.docx、color.lua 和输出文件
+      - ./:/app
+    command: ["python", "mcp_docx_server.py"]
+
+
--- a/mcp_docx.py
+++ b/mcp_docx.py
@@ -0,0 +1,384 @@
+#!/usr/bin/env python3
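+"""
+mcp_docx.py — replace text, recolor keywords and swap images in a DOCX file
+while keeping the original formatting.
+
+Usage:
+  # List every image in the document
+  python3 mcp_docx.py input.docx --list-images
+
+  # Text replacement; wrap the new text in <span color="..."> to color it
+  python3 mcp_docx.py input.docx output.docx \
+    --replace "原文" "新文" \
+    --replace "关键词" '<span color="FF0000">关键词</span>'
+
+  # Image replacement (by order of appearance in the document, starting at 1)
+  python3 mcp_docx.py input.docx output.docx \
+    --image 1 new_chart.png \
+    --image 2 new_photo.jpg
+
+  # Replace text and images in the same run
+  python3 mcp_docx.py input.docx output.docx \
+    --replace "旧标题" '<span color="FF0000">新标题</span>' \
+    --image 1 new_image.png
+"""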
+
+import argparse
+import os
+import tempfile
+import zipfile
+from lxml import etree
+from PIL import Image
+
+# XML namespace URIs, used below to build Clark-notation ("{uri}tag") names.
+W   = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
+WD  = 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'
+A   = 'http://schemas.openxmlformats.org/drawingml/2006/main'
+R   = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
+# Relationship "Type" value that marks a relationship as pointing to an image.
+REL_TYPE_IMAGE = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'
+
+# File extension (lower-case, no dot) -> MIME type written to [Content_Types].xml.
+EXT_TO_MIME = {
+    'png': 'image/png', 'jpg': 'image/jpeg', 'jpeg': 'image/jpeg',
+    'gif': 'image/gif', 'bmp': 'image/bmp', 'tiff': 'image/tiff',
+    'webp': 'image/webp',
+}
+
+
+def unpack(docx_path, out_dir):
+    """使用 zipfile 直接解包 .docx 到临时目录，替代外部 unpack.py 脚本。"""
+    with zipfile.ZipFile(docx_path, 'r') as zf:
+        zf.extractall(out_dir)
+
+
+def pack(unpacked_dir, output_docx, original_docx):
+    """
+    使用 zipfile 将修改后的目录重新打包为 .docx。
+
+    original_docx 参数目前保留只是为了兼容原函数签名，没有实际使用。
+    """
+    # 确保输出目录存在
+    out_dir = os.path.dirname(os.path.abspath(output_docx))
+    if out_dir and not os.path.exists(out_dir):
+        os.makedirs(out_dir, exist_ok=True)
+
+    # 将解包目录中的所有文件打成 ZIP（保持相对路径结构）
+    with zipfile.ZipFile(output_docx, 'w', compression=zipfile.ZIP_DEFLATED) as zf:
+        for root, _, files in os.walk(unpacked_dir):
+            for fname in files:
+                abs_path = os.path.join(root, fname)
+                # docx 内部使用 / 作为路径分隔符
+                arcname = os.path.relpath(abs_path, unpacked_dir).replace(os.sep, '/')
+                zf.write(abs_path, arcname)
+
+
+def build_image_index(unpacked_dir):
+    """返回按文档顺序排列的图片列表"""
+    word_dir  = os.path.join(unpacked_dir, 'word')
+    doc_xml   = os.path.join(word_dir, 'document.xml')
+    rels_xml  = os.path.join(word_dir, '_rels', 'document.xml.rels')
+
+    rels_root = etree.parse(rels_xml).getroot()
+    rid_to_media = {}
+    for rel in rels_root:
+        if rel.get('Type', '') == REL_TYPE_IMAGE:
+            rid_to_media[rel.get('Id')] = rel.get('Target')
+
+    doc_root = etree.parse(doc_xml).getroot()
+    results = []
+    for blip in doc_root.iter(f'{{{A}}}blip'):
+        rid = blip.get(f'{{{R}}}embed')
+        if not rid or rid not in rid_to_media:
+            continue
+        media_rel = rid_to_media[rid]
+        media_abs = os.path.join(word_dir, media_rel.replace('/', os.sep))
+        ext       = os.path.splitext(media_rel)[1].lstrip('.').lower()
+
+        inline = blip
+        while inline is not None and inline.tag not in (f'{{{WD}}}inline', f'{{{WD}}}anchor'):
+            inline = inline.getparent()
+        w_cm = h_cm = None
+        docpr_name = ''
+        if inline is not None:
+            ext_el = inline.find(f'{{{WD}}}extent')
+            if ext_el is not None:
+                w_cm = round(int(ext_el.get('cx', 0)) / 360000, 2)
+                h_cm = round(int(ext_el.get('cy', 0)) / 360000, 2)
+            dp = inline.find(f'{{{WD}}}docPr')
+            if dp is not None:
+                docpr_name = dp.get('name', '')
+
+        results.append({
+            'index': len(results) + 1, 'rid': rid,
+            'media_file': media_rel, 'abs_path': media_abs,
+            'ext': ext, 'docpr_name': docpr_name,
+            'width_cm': w_cm, 'height_cm': h_cm,
+        })
+    return results
+
+
+def list_images(docx_path):
+    imgs = get_images_info(docx_path)
+    if not imgs:
+        print("文档中没有找到图片。")
+        return
+    print(f"共找到 {len(imgs)} 张图片：\n")
+    print(f"  {'#':<4} {'文件名':<20} {'尺寸':<18} Word内部名称")
+    print("  " + "-" * 62)
+    for img in imgs:
+        size = f"{img['width_cm']}×{img['height_cm']}cm" if img['width_cm'] else "未知"
+        print(f"  {img['index']:<4} {os.path.basename(img['media_file']):<20} {size:<18} {img['docpr_name']}")
+
+
+def get_images_info(docx_path):
+    """
+    返回给定 DOCX 文件中所有图片的结构化信息列表。
+
+    该函数专门为其他模块（例如 MCP 服务器）复用而设计，
+    行为等价于原来的 list_images 内部逻辑，但不做任何打印。
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        unpack(docx_path, tmpdir)
+        return build_image_index(tmpdir)
+
+
+def replace_image(unpacked_dir, index, new_image_path):
+    """替换第 index 张图片（1-based）"""
+    imgs = build_image_index(unpacked_dir)
+    if index < 1 or index > len(imgs):
+        raise ValueError(f"图片序号 {index} 超出范围（共 {len(imgs)} 张）")
+
+    info     = imgs[index - 1]
+    old_abs  = info['abs_path']
+    old_ext  = info['ext']
+    new_ext  = os.path.splitext(new_image_path)[1].lstrip('.').lower()
+    if new_ext == 'jpg':
+        new_ext = 'jpeg'
+
+    print(f"    图片#{index} {os.path.basename(info['media_file'])}({old_ext.upper()})"
+          f" ← {os.path.basename(new_image_path)}({new_ext.upper()})")
+
+    if old_ext == new_ext:
+        # ── 同格式：直接覆盖 ──────────────────────────────
+        import shutil
+        shutil.copy2(new_image_path, old_abs)
+
+    else:
+        # ── 不同格式：Pillow 转换 + 更新 rels + ContentTypes
+        new_abs = os.path.splitext(old_abs)[0] + '.' + new_ext
+        img = Image.open(new_image_path)
+        fmt = {'jpeg': 'JPEG', 'png': 'PNG', 'gif': 'GIF',
+               'bmp': 'BMP', 'tiff': 'TIFF', 'webp': 'WEBP'}.get(new_ext, new_ext.upper())
+        if fmt == 'JPEG' and img.mode in ('RGBA', 'P'):
+            img = img.convert('RGB')
+        img.save(new_abs, format=fmt)
+        if os.path.abspath(new_abs) != os.path.abspath(old_abs):
+            os.remove(old_abs)
+
+        # 更新 rels
+        old_media = info['media_file']
+        new_media = os.path.splitext(old_media)[0] + '.' + new_ext
+        word_dir  = os.path.join(unpacked_dir, 'word')
+        rels_path = os.path.join(word_dir, '_rels', 'document.xml.rels')
+        rels_tree = etree.parse(rels_path)
+        for rel in rels_tree.getroot():
+            if rel.get('Id') == info['rid']:
+                rel.set('Target', new_media)
+                break
+        rels_tree.write(rels_path, xml_declaration=True, encoding='UTF-8', standalone=True)
+
+        # 更新 ContentTypes
+        ct_path = os.path.join(unpacked_dir, '[Content_Types].xml')
+        ct_tree = etree.parse(ct_path)
+        ct_root = ct_tree.getroot()
+        existing = {el.get('Extension', '') for el in ct_root}
+        if new_ext not in existing:
+            etree.SubElement(ct_root, 'Default', Extension=new_ext,
+                             ContentType=EXT_TO_MIME.get(new_ext, f'image/{new_ext}'))
+        ct_tree.write(ct_path, xml_declaration=True, encoding='UTF-8', standalone=True)
+        print(f"      格式转换 {old_ext}→{new_ext}，rels 和 ContentTypes 已更新")
+
+
+def paragraph_replace(para_el, replacements):
+    """在 <w:t> 层面替换文本，完全不碰图片和格式"""
+    for t_el in para_el.iter(f'{{{W}}}t'):
+        if not t_el.text:
+            continue
+        new_text = t_el.text
+        for old, new in replacements:
+            new_text = new_text.replace(old, new)
+        if new_text != t_el.text:
+            t_el.text = new_text
+            if new_text and (new_text[0] == ' ' or new_text[-1] == ' '):
+                t_el.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
+
+
+def ensure_rpr(run_el):
+    rpr = run_el.find(f'{{{W}}}rPr')
+    if rpr is None:
+        rpr = etree.Element(f'{{{W}}}rPr')
+        run_el.insert(0, rpr)
+    return rpr
+
+def set_color_on_rpr(rpr_el, hex_color):
+    c = rpr_el.find(f'{{{W}}}color')
+    if c is None:
+        c = etree.SubElement(rpr_el, f'{{{W}}}color')
+    c.set(f'{{{W}}}val', hex_color.lstrip('#'))
+
+def apply_color_to_keyword(doc_el, keyword, hex_color):
+    """
+    只给匹配到的关键字本身着色，而不是整个 run。
+
+    做法：在有关键字的 run 上，把文本拆成多段 run：
+      [前缀][关键字][后缀]，只有“关键字”这个 run 设置颜色。
+    """
+    # 先 list 一下，避免在遍历时修改树结构导致问题
+    runs = list(doc_el.iter(f'{{{W}}}r'))
+    for run in runs:
+        t_nodes = list(run.findall(f'{{{W}}}t'))
+        if not t_nodes:
+            continue
+        full_text = ''.join(t.text or '' for t in t_nodes)
+        if keyword not in full_text:
+            continue
+
+        parent = run.getparent()
+        if parent is None:
+            continue
+        insert_pos = parent.index(run)
+
+        # 原 run 的 rPr 复制给新 run
+        orig_rpr = run.find(f'{{{W}}}rPr')
+        if orig_rpr is not None:
+            rpr_bytes = etree.tostring(orig_rpr)
+        else:
+            rpr_bytes = None
+
+        def make_run(text, colored):
+            new_r = etree.Element(f'{{{W}}}r')
+            if rpr_bytes is not None:
+                new_r.append(etree.fromstring(rpr_bytes))
+            t_el = etree.SubElement(new_r, f'{{{W}}}t')
+            t_el.text = text
+            if text and (text[0] == ' ' or text[-1] == ' '):
+                t_el.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
+            if colored:
+                set_color_on_rpr(ensure_rpr(new_r), hex_color)
+            return new_r
+
+        segments = []
+        s = full_text
+        start = 0
+        klen = len(keyword)
+        while True:
+            idx = s.find(keyword, start)
+            if idx == -1:
+                if start < len(s):
+                    segments.append((s[start:], False))
+                break
+            if idx > start:
+                segments.append((s[start:idx], False))
+            segments.append((keyword, True))
+            start = idx + klen
+
+        # 用新 run 替换原 run
+        parent.remove(run)
+        for offset, (seg_text, colored) in enumerate(segments):
+            if seg_text:
+                parent.insert(insert_pos + offset, make_run(seg_text, colored))
+
+def process(input_docx, output_docx, replacements, image_replacements,
+            color_keywords):
+    with tempfile.TemporaryDirectory() as tmpdir:
+        print(f"📂 解包 {input_docx} ...")
+        unpack(input_docx, tmpdir)
+
+        doc_xml_path = os.path.join(tmpdir, 'word', 'document.xml')
+
+        if image_replacements:
+            print(f"🖼️  替换 {len(image_replacements)} 张图片...")
+            for idx, new_img in image_replacements:
+                replace_image(tmpdir, idx, new_img)
+
+        tree = etree.parse(doc_xml_path)
+        root = tree.getroot()
+
+        if replacements:
+            print(f"✏️  替换 {len(replacements)} 条文本...")
+            for para in root.iter(f'{{{W}}}p'):
+                paragraph_replace(para, replacements)
+
+        # 根据 span 解析出的关键字上色
+        for keyword, color in color_keywords:
+            print(f"🎨 关键词「{keyword}」→ #{color}")
+            apply_color_to_keyword(root, keyword, color)
+
+        tree.write(doc_xml_path, xml_declaration=True, encoding='UTF-8', standalone=True)
+        print(f"📦 打包 → {output_docx} ...")
+        pack(tmpdir, output_docx, input_docx)
+        print(f"✅ 完成！输出: {output_docx}")
+
+
+def _parse_span_replacement(new_text):
+    """
+    解析 NEW 文本中的 span 标签，用于决定颜色。
+
+    约定格式（不区分大小写）：
+      <span color="FF0000">待补充</span>
+      <span color="#FF0000">待补充</span>
+
+    返回: (纯文本, [(keyword, hex_color), ...])
+    """
+    import re
+
+    span_pattern = re.compile(
+        r'<span\s+[^>]*?color=["\']?(#?[0-9a-fA-F]{6})["\']?[^>]*>(.*?)</span>',
+        re.IGNORECASE | re.DOTALL,
+    )
+
+    color_keywords = []
+
+    def _repl(m):
+        hex_color = m.group(1).lstrip('#')
+        keyword = m.group(2)
+        color_keywords.append((keyword, hex_color))
+        return keyword
+
+    plain_text = span_pattern.sub(_repl, new_text)
+    return plain_text, color_keywords
+
+
+def main():
+    parser = argparse.ArgumentParser(description='DOCX 格式保留：替换文本/图片/颜色')
+    parser.add_argument('input', help='输入 .docx')
+    parser.add_argument('output', nargs='?', help='输出 .docx')
+    parser.add_argument('--list-images', action='store_true', help='列出所有图片')
+    parser.add_argument('--replace', nargs=2, metavar=('OLD', 'NEW'),
+                        action='append', default=[])
+    parser.add_argument('--image', nargs=2, metavar=('INDEX', 'FILE'),
+                        action='append', default=[], help='图片替换')
+    args = parser.parse_args()
+
+    if args.list_images:
+        list_images(args.input)
+        return
+    if not args.output:
+        parser.error("需要指定输出文件")
+
+    # 处理 span 颜色：把 NEW 中的 <span color="...">文字</span> 抽出来
+    replacements = []
+    color_keywords = []
+    for old, new_raw in args.replace:
+        new_plain, spans = _parse_span_replacement(new_raw)
+        replacements.append((old, new_plain))
+        color_keywords.extend(spans)
+
+    process(
+        input_docx        = args.input,
+        output_docx       = args.output,
+        replacements      = replacements,
+        image_replacements= [(int(i), f) for i, f in args.image],
+        color_keywords    = color_keywords,
+    )
+
+if __name__ == '__main__':
+    main()
--- a/mcp_docx_server.py
+++ b/mcp_docx_server.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+"""
+基于 mcp_docx.py 封装的 MCP 服务器。
+
+暴露两个主要工具：
+- list_docx_images：列出 DOCX 中的图片信息
+- edit_docx:       进行文本替换 / 关键字上色 / 图片替换
+
+注意：底层仍然完全复用 mcp_docx.py 中的逻辑，只是通过 MCP SDK 对外提供。
+"""
+
+import os
+from typing import Any, Dict, List, Optional
+
+from mcp.server.fastmcp import FastMCPServer
+
+from mcp_docx import get_images_info, process, _parse_span_replacement
+
+
+server = FastMCPServer(
+    "docx-editor",
+    version="0.1.0",
+    description="DOCX 文本和图片编辑工具（基于 mcp_docx.py 封装）",
+)
+
+
+@server.tool()
+async def list_docx_images(docx_path: str) -> List[Dict[str, Any]]:
+    """
+    列出指定 DOCX 文件中的所有图片信息。
+
+    参数:
+      - docx_path: DOCX 文件的路径（相对或绝对）
+
+    返回:
+      - 图片信息列表，每一项包含:
+        - index: 图片在文档中的顺序（从 1 开始）
+        - media_file: DOCX 内部的资源路径
+        - abs_path: 解包后的绝对路径（仅用于调试）
+        - ext: 图片扩展名
+        - docpr_name: Word 内部的图片名称
+        - width_cm / height_cm: 近似尺寸（厘米），可能为 None
+    """
+    if not os.path.exists(docx_path):
+        raise FileNotFoundError(f"DOCX 文件不存在: {docx_path}")
+
+    imgs = get_images_info(docx_path)
+    # 为了避免泄露容器内部路径，可选择屏蔽 abs_path 字段
+    for img in imgs:
+        img.pop("abs_path", None)
+    return imgs
+
+
+@server.tool()
+async def edit_docx(
+    input_docx: str,
+    output_docx: str,
+    replacements: Optional[List[Dict[str, str]]] = None,
+    image_replacements: Optional[List[Dict[str, Any]]] = None,
+) -> Dict[str, Any]:
+    """
+    使用原始 mcp_docx 逻辑对 DOCX 文件进行编辑。
+
+    支持：
+      - 纯文本替换
+      - 通过 <span color=\"FF0000\">关键字</span> 语法设置关键字颜色
+      - 替换指定序号的图片
+
+    参数:
+      - input_docx: 输入 DOCX 文件路径
+      - output_docx: 输出 DOCX 文件路径
+      - replacements: 文本替换规则列表，例如:
+            [
+              {\"old\": \"旧标题\", \"new\": \"<span color='#FF0000'>新标题</span>\"},
+              {\"old\": \"原文\", \"new\": \"新文\"}
+            ]
+      - image_replacements: 图片替换规则列表，例如:
+            [
+              {\"index\": 1, \"file\": \"new_chart.png\"},
+              {\"index\": 2, \"file\": \"new_photo.jpg\"}
+            ]
+
+    返回:
+      - {\"output_path\": 生成的 DOCX 绝对路径}
+    """
+    if not os.path.exists(input_docx):
+        raise FileNotFoundError(f"输入 DOCX 文件不存在: {input_docx}")
+
+    if replacements is None:
+        replacements = []
+    if image_replacements is None:
+        image_replacements = []
+
+    # 解析文本替换与颜色关键字（复用 CLI 逻辑）
+    rep_pairs = []
+    color_keywords = []
+    for item in replacements:
+        old = item.get("old")
+        new_raw = item.get("new")
+        if not old:
+            continue
+        if new_raw is None:
+            new_raw = ""
+        new_plain, spans = _parse_span_replacement(new_raw)
+        rep_pairs.append((old, new_plain))
+        color_keywords.extend(spans)
+
+    # 处理图片替换参数
+    img_pairs = []
+    for item in image_replacements:
+        try:
+            idx = int(item.get("index"))
+        except (TypeError, ValueError):
+            continue
+        path = item.get("file")
+        if not path:
+            continue
+        if not os.path.exists(path):
+            raise FileNotFoundError(f"图片文件不存在: {path}")
+        img_pairs.append((idx, path))
+
+    # 复用原始处理函数
+    process(
+        input_docx=input_docx,
+        output_docx=output_docx,
+        replacements=rep_pairs,
+        image_replacements=img_pairs,
+        color_keywords=color_keywords,
+    )
+
+    return {"output_path": os.path.abspath(output_docx)}
+
+
+if __name__ == "__main__":
+    # 通过 stdio 运行 MCP 服务器
+    server.run()
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+mcp>=1.0.0
+python-docx>=1.1.0
+lxml>=5.0.0
+Pillow>=10.0.0
+