From 248f7a263727c53a98675c56aa01509f1b86acf3 Mon Sep 17 00:00:00 2001 From: liangweihao <734499798@qq.com> Date: Fri, 20 Mar 2026 19:19:26 +0800 Subject: [PATCH] update --- README.md | 32 +++++ docker-compose.yml | 6 +- mcp_docx.py | 164 ++++++++++++++++++++++-- mcp_docx_server.py | 308 +++++++++++++++++++++++++++++++++++++++------ 4 files changed, 457 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 1cc7ecf..3c5a80c 100644 --- a/README.md +++ b/README.md @@ -131,3 +131,35 @@ docker-compose up -d --build - 确保有足够的权限读取输入文件和写入输出文件 - 大文件转换可能需要较长时间 +{ + "tools": [ + { + "provider_name": "get_work_data", + "provider_show_name": "get_work_data", + "tool_name": "product_chart_edit_product_chart_post", + "tool_label": "product_chart_edit_product_chart_post", + "tool_description": "传参格式:url(文件下载地址)+ \n datas(图表列表,每项含 oldTitle:原标题、newTitle:新标题、titleRowDatas:行标题数据、dataSet:数据集), \n 例如: {\n \"oldTitle\": \"8月份实际开展中高风险作业计划\",\n \"newTitle\": \"12月份实际开展中高风险作业计划\",\n \"titleRowDatas\": [\n \"中风险\",\n \"高风险\"\n ],\n \"dataSet\": [\n {\n \"label\": \"广州\",\n \"data\": [10, 20]\n },\n ]\n }", + "settings": {}, + "parameters": { + "url": null, + "fileName": null, + "datas": null + }, + "enabled": true, + "extra": { + "description": "传参格式:url(文件下载地址)+ \n datas(图表列表,每项含 oldTitle:原标题、newTitle:新标题、titleRowDatas:行标题数据、dataSet:数据集), \n 例如: {\n \"oldTitle\": \"8月份实际开展中高风险作业计划\",\n \"newTitle\": \"12月份实际开展中高风险作业计划\",\n \"titleRowDatas\": [\n \"中风险\",\n \"高风险\"\n ],\n \"dataSet\": [\n {\n \"label\": \"广州\",\n \"data\": [10, 20]\n },\n ]\n }" + }, + "type": "mcp" + } + ], + "instruction": "## 目标\n根据url、fileName和迭代数据里面的图表的结构和获取到的数据,使用工具更新文档。\n\n## 输出要求\n输出最终调用product_chart_edit_product_chart_post工具返回的word文档链接, 图表标题需要更新在newTitle,wordChartDatas根据titleRowDatas和dataSet\n", + "model": { + "provider": "langgenius/tongyi/tongyi", + "model": "qwen3-235b-a22b", + "model_type": "llm", + "mode": "chat", + "completion_params": {}, + "type": "model-selector" + }, + "query": "# url\nhttp://192.168.10.187:48080/xyp-server/file/download?filename=bdcbd8efea4b4d7594ee7f0efa63001d.docx\n# fileName\n【2026-3-13】生产指挥中心作业风险监控月报模板_V0.5.docx\n# 迭代数据\n[{'data': '{\"text\": \"\\\\n\\\\n\\\\n\\\\n由于接口出错,[各分局中高风险作业数]未查询到具体数值。\"}', 'structure': '\\n```json\\n{\\n \"newTitle\": \"2025年11月实际开展中高风险作业计划\",\\n \"oldTitle\": \"8月份实际开展中高风险作业计划\",\\n \"chartStructure\": {\\n \"dataSet\": [\\n {\"data\": null, \"label\": \"广州\"},\\n {\"data\": null, \"label\": \"湛江\"},\\n {\"data\": null, \"label\": \"韶关\"},\\n {\"data\": null, \"label\": \"佛山\"},\\n {\"data\": null, \"label\": \"茂名\"},\\n {\"data\": null, \"label\": \"中山\"},\\n {\"data\": null, \"label\": \"东莞\"},\\n {\"data\": null, \"label\": \"江门\"},\\n {\"data\": null, \"label\": \"肇庆\"},\\n {\"data\": null, \"label\": \"梅州\"},\\n {\"data\": null, \"label\": \"揭阳\"},\\n {\"data\": null, \"label\": \"惠州\"},\\n {\"data\": null, \"label\": \"阳江\"},\\n {\"data\": null, \"label\": \"汕尾\"},\\n {\"data\": null, \"label\": \"潮州\"},\\n {\"data\": null, \"label\": \"清远\"},\\n {\"data\": null, \"label\": \"河源\"},\\n {\"data\": null, \"label\": \"珠海\"},\\n {\"data\": null, \"label\": \"汕头\"},\\n {\"data\": null, \"label\": \"云浮\"}\\n ],\\n \"titleRowDatas\": [\"中风险\", \"高风险\"]\\n }\\n}\\n```'}, {'data': '{\"text\": \"\\\\n由于接口出错,2025年11月作业人数分布数据未查询到具体数值。建议检查参数设置或联系接口维护人员排查服务端异常。\"}', 'structure': '\\n{\\n \"newTitle\": \"2025年11月作业人数分布情况\",\\n \"oldTitle\": \"8月份作业人数分布情况\",\\n \"chartStructure\": {\\n \"dataSet\": [\\n {\\n \"data\": null,\\n \"label\": \"广州\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"中山\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"江门\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"东莞\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"佛山\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"韶关\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"湛江\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"清远\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"茂名\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"惠州\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"揭阳\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"梅州\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"阳江\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"珠海\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"肇庆\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"河源\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"云浮\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"汕头\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"潮州\"\\n },\\n {\\n \"data\": null,\\n \"label\": \"汕尾\"\\n }\\n ],\\n \"titleRowDatas\": [\\n \"30人以内\",\\n \"30-49人\",\\n \"50-99人\",\\n \"100人及以上\"\\n ]\\n }\\n}'}, {'data': '{\"text\": \"\\\\n由于接口出错,2025年11月各分局重点关注风险作业数量未查询到具体数值。\"}', 'structure': '\\n```json\\n{\\n \"newTitle\": \"2025年11月重点关注风险情况\",\\n \"oldTitle\": \"8月份重点关注风险情况\",\\n \"chartStructure\": {\\n \"dataSet\": [\\n {\"data\": null, \"label\": \"东莞\"}, {\"data\": null, \"label\": \"佛山\"}, {\"data\": null, \"label\": \"广州\"},\\n {\"data\": null, \"label\": \"茂名\"}, {\"data\": null, \"label\": \"中山\"}, {\"data\": null, \"label\": \"梅州\"},\\n {\"data\": null, \"label\": \"肇庆\"}, {\"data\": null, \"label\": \"清远\"}, {\"data\": null, \"label\": \"揭阳\"},\\n {\"data\": null, \"label\": \"河源\"}, {\"data\": null, \"label\": \"惠州\"}, {\"data\": null, \"label\": \"江门\"},\\n {\"data\": null, \"label\": \"湛江\"}, {\"data\": null, \"label\": \"韶关\"}, {\"data\": null, \"label\": \"云浮\"},\\n {\"data\": null, \"label\": \"珠海\"}, {\"data\": null, \"label\": \"汕尾\"}, {\"data\": null, \"label\": \"汕头\"},\\n {\"data\": null, \"label\": \"潮州\"}, {\"data\": null, \"label\": \"阳江\"}\\n ],\\n \"titleRowDatas\": [\"涉管线路\", \"深基坑\", \"有限空间\", \"高支模\", \"重型机械\", \"高处作业\"]\\n }\\n}\\n```'}, {'data': '{\"text\": \"\\\\n\\\\n\\\\n\\\\n由于接口出错,2025年11月重点关注作业风险分布数据未查询到具体数值。\"}', 'structure': '\\n```json\\n{\\n \"newTitle\": \"2025年11月重点关注作业风险分布情况\",\\n \"oldTitle\": \"8月份重点关注作业风险分布情况\",\\n \"chartStructure\": {\\n \"dataSet\": [\\n {\"data\": null, \"label\": \"涉管线路\"},\\n {\"data\": null, \"label\": \"深基坑\"},\\n {\"data\": null, \"label\": \"有限空间\"},\\n {\"data\": null, \"label\": \"高支模\"},\\n {\"data\": null, \"label\": \"重型机械\"},\\n {\"data\": null, \"label\": \"高处作业\"}\\n ],\\n \"titleRowDatas\": [\"8月份重点关注作业风险分布情况\"]\\n }\\n}\\n```'}, {'data': '{\"text\": \"\\\\n\\\\n\\\\n\\\\n由于多次调用工具均返回数据库连接异常,且错误堆栈指向SQL执行层,判断当前无法通过工具获取数据。根据用户提供的图表结构模板,所有分局的抢修作业和一般临时作业数据均为空值,因此输出默认结果:\\\\n\\\\n任务描述:获取2025年11月临时(抢修)作业类型数据,按分局统计抢修作业和一般临时作业的数量\\\\n执行结果:各分局抢修作业数量为0,一般临时作业数量为0\"}', 'structure': '\\n{\\n \"newTitle\": \"2025年11月份临时(抢修)作业类型\",\\n \"oldTitle\": \"8月份临时(抢修)作业类型\",\\n \"chartStructure\": {\\n \"code\": 0,\\n \"data\": [\\n {\\n \"dataSet\": [\\n {\"data\": null, \"label\": \"佛山\"},\\n {\"data\": null, \"label\": \"东莞\"},\\n {\"data\": null, \"label\": \"肇庆\"},\\n {\"data\": null, \"label\": \"湛江\"},\\n {\"data\": null, \"label\": \"河源\"},\\n {\"data\": null, \"label\": \"汕尾\"},\\n {\"data\": null, \"label\": \"梅州\"},\\n {\"data\": null, \"label\": \"韶关\"},\\n {\"data\": null, \"label\": \"清远\"},\\n {\"data\": null, \"label\": \"江门\"},\\n {\"data\": null, \"label\": \"惠州\"},\\n {\"data\": null, \"label\": \"云浮\"},\\n {\"data\": null, \"label\": \"广州\"},\\n {\"data\": null, \"label\": \"揭阳\"},\\n {\"data\": null, \"label\": \"潮州\"},\\n {\"data\": null, \"label\": \"汕头\"},\\n {\"data\": null, \"label\": \"茂名\"},\\n {\"data\": null, \"label\": \"阳江\"},\\n {\"data\": null, \"label\": \"珠海\"},\\n {\"data\": null, \"label\": \"中山\"}\\n ],\\n \"oldTitle\": \"8月份临时(抢修)作业类型\",\\n \"titleRowDatas\": [\"一般临时作业\", \"抢修作业\"]\\n }\\n ],\\n \"msg\": \"\",\\n \"requestId\": \"5e4a0525-fb0a-4b5e-a4e8-553d195282c4\"\\n }\\n}'}, None, {'data': '{\"text\": \"由于接口出错,[查询的数据描述]未查询到具体数值。\"}', 'structure': '\\n{\\n \"newTitle\": \"11月三级指挥中心线上督查发现问题分析\",\\n \"oldTitle\": \"8月三级指挥中心线上督查发现问题分析\",\\n \"chartStructure\": {\\n \"dataSet\": [\\n {\"data\": null, \"label\": \"广州\"},\\n {\"data\": null, \"label\": \"茂名\"},\\n {\"data\": null, \"label\": \"清远\"},\\n {\"data\": null, \"label\": \"汕尾\"},\\n {\"data\": null, \"label\": \"肇庆\"},\\n {\"data\": null, \"label\": \"佛山\"},\\n {\"data\": null, \"label\": \"江门\"},\\n {\"data\": null, \"label\": \"东莞\"},\\n {\"data\": null, \"label\": \"湛江\"},\\n {\"data\": null, \"label\": \"梅州\"},\\n {\"data\": null, \"label\": \"潮州\"},\\n {\"data\": null, \"label\": \"阳江\"},\\n {\"data\": null, \"label\": \"揭阳\"},\\n {\"data\": null, \"label\": \"汕头\"},\\n {\"data\": null, \"label\": \"中山\"},\\n {\"data\": null, \"label\": \"惠州\"},\\n {\"data\": null, \"label\": \"韶关\"},\\n {\"data\": null, \"label\": \"珠海\"},\\n {\"data\": null, \"label\": \"河源\"},\\n {\"data\": null, \"label\": \"云浮\"},\\n {\"data\": null, \"label\": \"类别\"}\\n ],\\n \"titleRowDatas\": [\"省\", \"地\", \"县\", \"总\"]\\n }\\n}'}, {'data': '{\"text\": \"\\\\n\\\\n\\\\n\\\\n任务描述:获取2025年11月各地市局生产指挥中心督查发现违章率数据,统计各分局的A/B/C/D类违章数量、违章发现率和AB类违章问题发现率\\\\n执行结果:由于接口出错,各分局的A/B/C/D类违章数量、违章发现率、AB类违章问题发现率未查询到具体数值。\"}', 'structure': '\\n{\\n \"newTitle\": \"11月各地市局生产指挥中心督查发现违章率\",\\n \"oldTitle\": \"8月各地市局生产指挥中心督查发现违章率\",\\n \"chartData\": {\\n \"code\": 0,\\n \"data\": [],\\n \"msg\": \"未查询到符合督查数据范围的2025年11月违章数据\",\\n \"requestId\": \"be76b2d9-ab57-4834-bff8-d888233390ac\"\\n }\\n}'}, {'data': '{\"text\": \"\\\\n由于接口出错,无法获取2025年11月共性违章问题数据的具体数值。\\\\n\\\\n根据任务描述,需统计广东地区2025年11月1日至30日管控作业中违章代码为C87、D07、D10、D06、D05、C09的作业总数,并按违章类型分类。但调用`work_plans_count_work_plans_count_post`接口时出现500服务器内部错误,错误信息显示数据库连接异常。后续尝试通过`eval_expression`进行计算时也因变量未定义而失败。\\\\n\\\\n建议检查以下问题:\\\\n1. 数据库连接是否正常;\\\\n2. 参数`not_null_fields`中的字段名是否与后端一致;\\\\n3. 时间范围参数格式是否符合接口要求;\\\\n4. `work_code`字段是否支持模糊查询及数组传参。\"}', 'structure': '\\n```json\\n{\\n \"newTitle\": \"11月共性违章问题\",\\n \"oldTitle\": \"8月共性违章问题\",\\n \"chartStructure\": {\\n \"dataSet\": [\\n {\"data\": null, \"label\": \"视频规范类\"},\\n {\"data\": null, \"label\": \"系统作业信息规范类\"},\\n {\"data\": null, \"label\": \"作业文件规范类\"},\\n {\"data\": null, \"label\": \"风险评估错误\"},\\n {\"data\": null, \"label\": \"未正确佩戴安全帽\"}\\n ],\\n \"titleRowDatas\": [\"销售额\"]\\n }\\n}\\n```'}, {'data': '{\"text\": \"\\\\n\\\\n\\\\n由于接口出错,中高风险作业计划数据未查询到具体数值。\"}', 'structure': '\\n{\\n \"newTitle\": \"2025年12月中高风险作业计划\",\\n \"oldTitle\": \"9月份中高风险作业计划\",\\n \"chartStructure\": {\\n \"dataSet\": [\\n {\"data\": null, \"label\": \"广州\"}, {\"data\": null, \"label\": \"佛山\"}, \\n {\"data\": null, \"label\": \"湛江\"}, {\"data\": null, \"label\": \"东莞\"}, \\n {\"data\": null, \"label\": \"中山\"}, {\"data\": null, \"label\": \"韶关\"},\\n {\"data\": null, \"label\": \"梅州\"}, {\"data\": null, \"label\": \"江门\"},\\n {\"data\": null, \"label\": \"惠州\"}, {\"data\": null, \"label\": \"清远\"},\\n {\"data\": null, \"label\": \"肇庆\"}, {\"data\": null, \"label\": \"茂名\"},\\n {\"data\": null, \"label\": \"汕尾\"}, {\"data\": null, \"label\": \"阳江\"},\\n {\"data\": null, \"label\": \"汕头\"}, {\"data\": null, \"label\": \"揭阳\"},\\n {\"data\": null, \"label\": \"云浮\"}, {\"data\": null, \"label\": \"珠海\"},\\n {\"data\": null, \"label\": \"潮州\"}, {\"data\": null, \"label\": \"河源\"}\\n ],\\n \"titleRowDatas\": [\"中风险\", \"高风险\"]\\n }\\n}'}, {'data': '{\"text\": \"\\\\n由于接口出错,本月作业计划按人数区间统计未查询到具体数值。建议检查参数配置或联系接口维护人员处理。\"}', 'structure': '\\n{\\n \"newTitle\": \"12月份作业人数分布情况\",\\n \"oldTitle\": \"9月份作业人数分布情况\",\\n \"chartStructure\": {\\n \"code\": 0,\\n \"data\": [{\\n \"dataSet\": [{\\n \"data\": null,\\n \"label\": \"广州\"\\n }, {\\n \"data\": null,\\n \"label\": \"中山\"\\n }, {\\n \"data\": null,\\n \"label\": \"江门\"\\n }, {\\n \"data\": null,\\n \"label\": \"东莞\"\\n }, {\\n \"data\": null,\\n \"label\": \"佛山\"\\n }, {\\n \"data\": null,\\n \"label\": \"韶关\"\\n }, {\\n \"data\": null,\\n \"label\": \"湛江\"\\n }, {\\n \"data\": null,\\n \"label\": \"清远\"\\n }, {\\n \"data\": null,\\n \"label\": \"茂名\"\\n }, {\\n \"data\": null,\\n \"label\": \"惠州\"\\n }, {\\n \"data\": null,\\n \"label\": \"揭阳\"\\n }, {\\n \"data\": null,\\n \"label\": \"梅州\"\\n }, {\\n \"data\": null,\\n \"label\": \"阳江\"\\n }, {\\n \"data\": null,\\n \"label\": \"珠海\"\\n }, {\\n \"data\": null,\\n \"label\": \"肇庆\"\\n }, {\\n \"data\": null,\\n \"label\": \"河源\"\\n }, {\\n \"data\": null,\\n \"label\": \"云浮\"\\n }, {\\n \"data\": null,\\n \"label\": \"汕头\"\\n }, {\\n \"data\": null,\\n \"label\": \"潮州\"\\n }, {\\n \"data\": null,\\n \"label\": \"汕尾\"\\n }],\\n \"oldTitle\": \"9月份作业人数分布情况\",\\n \"titleRowDatas\": [\"30人以内\", \"30-49人\", \"50-99人\", \"100人及以上\"]\\n }],\\n \"msg\": \"\",\\n \"requestId\": \"1964d0e0-7762-404d-8587-ab1306613c7a\"\\n }\\n}'}, {'data': '{\"text\": \"\\\\n\\\\n由于接口出错,2025年12月重点关注风险情况未查询到具体数值。\"}', 'structure': '\\n{\\n \"newTitle\": \"2025年12月重点关注风险情况\",\\n \"oldTitle\": \"9月份重点关注风险情况\",\\n \"chartData\": {\\n \"code\": 0,\\n \"data\": [\\n {\\n \"dataSet\": [\\n {\"data\": null, \"label\": \"东莞\"},\\n {\"data\": null, \"label\": \"佛山\"},\\n {\"data\": null, \"label\": \"广州\"},\\n {\"data\": null, \"label\": \"茂名\"},\\n {\"data\": null, \"label\": \"中山\"},\\n {\"data\": null, \"label\": \"梅州\"},\\n {\"data\": null, \"label\": \"肇庆\"},\\n {\"data\": null, \"label\": \"清远\"},\\n {\"data\": null, \"label\": \"揭阳\"},\\n {\"data\": null, \"label\": \"河源\"},\\n {\"data\": null, \"label\": \"惠州\"},\\n {\"data\": null, \"label\": \"江门\"},\\n {\"data\": null, \"label\": \"湛江\"},\\n {\"data\": null, \"label\": \"韶关\"},\\n {\"data\": null, \"label\": \"云浮\"},\\n {\"data\": null, \"label\": \"珠海\"},\\n {\"data\": null, \"label\": \"汕尾\"},\\n {\"data\": null, \"label\": \"汕头\"},\\n {\"data\": null, \"label\": \"潮州\"},\\n {\"data\": null, \"label\": \"阳江\"}\\n ],\\n \"oldTitle\": \"9月份重点关注风险情况\",\\n \"titleRowDatas\": [\\n \"涉管线路\",\\n \"深基坑\",\\n \"有限空间\",\\n \"高支模\",\\n \"重型机械\",\\n \"高处作业\"\\n ]\\n }\\n ],\\n \"msg\": \"\",\\n \"requestId\": \"9d466b9f-2869-4543-84f1-213fc52d7d11\"\\n }\\n}'}, {'data': '{\"text\": \"\\\\n由于接口出错,12月份重点关注作业风险分布情况未查询到具体数值。\"}', 'structure': '\\n```json\\n{\\n \"newTitle\": \"12月份重点关注作业风险分布情况\",\\n \"oldTitle\": \"9月份重点关注作业风险分布情况\",\\n \"chartStructure\": {\\n \"dataSet\": [\\n {\"data\": null, \"label\": \"涉管线路\"},\\n {\"data\": null, \"label\": \"深基坑\"},\\n {\"data\": null, \"label\": \"有限空间\"},\\n {\"data\": null, \"label\": \"高支模\"},\\n {\"data\": null, \"label\": \"重型机械\"},\\n {\"data\": null, \"label\": \"高处作业\"}\\n ]\\n }\\n}\\n```'}, None]" +} \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index f8ff6fe..92b23e7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,16 +2,14 @@ version: "3.9" services: mcp-docx-server: - build: - context: . - dockerfile: Dockerfile container_name: mcp-docx-server + image: mcp-mcp-9nfjir-mcp-docx-server:latest working_dir: /app # SSE MCP 服务端口 ports: - "8080:8080" environment: - MCP_OUTPUT_BASE_URL: http://149.88.66.186:8080/download + MCP_OUTPUT_BASE_URL: http://192.168.10.114:8080/download # 可选:挂载数据目录供 DOCX 读写 volumes: - /root/uploads:/app/uploads diff --git a/mcp_docx.py b/mcp_docx.py index 6c81c89..8b09e20 100644 --- a/mcp_docx.py +++ b/mcp_docx.py @@ -29,6 +29,7 @@ import tempfile import zipfile from lxml import etree from PIL import Image +import re W = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' WD = 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing' @@ -260,16 +261,39 @@ def set_color_on_rpr(rpr_el, hex_color): c = etree.SubElement(rpr_el, f'{{{W}}}color') c.set(f'{{{W}}}val', hex_color.lstrip('#')) -def apply_color_to_keyword(doc_el, keyword, hex_color): +def apply_color_to_keyword(doc_el, keyword, hex_color, context_text=None): """ 只给匹配到的关键字本身着色,而不是整个 run。 做法:在有关键字的 run 上,把文本拆成多段 run: [前缀][关键字][后缀],只有“关键字”这个 run 设置颜色。 + + 当 context_text 不为空时,只在“整段文本包含该 context_text 的段落”中进行上色, + 避免同一个关键字在其他段落里被误伤(例如单独的数字 0)。 """ + # 如果提供了上下文,只在包含该上下文的段落内着色 + allowed_paras = None + if context_text: + allowed_paras = set() + for p in doc_el.iter(f'{{{W}}}p'): + t_nodes = list(p.iter(f'{{{W}}}t')) + full = ''.join(t.text or '' for t in t_nodes) + if context_text in full: + allowed_paras.add(p) + + def _find_ancestor_para(el): + cur = el + while cur is not None and cur.tag != f'{{{W}}}p': + cur = cur.getparent() + return cur + # 先 list 一下,避免在遍历时修改树结构导致问题 runs = list(doc_el.iter(f'{{{W}}}r')) for run in runs: + if allowed_paras is not None: + para = _find_ancestor_para(run) + if para not in allowed_paras: + continue t_nodes = list(run.findall(f'{{{W}}}t')) if not t_nodes: continue @@ -322,6 +346,65 @@ def apply_color_to_keyword(doc_el, keyword, hex_color): if seg_text: parent.insert(insert_pos + offset, make_run(seg_text, colored)) + +def remove_rule_blocks(doc_el): + """ + 删除文档中位于 ......... 之间的所有段落。 + + 说明: + - 标签内容可能跨段落,这里按段落顺序遍历,记录是否处于 rule 块内。 + - 一旦进入某个块(遇到起始标签),直到遇到对应的结束标签为止,整段段落都会被删除。 + - 假设标签本身和其中内容都不需要出现在最终文档里。 + """ + inside_global = False + inside_rule = False + inside_chart = False + paras_to_delete = [] + + # list(...) 防止在遍历时修改树结构 + for p in list(doc_el.iter(f'{{{W}}}p')): + t_nodes = list(p.iter(f'{{{W}}}t')) + full = ''.join(t.text or '' for t in t_nodes) + + if not full: + # 空段落如果在块内,也删掉 + if inside_global or inside_rule or inside_chart: + paras_to_delete.append(p) + continue + + # 当前是否在某个块内 + if inside_global or inside_rule or inside_chart: + paras_to_delete.append(p) + + # 检测 global_rule 块 + if '' in full: + inside_global = True + if p not in paras_to_delete: + paras_to_delete.append(p) + if '' in full: + inside_global = False + + # 检测 rule 块 + if '' in full: + inside_rule = True + if p not in paras_to_delete: + paras_to_delete.append(p) + if '' in full: + inside_rule = False + + # 检测 chart_rule 块 + if '' in full: + inside_chart = True + if p not in paras_to_delete: + paras_to_delete.append(p) + if '' in full: + inside_chart = False + + for p in paras_to_delete: + parent = p.getparent() + if parent is not None: + parent.remove(p) + def process(input_docx, output_docx, replacements, image_replacements, color_keywords): with tempfile.TemporaryDirectory() as tmpdir: @@ -338,15 +421,24 @@ def process(input_docx, output_docx, replacements, image_replacements, tree = etree.parse(doc_xml_path) root = tree.getroot() + # 先整体删除全局规则和普通规则块(支持标签跨段落) + remove_rule_blocks(root) + if replacements: print(f"✏️ 替换 {len(replacements)} 条文本...") for para in root.iter(f'{{{W}}}p'): paragraph_replace(para, replacements) # 根据 span 解析出的关键字上色 - for keyword, color in color_keywords: + for item in color_keywords: + # 兼容旧格式: (keyword, color) + if len(item) == 2: + keyword, color = item + context_text = None + else: + keyword, color, context_text = item print(f"🎨 关键词「{keyword}」→ #{color}") - apply_color_to_keyword(root, keyword, color) + apply_color_to_keyword(root, keyword, color, context_text) tree.write(doc_xml_path, xml_declaration=True, encoding='UTF-8', standalone=True) print(f"📦 打包 → {output_docx} ...") @@ -359,27 +451,75 @@ def _parse_span_replacement(new_text): 解析 NEW 文本中的 span 标签,用于决定颜色。 约定格式(不区分大小写): - 待补充 - 待补充 + 待补充 返回: (纯文本, [(keyword, hex_color), ...]) """ import re + # 简单的命名颜色到 16 进制的映射,可按需扩展 + named_colors = { + 'red': 'FF0000', + 'blue': '0000FF', + 'green': '00FF00', + 'yellow': 'FFFF00', + 'black': '000000', + 'white': 'FFFFFF', + 'gray': '808080', + 'grey': '808080', + } + + def _normalize_color(raw_color: str) -> str: + """ + 支持: + - FFFFFF / ffffff + - #FFFFFF / #ffffff + - red / blue 等命名颜色(见 named_colors) + 返回不带 # 的大写 16 进制字符串;如果无法识别命名颜色则原样返回(去掉 #)。 + """ + c = (raw_color or '').strip() + if not c: + return '' + + # 去掉前导 # + if c.startswith('#'): + c = c[1:] + + # 纯 16 进制 + if re.fullmatch(r'[0-9a-fA-F]{6}', c): + return c.upper() + + # 命名颜色 + mapped = named_colors.get(c.lower()) + if mapped: + return mapped + + # 兜底:返回去掉 # 的原值 + return c.upper() + + # color 属性允许: + # - 6 位 16 进制(可带 #) + # - 命名颜色(red / blue ...) span_pattern = re.compile( - r']*?color=["\']?(#?[0-9a-fA-F]{6})["\']?[^>]*>(.*?)', + r']*?color=["\']?([^"\'\s>]+)["\']?[^>]*>(.*?)', re.IGNORECASE | re.DOTALL, ) + # 先得到去掉 span 标签后的纯文本(也是最终会写入 DOCX 的内容) + def _strip_repl(m): + return m.group(2) + + plain_text = span_pattern.sub(_strip_repl, new_text) + + # 再次遍历 span,收集颜色关键字,并把“整句纯文本”作为上下文挂在每个关键字上 color_keywords = [] - - def _repl(m): - hex_color = m.group(1).lstrip('#') + for m in span_pattern.finditer(new_text): + raw_color = m.group(1) + hex_color = _normalize_color(raw_color) keyword = m.group(2) - color_keywords.append((keyword, hex_color)) - return keyword + # 三元组: (关键字, 颜色, 该 NEW 对应的整句纯文本上下文) + color_keywords.append((keyword, hex_color, plain_text)) - plain_text = span_pattern.sub(_repl, new_text) return plain_text, color_keywords diff --git a/mcp_docx_server.py b/mcp_docx_server.py index 6a8e43f..38069d8 100644 --- a/mcp_docx_server.py +++ b/mcp_docx_server.py @@ -32,13 +32,24 @@ import argparse import os import tempfile import urllib.parse +from datetime import datetime, date, timedelta from typing import Any, Dict, List, Optional +import uuid import requests +from lxml import etree from mcp.server.fastmcp import FastMCP from mcp.server.transport_security import TransportSecuritySettings -from mcp_docx import get_images_info, process, _parse_span_replacement +from mcp_docx import ( + W, + get_images_info, + process, + _parse_span_replacement, + paragraph_replace, + unpack, + pack, +) _disable_dns_rebinding = os.getenv("MCP_DISABLE_HOST_CHECK") == "1" @@ -53,8 +64,8 @@ else: # 如需通过网关 / 域名访问,可在这里追加 allowed_hosts / allowed_origins transport_security = TransportSecuritySettings( enable_dns_rebinding_protection=True, - allowed_hosts=["localhost:*", "127.0.0.1:*","149.88.66.186:*"], - allowed_origins=["http://localhost:*", "http://127.0.0.1:*","http://149.88.66.186:*"], + allowed_hosts=["localhost:*", "127.0.0.1:*", "192.168.10.101:*"], + allowed_origins=["http://localhost:*", "http://127.0.0.1:*","http://192.168.10.101:*"], ) @@ -71,6 +82,127 @@ _server_config = { } +def _normalize_report_type(report_type: Optional[str]) -> Optional[str]: + if not report_type: + return None + t = str(report_type).strip().lower() + mapping = { + "日报": "daily", + "日報": "daily", + "daily": "daily", + "d": "daily", + "周报": "weekly", + "週報": "weekly", + "weekly": "weekly", + "w": "weekly", + "月报": "monthly", + "月報": "monthly", + "monthly": "monthly", + "m": "monthly", + } + return mapping.get(report_type, mapping.get(t)) + + +def _build_issue_text(norm_type: Optional[str], now: datetime) -> str: + """根据报告类型和生成时间计算“期数 + 日期”字符串。""" + d = now.date() + date_str = f"{d.year}年{d.month}月{d.day}日" + + if norm_type == "daily": + # 日报:只有日期,没有期数 + return date_str + + if norm_type == "weekly": + # 周报:根据“当周周一所在月份”的周序号来计算期数 + monday = d - timedelta(days=d.weekday()) + year = monday.year + month = monday.month + + first_day = date(year, month, 1) + offset = (0 - first_day.weekday()) % 7 # 距离第一个周一的天数 + first_monday = first_day + timedelta(days=offset) + issue_no = ((monday - first_monday).days // 7) + 1 + if issue_no < 1: + issue_no = 1 + return f"{date_str}(第{issue_no}期)" + + # 默认:月报逻辑,期数固定为第一期 + return f"{date_str}(第1期)" + + +def _apply_report_date_logic_to_docx( + docx_path: str, + report_type: Optional[str], + report_title_time: Optional[str], +) -> None: + """ + 只在“目录”之前的内容中,按照规则替换日期相关文本: + - 匹配第一个形如 YYYY年M月 的片段 → 替换为 report_title_time + - 匹配第一个形如 YYYY年M月D日(第X期) 的片段 → + 按报告类型 + 当前生成时间计算期数和日期,并进行替换。 + """ + norm_type = _normalize_report_type(report_type) + if not norm_type and not report_title_time: + return + + # 没有任何需要替换的目标,直接返回 + if not os.path.exists(docx_path): + return + + with tempfile.TemporaryDirectory() as tmpdir: + unpack(docx_path, tmpdir) + doc_xml_path = os.path.join(tmpdir, "word", "document.xml") + if not os.path.exists(doc_xml_path): + return + + tree = etree.parse(doc_xml_path) + root = tree.getroot() + + title_replaced = False + issue_replaced = False + now = datetime.now() + + # 正则模式 + import re + + pattern_title = re.compile(r"(\d{4})年(\d{1,2})月") + pattern_issue = re.compile(r"(\d{4})年(\d{1,2})月(\d{1,2})日(第(\d+)期)") + + for p in root.iter(f"{{{W}}}p"): + # 聚合段落文本 + t_nodes = list(p.iter(f"{{{W}}}t")) + full = "".join(t.text or "" for t in t_nodes) + + # 遇到“目录”后就停止处理后面的内容 + if "目录" in full: + break + + para_repls = [] + + if report_title_time and not title_replaced: + m = pattern_title.search(full) + if m: + old = m.group(0) + new = report_title_time + para_repls.append((old, new)) + title_replaced = True + + if norm_type and not issue_replaced: + m2 = pattern_issue.search(full) + if m2: + old2 = m2.group(0) + new2 = _build_issue_text(norm_type, now) + para_repls.append((old2, new2)) + issue_replaced = True + + if para_repls: + paragraph_replace(p, para_repls) + + tree.write(doc_xml_path, xml_declaration=True, encoding="UTF-8", standalone=True) + # 重新打包覆盖原始 DOCX + pack(tmpdir, docx_path, docx_path) + + def _is_url(path: str) -> bool: """简单判断一个字符串是否为 HTTP/HTTPS URL。""" return path.startswith("http://") or path.startswith("https://") @@ -151,6 +283,17 @@ def _get_upload_dir() -> str: return os.path.abspath(upload_dir) +def _get_tmp_upload_dir() -> str: + """ + 获取临时上传目录。 + + 优先使用环境变量 MCP_TMP_UPLOAD_DIR,否则使用当前目录下的 tmp 文件夹。 + """ + tmp_dir = os.getenv("MCP_TMP_UPLOAD_DIR", "./tmp") + os.makedirs(tmp_dir, exist_ok=True) + return os.path.abspath(tmp_dir) + + @mcp.tool() async def list_docx_images(docx_url: str) -> List[Dict[str, Any]]: """ @@ -171,46 +314,61 @@ async def list_docx_images(docx_url: str) -> List[Dict[str, Any]]: return imgs @mcp.tool() -async def edit_docx(input_docx_path: str, replacements: Optional[List[Dict[str, str]]] = None, image_replacements: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]: +async def edit_docx( + input_docx_path: str, + replacements: List[Dict[str, str]] = None, + image_replacements: Optional[List[Dict[str, Any]]] = None, + report_type: Optional[str] = None, + report_title_time: Optional[str] = None, +) -> Dict[str, Any]: """ - 使用原始 mcp_docx 逻辑对 DOCX 文件进行编辑。 + 对 DOCX 文件进行编辑。 支持: - 纯文本替换 - - 通过 关键字 语法设置关键字颜色 + - 通过 关键字 语法设置关键字颜色 - 替换指定序号的图片 + - 报告日期与期数自动替换(仅在“目录”之前生效) 参数: - input_docx_path: 输入 DOCX 文件名称 - replacements: 文本替换规则列表,例如: [ - {\"old\": \"旧标题\", \"new\": \"新标题\"}, - {\"old\": \"原文\", \"new\": \"新文\"} + {"old": "计划作业总数共有10项。", "new": "计划作业总数共有XX项。"}, + {"old": "文档原文本,必须是完整的一句话或者段落", "new": "要替换的文本"} ] - - image_replacements: 图片替换规则列表,例如: - [ - {\"index\": 1, \"file\": \"new_chart.png\"}, - {\"index\": 2, \"file\": \"new_photo.jpg\"} - ] - 其中 file 字段同样可以是本地路径或 HTTP/HTTPS URL。 + - image_replacements: 图片替换规则 + - report_type: 报告类型,可选值:日报 / 周报 / 月报(或对应的英文 daily / weekly / monthly) + - report_title_time: 报告标题中要显示的时间字符串,用来替换“YYYY年M月”这一段(仅在第一次匹配时生效) 返回: - { - \"output_path\": 生成的 DOCX 绝对路径, - \"output_url\": 如果配置了 MCP_OUTPUT_BASE_URL,则为可访问该文件的 URL,否则为 null + "output_path": 生成的 DOCX 绝对路径, + "output_url": 如果配置了 MCP_OUTPUT_BASE_URL,则为可访问该文件的 URL,否则为 null } """ tmp_input: Optional[str] = None tmp_images: List[str] = [] print(f"edit_docx: input_docx_path: {input_docx_path}, replacements: {replacements}, image_replacements: {image_replacements}") try: - upload_dir = _get_upload_dir() - local_input = os.path.join(upload_dir, input_docx_path) + upload_dir = _get_upload_dir() # 输出目录:/uploads + tmp_upload_dir = _get_tmp_upload_dir() # 上传临时目录:/tmp + + # 解析输入路径:支持 URL、绝对路径、仅文件名三种形式 + local_input = input_docx_path if _is_url(input_docx_path): parsed = urllib.parse.urlparse(input_docx_path) ext = os.path.splitext(parsed.path)[1] or ".docx" tmp_input = _download_to_temp(input_docx_path, suffix=ext) local_input = tmp_input + elif not os.path.isabs(local_input): + # 相对路径:优先在 tmp,其次在 uploads 中查找 + cand_tmp = os.path.join(tmp_upload_dir, input_docx_path) + cand_upload = os.path.join(upload_dir, input_docx_path) + if os.path.exists(cand_tmp): + local_input = cand_tmp + else: + local_input = cand_upload if not os.path.exists(local_input): raise FileNotFoundError(f"输入 DOCX 文件不存在: {input_docx_path}") @@ -260,8 +418,14 @@ async def edit_docx(input_docx_path: str, replacements: Optional[List[Dict[str, img_pairs.append((idx, local_img)) - # 复用原始处理函数 - output_docx = local_input.replace(".docx", "_output.docx") + # 复用原始处理函数: + # 输出文件统一写入 /uploads 目录,文件名带时间戳和随机后缀避免并发冲突 + base_name = os.path.basename(local_input) + name_root, _ = os.path.splitext(base_name) + ts = datetime.now().strftime('%Y%m%d%H%M%S') + rand = uuid.uuid4().hex[:6] + output_filename = f"{name_root}_output_{ts}_{rand}.docx" + output_docx = os.path.join(upload_dir, output_filename) process( input_docx=local_input, output_docx=output_docx, @@ -270,7 +434,30 @@ async def edit_docx(input_docx_path: str, replacements: Optional[List[Dict[str, color_keywords=color_keywords, ) + # 追加:根据报告类型与标题时间,在“目录”之前自动处理日期和期数 + if report_type or report_title_time: + try: + _apply_report_date_logic_to_docx( + output_docx, + report_type=report_type, + report_title_time=report_title_time, + ) + except Exception as e: + # 避免因为日期处理失败而导致整个接口报错,把错误写到日志即可 + print(f"apply report date logic failed: {e}") + abs_out = os.path.abspath(output_docx) + + # 删除上传的临时文件:只删除位于 tmp 目录中的输入文件 + try: + tmp_root = _get_tmp_upload_dir() + if os.path.exists(local_input): + abs_input = os.path.abspath(local_input) + if os.path.commonpath([abs_input, tmp_root]) == tmp_root: + os.remove(local_input) + except Exception: + # 不因清理失败影响主流程 + pass return { "output_path": output_docx, "output_url": _build_output_url(output_docx), @@ -291,6 +478,61 @@ async def edit_docx(input_docx_path: str, replacements: Optional[List[Dict[str, # HTTP 远程模式:添加文件上传下载路由 from starlette.responses import FileResponse, JSONResponse from starlette.requests import Request +def _get_log_path() -> str: + """ + 获取日志文件路径。 + + 优先使用环境变量 MCP_LOG_FILE(完整路径),否则使用当前目录下的 logs/mcp.log。 + """ + log_file = os.getenv("MCP_LOG_FILE", "./logs/mcp.log") + log_path = os.path.abspath(log_file) + os.makedirs(os.path.dirname(log_path), exist_ok=True) + return log_path + +@mcp.custom_route("/log", methods=["POST"]) +async def append_log(request: Request): + """ + 将一段字符串追加写入日志文件,每行带时间戳。 + + 参数: + - message: 要写入的字符串内容。 + + 返回: + - JSON 格式: + {"success": True/False, "log_path": 日志文件路径, "message": 说明} + """ + try: + # data = await request.json() + data = await request.body() + if not data: + return JSONResponse( + { + "success": False, + "message": "未提供消息内容", + }, + status_code=400, + ) + log_path = _get_log_path() + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + line = f"[{ts}] {data.decode('utf-8')}\n" + with open(log_path, "a", encoding="utf-8") as f: + f.write(line) + return JSONResponse( + { + "success": True, + "log_path": log_path, + "message": "已写入日志", + } + ) + except Exception as e: + return JSONResponse( + { + "success": False, + "log_path": _get_log_path(), + "message": f"写入日志失败: {str(e)}", + }, + status_code=500, + ) @mcp.custom_route("/upload", methods=["POST"]) async def upload_handler(request: Request): """处理文件上传""" @@ -304,31 +546,23 @@ async def upload_handler(request: Request): "message": "未提供文件" }, status_code=400) - upload_dir = _get_upload_dir() - filename = file.filename + tmp_dir = _get_tmp_upload_dir() + orig_filename = file.filename or "uploaded.docx" - # 安全检查:防止路径遍历攻击 - filename = os.path.basename(filename) - file_path = os.path.join(upload_dir, filename) + # 安全检查:防止路径遍历攻击,保留原始文件名 + filename = os.path.basename(orig_filename) + file_path = os.path.join(tmp_dir, filename) - # 如果文件已存在,添加序号 - base, ext = os.path.splitext(filename) - counter = 1 - while os.path.exists(file_path): - filename = f"{base}_{counter}{ext}" - file_path = os.path.join(upload_dir, filename) - counter += 1 - - # 保存文件 + # 保存文件到临时目录(如已存在则覆盖) content = await file.read() with open(file_path, "wb") as f: f.write(content) return JSONResponse({ "success": True, - "filename": filename, - "file_path": file_path, - "file_url": _build_output_url(file_path), + "filename": filename, # 保留原始文件名,供 edit_docx 使用 + "file_path": file_path, # 绝对路径(可选) + "file_url": None, # 临时文件不提供下载 URL "size": len(content), "message": f"文件上传成功: {filename}" })