From 0d178c748e73d587ace8ad22152ce6448e360f22 Mon Sep 17 00:00:00 2001
From: liangweihao <734499798@qq.com>
Date: Thu, 2 Apr 2026 10:43:45 +0800
Subject: [PATCH] =?UTF-8?q?add=EF=BC=9A=E5=88=A0=E9=99=A4data=E6=A0=87?=
=?UTF-8?q?=E7=AD=BE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
mcp_docx.py | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/mcp_docx.py b/mcp_docx.py
index 76d63bc..314930c 100644
--- a/mcp_docx.py
+++ b/mcp_docx.py
@@ -417,7 +417,7 @@ def apply_color_to_keyword(doc_el, keyword, hex_color, context_text=None):
def remove_rule_blocks(doc_el):
"""
- 删除文档中位于 ...、... 和 ... 之间的所有段落。
+ 删除文档中位于 ...、...、... 和 ... 之间的所有段落。
说明:
- 标签内容可能跨段落,这里按段落顺序遍历,记录是否处于 rule 块内。
@@ -427,6 +427,7 @@ def remove_rule_blocks(doc_el):
inside_global = False
inside_rule = False
inside_chart = False
+ inside_data = False
paras_to_delete = []
# list(...) 防止在遍历时修改树结构
@@ -436,12 +437,12 @@ def remove_rule_blocks(doc_el):
if not full:
# 空段落如果在块内,也删掉
- if inside_global or inside_rule or inside_chart:
+ if inside_global or inside_rule or inside_chart or inside_data:
paras_to_delete.append(p)
continue
# 当前是否在某个块内
- if inside_global or inside_rule or inside_chart:
+ if inside_global or inside_rule or inside_chart or inside_data:
paras_to_delete.append(p)
# 检测 global_rule 块
@@ -468,6 +469,14 @@ def remove_rule_blocks(doc_el):
if '' in full:
inside_chart = False
+ # 检测 data 块
+ if '<data>' in full:
+ inside_data = True
+ if p not in paras_to_delete:
+ paras_to_delete.append(p)
+ if '</data>' in full:
+ inside_data = False
+
for p in paras_to_delete:
parent = p.getparent()
if parent is not None: