diff --git a/mcp_docx.py b/mcp_docx.py
index 76d63bc..314930c 100644
--- a/mcp_docx.py
+++ b/mcp_docx.py
@@ -417,7 +417,7 @@ def apply_color_to_keyword(doc_el, keyword, hex_color, context_text=None):
def remove_rule_blocks(doc_el):
"""
- 删除文档中位于 ...、... 和 ... 之间的所有段落。
+ 删除文档中位于 <global_rule>...</global_rule>、<rule>...</rule>、<chart>...</chart> 和 <data>...</data> 之间的所有段落。
说明:
- 标签内容可能跨段落,这里按段落顺序遍历,记录是否处于 rule 块内。
@@ -427,6 +427,7 @@ def remove_rule_blocks(doc_el):
inside_global = False
inside_rule = False
inside_chart = False
+ inside_data = False
paras_to_delete = []
# list(...) 防止在遍历时修改树结构
@@ -436,12 +437,12 @@ def remove_rule_blocks(doc_el):
if not full:
# 空段落如果在块内,也删掉
- if inside_global or inside_rule or inside_chart:
+ if inside_global or inside_rule or inside_chart or inside_data:
paras_to_delete.append(p)
continue
# 当前是否在某个块内
- if inside_global or inside_rule or inside_chart:
+ if inside_global or inside_rule or inside_chart or inside_data:
paras_to_delete.append(p)
# 检测 global_rule 块
@@ -468,6 +469,14 @@ def remove_rule_blocks(doc_el):
if '' in full:
inside_chart = False
+ # 检测 data 块
+ if '' in full:
+ inside_data = True
+ if p not in paras_to_delete:
+ paras_to_delete.append(p)
+ if '' in full:
+ inside_data = False
+
for p in paras_to_delete:
parent = p.getparent()
if parent is not None: