#!/usr/bin/env python3
"""
docx_editor.py — 保留原格式替换文本 + 修改字体颜色

用法:
    # 列出文档中所有图片
    python3 docx_editor.py input.docx --list-images

    # 文本替换 + 颜色
    python3 docx_editor.py input.docx output.docx \
        --replace "原文" "新文" \
        --color "关键词" "FF0000"
"""

import argparse
import copy
import os
import tempfile
import zipfile
from lxml import etree
import re

# XML namespace URIs used throughout WordprocessingML (OOXML) documents.
W = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'  # main word namespace (w:)
WD = 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'  # drawing wrapper (wp:)
A = 'http://schemas.openxmlformats.org/drawingml/2006/main'  # DrawingML (a:)
R = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'  # relationship attributes (r:)
# Relationship "Type" value that marks an image part in document.xml.rels.
REL_TYPE_IMAGE = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'


def unpack(docx_path, out_dir):
|
||
"""使用 zipfile 直接解包 .docx 到临时目录,替代外部 unpack.py 脚本。"""
|
||
with zipfile.ZipFile(docx_path, 'r') as zf:
|
||
zf.extractall(out_dir)
|
||
|
||
|
||
def pack(unpacked_dir, output_docx, original_docx):
|
||
"""
|
||
使用 zipfile 将修改后的目录重新打包为 .docx。
|
||
|
||
original_docx 参数目前保留只是为了兼容原函数签名,没有实际使用。
|
||
"""
|
||
# 确保输出目录存在
|
||
out_dir = os.path.dirname(os.path.abspath(output_docx))
|
||
if out_dir and not os.path.exists(out_dir):
|
||
os.makedirs(out_dir, exist_ok=True)
|
||
|
||
# 将解包目录中的所有文件打成 ZIP(保持相对路径结构)
|
||
with zipfile.ZipFile(output_docx, 'w', compression=zipfile.ZIP_DEFLATED) as zf:
|
||
for root, _, files in os.walk(unpacked_dir):
|
||
for fname in files:
|
||
abs_path = os.path.join(root, fname)
|
||
# docx 内部使用 / 作为路径分隔符
|
||
arcname = os.path.relpath(abs_path, unpacked_dir).replace(os.sep, '/')
|
||
zf.write(abs_path, arcname)
|
||
|
||
|
||
def build_image_index(unpacked_dir):
    """Return a list of image descriptors in document order.

    Each entry is a dict with: ``index`` (1-based position), ``rid``
    (relationship id), ``media_file`` (relative target from the rels file),
    ``abs_path``, ``ext``, ``docpr_name`` (Word's internal drawing name),
    and ``width_cm`` / ``height_cm`` (None when no <wp:extent> was found).
    """
    word_dir = os.path.join(unpacked_dir, 'word')
    doc_xml = os.path.join(word_dir, 'document.xml')
    rels_xml = os.path.join(word_dir, '_rels', 'document.xml.rels')

    # Map relationship id -> media target (e.g. 'media/image1.png'),
    # keeping only relationships whose Type marks an image part.
    rels_root = etree.parse(rels_xml).getroot()
    rid_to_media = {}
    for rel in rels_root:
        if rel.get('Type', '') == REL_TYPE_IMAGE:
            rid_to_media[rel.get('Id')] = rel.get('Target')

    doc_root = etree.parse(doc_xml).getroot()
    results = []
    # <a:blip r:embed="rIdN"> nodes appear in document order, one per picture use.
    for blip in doc_root.iter(f'{{{A}}}blip'):
        rid = blip.get(f'{{{R}}}embed')
        if not rid or rid not in rid_to_media:
            continue
        media_rel = rid_to_media[rid]
        media_abs = os.path.join(word_dir, media_rel.replace('/', os.sep))
        ext = os.path.splitext(media_rel)[1].lstrip('.').lower()

        # Walk up to the enclosing <wp:inline>/<wp:anchor> container to read
        # the display size and the drawing's internal name.
        inline = blip
        while inline is not None and inline.tag not in (f'{{{WD}}}inline', f'{{{WD}}}anchor'):
            inline = inline.getparent()
        w_cm = h_cm = None
        docpr_name = ''
        if inline is not None:
            ext_el = inline.find(f'{{{WD}}}extent')
            if ext_el is not None:
                # EMU -> centimetres: 360000 EMU per cm.
                w_cm = round(int(ext_el.get('cx', 0)) / 360000, 2)
                h_cm = round(int(ext_el.get('cy', 0)) / 360000, 2)
            dp = inline.find(f'{{{WD}}}docPr')
            if dp is not None:
                docpr_name = dp.get('name', '')

        results.append({
            'index': len(results) + 1, 'rid': rid,
            'media_file': media_rel, 'abs_path': media_abs,
            'ext': ext, 'docpr_name': docpr_name,
            'width_cm': w_cm, 'height_cm': h_cm,
        })
    return results


def list_images(docx_path):
    """Print a human-readable table of every image found in *docx_path*."""
    imgs = get_images_info(docx_path)
    if not imgs:
        print("文档中没有找到图片。")
        return
    print(f"共找到 {len(imgs)} 张图片:\n")
    print(f" {'#':<4} {'文件名':<20} {'尺寸':<18} Word内部名称")
    print(" " + "-" * 62)
    for img in imgs:
        # width_cm is None when the drawing carried no <wp:extent>; show "未知".
        size = f"{img['width_cm']}×{img['height_cm']}cm" if img['width_cm'] else "未知"
        print(f" {img['index']:<4} {os.path.basename(img['media_file']):<20} {size:<18} {img['docpr_name']}")


def get_images_info(docx_path):
    """Return structured information about every image in a DOCX file.

    Intended for reuse by other modules (e.g. an MCP server): equivalent to
    the old list_images internals, but performs no printing.
    """
    with tempfile.TemporaryDirectory() as workdir:
        unpack(docx_path, workdir)
        return build_image_index(workdir)


def _normalize_newlines(text):
|
||
if text is None:
|
||
return ''
|
||
return str(text).replace('\r\n', '\n').replace('\r', '\n')
|
||
|
||
|
||
def _is_text_node(el):
    """True if *el* is a <w:t> literal-text element."""
    return el.tag == '{%s}t' % W


def _is_break_node(el):
    """True for explicit line-break elements: <w:br> or <w:cr>."""
    return el.tag in ('{%s}br' % W, '{%s}cr' % W)


def _is_tab_node(el):
    """True if *el* is a <w:tab> tab element."""
    return el.tag == '{%s}tab' % W


def _iter_run_text_parts(run_el):
    """Yield (child, text) pairs for each text-carrying child of a run.

    <w:t> yields its newline-normalized text, <w:br>/<w:cr> yield '\n',
    <w:tab> yields '\t'; any other child (e.g. <w:rPr>) is skipped.
    """
    for node in run_el:
        if _is_tab_node(node):
            yield node, '\t'
        elif _is_break_node(node):
            yield node, '\n'
        elif _is_text_node(node):
            yield node, _normalize_newlines(node.text or '')


def _run_text(run_el):
    """Concatenate the textual content of a single <w:r> run."""
    pieces = []
    for _, piece in _iter_run_text_parts(run_el):
        pieces.append(piece)
    return ''.join(pieces)


def _paragraph_text(para_el):
    """Concatenate the text of every run inside a paragraph."""
    run_tag = f'{{{W}}}r'
    return ''.join(_run_text(r) for r in para_el.iter(run_tag))


def _clear_run_text_like_children(run_el):
    """Remove every text-carrying child (<w:t>, <w:br>, <w:cr>, <w:tab>).

    Non-text children such as <w:rPr> or drawings are left untouched.
    """
    removable = (f'{{{W}}}t', f'{{{W}}}br', f'{{{W}}}cr', f'{{{W}}}tab')
    for node in list(run_el):
        if node.tag in removable:
            run_el.remove(node)


def _append_text_to_run(run_el, text):
    """Append *text* to a run, converting '\n' characters into <w:br> nodes.

    Each non-empty line becomes its own <w:t>; lines with a leading or
    trailing space get xml:space="preserve" so Word does not strip them.
    """
    text = _normalize_newlines(text)
    parts = text.split('\n')

    # Fast path: single-line text -> exactly one <w:t> child.
    if len(parts) == 1:
        t_el = etree.SubElement(run_el, f'{{{W}}}t')
        t_el.text = parts[0]
        if parts[0] and (parts[0][0] == ' ' or parts[0][-1] == ' '):
            t_el.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
        return

    # Multi-line: interleave <w:t> text nodes with <w:br> line breaks.
    for idx, part in enumerate(parts):
        if part:
            t_el = etree.SubElement(run_el, f'{{{W}}}t')
            t_el.text = part
            if part[0] == ' ' or part[-1] == ' ':
                t_el.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
        if idx < len(parts) - 1:
            etree.SubElement(run_el, f'{{{W}}}br')
    # NOTE(review): literal '\t' characters end up inside <w:t> as-is rather
    # than as <w:tab> elements — confirm this round-trips acceptably in Word.


def _ensure_paragraph_run(para_el):
    """Return the paragraph's first run, creating an empty one if none exist.

    A newly created run is inserted immediately after <w:pPr> when the
    paragraph has properties, otherwise at the very start of the paragraph.
    """
    existing = para_el.findall(f'.//{{{W}}}r')
    if existing:
        return existing[0]

    fresh = etree.Element(f'{{{W}}}r')
    ppr = para_el.find(f'{{{W}}}pPr')
    if ppr is not None:
        para_el.insert(para_el.index(ppr) + 1, fresh)
    else:
        para_el.insert(0, fresh)
    return fresh


def _set_paragraph_text(para_el, text):
    """Replace the paragraph's visible text with *text*, keeping run props.

    Strips all text-like children from every text-bearing run, then writes
    the whole new text into the first such run (creating a run when the
    paragraph had none).
    """
    all_runs = para_el.findall(f'.//{{{W}}}r')
    carriers = [r for r in all_runs if any(True for _ in _iter_run_text_parts(r))]

    if carriers:
        target = carriers[0]
        for r in carriers:
            _clear_run_text_like_children(r)
    else:
        target = _ensure_paragraph_run(para_el)
        _clear_run_text_like_children(target)

    _append_text_to_run(target, text)


def _paragraph_list(doc_el):
    """Return every <w:p> element under *doc_el* in document order."""
    para_tag = f'{{{W}}}p'
    return list(doc_el.iter(para_tag))


def _replace_paragraph_block(doc_el, old_text, new_text):
    """Replace a run of consecutive whole paragraphs matching *old_text*.

    Both texts are split on blank lines ('\n\n') into per-paragraph segments.
    Returns False when old_text has a single segment (that case is handled by
    the per-paragraph path) or when no consecutive paragraph window matches;
    True after an in-place replacement.
    """
    old_segments = _normalize_newlines(old_text).split('\n\n')
    new_segments = _normalize_newlines(new_text).split('\n\n')
    if len(old_segments) <= 1:
        return False

    paras = _paragraph_list(doc_el)
    para_texts = [_paragraph_text(p) for p in paras]

    # Find the first window of consecutive paragraphs equal to old_segments.
    match_start = None
    for i in range(0, len(para_texts) - len(old_segments) + 1):
        if para_texts[i:i + len(old_segments)] == old_segments:
            match_start = i
            break

    if match_start is None:
        return False

    matched_paras = paras[match_start:match_start + len(old_segments)]
    parent = matched_paras[0].getparent()
    if parent is None:
        return False

    # Remember where the last matched paragraph sits so extra paragraphs can
    # be inserted directly after it.
    anchor_index = parent.index(matched_paras[-1])

    # Rewrite the paragraphs both texts share, one segment per paragraph.
    shared_count = min(len(matched_paras), len(new_segments))
    for idx in range(shared_count):
        _set_paragraph_text(matched_paras[idx], new_segments[idx])

    if len(new_segments) > len(matched_paras):
        # More new segments than matched paragraphs: clone the last matched
        # paragraph (inheriting its formatting) for each extra segment.
        template_para = matched_paras[-1]
        insert_at = anchor_index + 1
        for seg in new_segments[len(matched_paras):]:
            new_para = copy.deepcopy(template_para)
            _set_paragraph_text(new_para, seg)
            parent.insert(insert_at, new_para)
            insert_at += 1
    elif len(new_segments) < len(matched_paras):
        # Fewer new segments: drop the surplus matched paragraphs.
        for para in matched_paras[len(new_segments):]:
            para_parent = para.getparent()
            if para_parent is not None:
                para_parent.remove(para)

    return True


def paragraph_replace(para_el, replacements):
    """
    Apply text replacements at paragraph level, matching across <w:t> nodes.

    Strategy:
      1. Collect every run (<w:r>) and join their texts into one string.
      2. Apply every (old, new) replacement on that string.
      3. When something changed, clear all text-bearing runs and write the
         whole result back into the first one ('\n' becomes <w:br>).

    NOTE(review): writing everything into the first run discards per-run
    character formatting of the later runs — trade-off inherited from the
    original design.
    """
    # Collect all run elements (<w:r>) in order.
    runs = list(para_el.findall(f'.//{{{W}}}r'))
    if not runs:
        return

    # Keep only runs that actually carry text (or breaks / tabs).
    text_runs = []
    for run in runs:
        if any(True for _ in _iter_run_text_parts(run)):
            text_runs.append(run)

    if not text_runs:
        return

    # Full paragraph text, newline-normalized.
    full_text = _paragraph_text(para_el)
    original_text = full_text

    normalized_replacements = []
    for old, new in replacements:
        normalized_replacements.append((
            _normalize_newlines(old),
            _normalize_newlines(new),
        ))

    # Apply every replacement in order.
    for old, new in normalized_replacements:
        if old in full_text:
            full_text = full_text.replace(old, new)

    # Nothing changed: leave the paragraph untouched.
    if full_text == original_text:
        return

    print(f"段落替换: {len(original_text)} -> {len(full_text)} 字符")

    # Write the new text back into the first text run; '\n' is re-emitted as
    # a Word line-break node by _append_text_to_run.
    first_run = text_runs[0]
    for run in text_runs:
        _clear_run_text_like_children(run)
    _append_text_to_run(first_run, full_text)


def ensure_rpr(run_el):
    """Return the run's <w:rPr> element, creating one at index 0 if missing."""
    existing = run_el.find(f'{{{W}}}rPr')
    if existing is not None:
        return existing
    created = etree.Element(f'{{{W}}}rPr')
    run_el.insert(0, created)
    return created


def set_color_on_rpr(rpr_el, hex_color):
    """Set <w:color w:val="..."> on the given run-properties element.

    Reuses an existing <w:color> child when present; any leading '#' in
    *hex_color* is stripped.
    """
    color_el = rpr_el.find(f'{{{W}}}color')
    if color_el is None:
        color_el = etree.SubElement(rpr_el, f'{{{W}}}color')
    color_el.set(f'{{{W}}}val', hex_color.lstrip('#'))


def apply_color_to_keyword(doc_el, keyword, hex_color, context_text=None):
    """
    Color only the matched keyword itself, not its whole run.

    A run containing the keyword is split into [prefix][keyword][suffix]
    runs that all clone the original run's <w:rPr>; only the keyword run
    receives the <w:color> override.

    When context_text is given, coloring is restricted to paragraphs whose
    full text contains that context — preventing the same keyword from being
    recolored elsewhere (e.g. a lone digit 0).
    """
    keyword = _normalize_newlines(keyword)
    context_text = _normalize_newlines(context_text) if context_text is not None else None

    # With a context, precompute the set of paragraphs allowed to be colored.
    allowed_paras = None
    if context_text:
        allowed_paras = set()
        for p in doc_el.iter(f'{{{W}}}p'):
            full = _paragraph_text(p)
            if context_text in full:
                allowed_paras.add(p)

    def _find_ancestor_para(el):
        # Walk up to the enclosing <w:p>, or None when the node is detached.
        cur = el
        while cur is not None and cur.tag != f'{{{W}}}p':
            cur = cur.getparent()
        return cur

    # Materialize the run list first: the loop below mutates the tree.
    runs = list(doc_el.iter(f'{{{W}}}r'))
    for run in runs:
        if allowed_paras is not None:
            para = _find_ancestor_para(run)
            if para not in allowed_paras:
                continue
        full_text = _run_text(run)
        if not full_text:
            continue
        if keyword not in full_text:
            continue

        parent = run.getparent()
        if parent is None:
            continue
        insert_pos = parent.index(run)

        # Serialize the original run's <w:rPr> so each new run can clone it.
        orig_rpr = run.find(f'{{{W}}}rPr')
        if orig_rpr is not None:
            rpr_bytes = etree.tostring(orig_rpr)
        else:
            rpr_bytes = None

        def make_run(text, colored):
            # Build a replacement run with the cloned properties; the colored
            # flag adds the <w:color> override on top.
            new_r = etree.Element(f'{{{W}}}r')
            if rpr_bytes is not None:
                new_r.append(etree.fromstring(rpr_bytes))
            _append_text_to_run(new_r, text)
            if colored:
                set_color_on_rpr(ensure_rpr(new_r), hex_color)
            return new_r

        # Split the run text into (segment, is_keyword) pieces.
        segments = []
        s = full_text
        start = 0
        klen = len(keyword)
        while True:
            idx = s.find(keyword, start)
            if idx == -1:
                if start < len(s):
                    segments.append((s[start:], False))
                break
            if idx > start:
                segments.append((s[start:idx], False))
            segments.append((keyword, True))
            start = idx + klen

        # Swap the original run for the split replacement runs.
        parent.remove(run)
        for offset, (seg_text, colored) in enumerate(segments):
            if seg_text:
                parent.insert(insert_pos + offset, make_run(seg_text, colored))


def remove_rule_blocks(doc_el):
    """
    Delete every paragraph lying inside <global_rule>…</global_rule>,
    <rule>…</rule> or <chart_rule>…</chart_rule> marker blocks.

    Paragraphs are walked once in document order; each tag pair keeps its own
    open/closed state, so blocks may span many paragraphs and interleave.
    Marker paragraphs themselves are deleted, as are empty paragraphs that
    fall inside any open block. Tag text and block contents are assumed to
    never belong in the final document.
    """
    tags = ('global_rule', 'rule', 'chart_rule')
    inside = {tag: False for tag in tags}
    paras_to_delete = []   # preserves document order for deterministic removal
    marked = set()         # id()-based O(1) duplicate guard (was an O(n) list scan)

    def _mark(p):
        # Queue a paragraph for deletion exactly once.
        if id(p) not in marked:
            marked.add(id(p))
            paras_to_delete.append(p)

    # list(...) so the deletions below cannot disturb iteration.
    for p in list(doc_el.iter(f'{{{W}}}p')):
        t_nodes = list(p.iter(f'{{{W}}}t'))
        full = ''.join(t.text or '' for t in t_nodes)

        if not full:
            # Empty paragraph: delete only when inside some open block.
            if any(inside.values()):
                _mark(p)
            continue

        # Paragraphs inside an already-open block are removed wholesale.
        if any(inside.values()):
            _mark(p)

        # Open/close handling for each tag pair; a pair may open and close
        # within the same paragraph.
        for tag in tags:
            if f'<{tag}>' in full:
                inside[tag] = True
                _mark(p)
            if f'</{tag}>' in full:
                inside[tag] = False

    for p in paras_to_delete:
        parent = p.getparent()
        if parent is not None:
            parent.remove(p)


def process(input_docx, output_docx, replacements, color_keywords):
    """Run the full pipeline: unpack, clean rule blocks, replace, color, pack.

    replacements: list of (old, new) plain-text pairs.
    color_keywords: list of (keyword, hex_color) or
        (keyword, hex_color, context_text) tuples.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        print(f"📂 解包 {input_docx} ...")
        unpack(input_docx, tmpdir)

        doc_xml_path = os.path.join(tmpdir, 'word', 'document.xml')

        tree = etree.parse(doc_xml_path)
        root = tree.getroot()

        # Remove <global_rule>/<rule>/<chart_rule> blocks first
        # (tags may span several paragraphs).
        remove_rule_blocks(root)

        if replacements:
            print(f"✏️ 替换 {len(replacements)} 条文本...")
            remaining_replacements = []
            for old, new in replacements:
                # OLD texts containing a blank line span multiple paragraphs;
                # try the paragraph-block path first, fall back otherwise.
                if '\n\n' in _normalize_newlines(old):
                    replaced = _replace_paragraph_block(root, old, new)
                    if replaced:
                        print("🧩 跨段替换命中")
                        continue
                remaining_replacements.append((old, new))
            if remaining_replacements:
                for para in root.iter(f'{{{W}}}p'):
                    paragraph_replace(para, remaining_replacements)

        # Color the keywords extracted from <span> markup.
        for item in color_keywords:
            # Backward compatibility with the old (keyword, color) 2-tuples.
            if len(item) == 2:
                keyword, color = item
                context_text = None
            else:
                keyword, color, context_text = item
            print(f"🎨 关键词「{keyword}」→ #{color}")
            apply_color_to_keyword(root, keyword, color, context_text)

        tree.write(doc_xml_path, xml_declaration=True, encoding='UTF-8', standalone=True)
        print(f"📦 打包 → {output_docx} ...")
        pack(tmpdir, output_docx, input_docx)
        print(f"✅ 完成!输出: {output_docx}")


def _parse_span_replacement(new_text):
    """
    Parse <span> tags inside the NEW text to derive coloring directives.

    Expected (case-insensitive) form:
        <span color="red">待补充</span>

    Returns: (plain_text, [(keyword, hex_color, paragraph_context), ...])
    where paragraph_context is the span-stripped text of the paragraph the
    keyword occurred in, later used to limit coloring to that paragraph.
    """
    # Fix: the redundant function-local `import re` was removed — the module
    # already imports re at the top level.
    new_text = _normalize_newlines(new_text)

    # Minimal named-color -> hex mapping; extend as needed.
    named_colors = {
        'red': 'FF0000',
        'blue': '0000FF',
        'green': '00FF00',
        'yellow': 'FFFF00',
        'black': '000000',
        'white': 'FFFFFF',
        'gray': '808080',
        'grey': '808080',
    }

    def _normalize_color(raw_color: str) -> str:
        """
        Accepts:
          - FFFFFF / ffffff
          - #FFFFFF / #ffffff
          - named colors such as red / blue (see named_colors)
        Returns an upper-case hex string without '#'; unrecognized named
        colors fall through unchanged (minus any leading '#').
        """
        c = (raw_color or '').strip()
        if not c:
            return ''

        # Drop a leading '#'.
        if c.startswith('#'):
            c = c[1:]

        # Plain 6-digit hex.
        if re.fullmatch(r'[0-9a-fA-F]{6}', c):
            return c.upper()

        # Named color.
        mapped = named_colors.get(c.lower())
        if mapped:
            return mapped

        # Fallback: return the raw value (sans '#') upper-cased.
        return c.upper()

    # The color attribute may be 6-digit hex (optionally '#'-prefixed)
    # or a named color (red / blue ...).
    span_pattern = re.compile(
        r'<span\s+[^>]*?color=["\']?([^"\'\s>]+)["\']?[^>]*>(.*?)</span>',
        re.IGNORECASE | re.DOTALL,
    )

    def _strip_repl(m):
        return m.group(2)

    # Split on paragraph boundaries first so each span can carry the text of
    # its own paragraph as coloring context.
    plain_segments = []
    color_keywords = []
    for segment in new_text.split('\n\n'):
        plain_segment = span_pattern.sub(_strip_repl, segment)
        plain_segments.append(plain_segment)
        for m in span_pattern.finditer(segment):
            raw_color = m.group(1)
            hex_color = _normalize_color(raw_color)
            keyword = m.group(2)
            # Triple: (keyword, color, plain-text context of its paragraph).
            color_keywords.append((keyword, hex_color, plain_segment))

    plain_text = '\n\n'.join(plain_segments)
    return plain_text, color_keywords


def main():
    """Command-line entry point: list images, or replace text / color keywords."""
    cli = argparse.ArgumentParser(description='DOCX 格式保留:替换文本/颜色')
    cli.add_argument('input', help='输入 .docx')
    cli.add_argument('output', nargs='?', help='输出 .docx')
    cli.add_argument('--list-images', action='store_true', help='列出所有图片')
    cli.add_argument('--replace', nargs=2, metavar=('OLD', 'NEW'),
                     action='append', default=[])
    args = cli.parse_args()

    if args.list_images:
        list_images(args.input)
        return
    if not args.output:
        cli.error("需要指定输出文件")

    # Pull <span color="...">text</span> markers out of each NEW string: the
    # replacement text stays plain while the spans drive keyword coloring.
    replacements = []
    color_keywords = []
    for old_text, new_raw in args.replace:
        plain, spans = _parse_span_replacement(new_raw)
        replacements.append((old_text, plain))
        color_keywords.extend(spans)

    process(
        input_docx=args.input,
        output_docx=args.output,
        replacements=replacements,
        color_keywords=color_keywords,
    )


if __name__ == '__main__':
    main()
|