必需软件
- libreoffice和ImageMagick
- 安装方法问豆包。
代码
- 使用的时候注意配置项:输入文件为 aa.DOCX(即配置项 INPUT_DOCX)
- 输出为bb.docx
- 临时文件夹为xbb
- 为了不对公式进行处理,设定了转换阈值 2.5KB(配置项 MIN_SIZE_KB),不大于该大小的图片不会被转换
- 最终会有个别图显示不正常,可以在xbb/word/media中找到替换掉。
- 纯粹娱乐,有需要的拿走。
import os
import shutil
import subprocess
import zipfile
from pathlib import Path
# ===================== Configuration =====================
# Path of the DOCX file that main() unzips and processes.
INPUT_DOCX = "aa.DOCX"
# Path of the DOCX file that main() writes.
OUTPUT_DOCX = "bb.docx"
# Working directory for the unzipped package; main() deletes it if it already
# exists and recreates it on every run.
TEMP_FOLDER = "xbb"
# LibreOffice executable invoked with --headless --convert-to png.
LIBREOFFICE = "/Applications/LibreOffice.app/Contents/MacOS/soffice"
# Size threshold in KB: images whose size is <= this value are skipped.
MIN_SIZE_KB = 2.5 # only images larger than this many KB are converted
# ==========================================================
# Lower-case file extensions of the vector formats that get rasterised.
_VECTOR_SUFFIXES = (".wmf", ".emf")
# Declaration an OPC package needs before it may contain ``.png`` parts.
_PNG_DEFAULT_TAG = '<Default Extension="png" ContentType="image/png"/>'


def _pack_folder(folder, output_docx):
    """Zip every file below *folder* into *output_docx* using relative names."""
    with zipfile.ZipFile(output_docx, "w", zipfile.ZIP_DEFLATED) as zf:
        for root, _, files in os.walk(folder):
            for name in files:
                full = os.path.join(root, name)
                zf.write(full, os.path.relpath(full, folder))


def _convert_to_png(libreoffice, src, scratch_dir):
    """Render *src* to PNG with LibreOffice; return the new file or None."""
    cmd = [
        str(libreoffice), "--headless",
        "--convert-to", "png",
        "--outdir", str(scratch_dir),
        str(src),
    ]
    try:
        subprocess.run(cmd, capture_output=True)
    except OSError as exc:
        # Missing or non-executable binary: report it instead of crashing.
        print(f" ❌ 无法运行 LibreOffice:{exc}")
        return None
    produced = Path(scratch_dir) / f"{src.stem}.png"
    return produced if produced.exists() else None


def _trim_png(png_path):
    """Crop blank borders from *png_path* in place; return True on success."""
    cmd = [
        "magick",
        str(png_path),
        "-trim",
        "+repage",
        "-quality", "100",
        str(png_path),
    ]
    try:
        done = subprocess.run(cmd, capture_output=True)
    except OSError as exc:
        print(f" ⚠️ 无法运行 ImageMagick,保留未裁剪图片:{exc}")
        return False
    if done.returncode != 0:
        print(" ⚠️ 裁剪失败,保留未裁剪图片")
        return False
    return True


def _unused_png_name(stem, taken):
    """Return a ``.png`` name built from *stem* that is not in *taken*.

    *taken* holds lower-cased file names so the check also holds on
    case-insensitive file systems.
    """
    candidate = f"{stem}.png"
    serial = 1
    while candidate.lower() in taken:
        candidate = f"{stem}_{serial}.png"
        serial += 1
    return candidate


def _rewrite_references(package_dir, renamed):
    """Point ``media/<old>`` references in XML/rels parts at the new names."""
    swaps = []
    for old_name, new_name in renamed.items():
        stem, suffix = os.path.splitext(old_name)
        # Tolerate references whose extension case differs from the file's.
        for variant in sorted({suffix, suffix.lower(), suffix.upper()}):
            swaps.append((f"media/{stem}{variant}", f"media/{new_name}"))

    for root, _, files in os.walk(package_dir):
        for name in files:
            if not name.lower().endswith((".xml", ".rels")):
                continue
            part = Path(root) / name
            try:
                # Bytes in / bytes out avoids newline translation.
                original = part.read_bytes().decode("utf-8")
            except (OSError, UnicodeDecodeError) as exc:
                print(f" ⚠️ 跳过无法读取的文件:{part} ({exc})")
                continue
            updated = original
            for needle, replacement in swaps:
                updated = updated.replace(needle, replacement)
            # Leave parts that reference no converted image untouched.
            if updated != original:
                part.write_bytes(updated.encode("utf-8"))


def _declare_png_content_type(package_dir):
    """Add the PNG default content type to the package when it is missing."""
    part = Path(package_dir) / "[Content_Types].xml"
    try:
        text = part.read_bytes().decode("utf-8")
    except (OSError, UnicodeDecodeError):
        return
    if 'extension="png"' in text.lower():
        return
    start = text.find("<Types")
    end = text.find(">", start) if start != -1 else -1
    # Give up on an unexpected layout (no root tag, or a self-closing one).
    if end == -1 or text[end - 1] == "/":
        return
    patched = text[:end + 1] + _PNG_DEFAULT_TAG + text[end + 1:]
    part.write_bytes(patched.encode("utf-8"))


def main(input_docx=None, output_docx=None, temp_folder=None,
         libreoffice=None, min_size_kb=None):
    """Convert the WMF/EMF images of a DOCX to trimmed PNGs and repack it.

    The package is unzipped into *temp_folder* (wiped first). Every WMF/EMF
    file in ``word/media`` larger than *min_size_kb* is rendered to PNG by
    LibreOffice and trimmed by ImageMagick (``magick`` on PATH). References
    to converted files are rewritten and the result is zipped to
    *output_docx*. Images that are skipped or fail to convert stay as they
    are. The working folder is kept after the run.

    Args:
        input_docx: Source DOCX path; defaults to ``INPUT_DOCX``.
        output_docx: Destination DOCX path; defaults to ``OUTPUT_DOCX``.
        temp_folder: Working directory; defaults to ``TEMP_FOLDER``.
        libreoffice: LibreOffice executable; defaults to ``LIBREOFFICE``.
        min_size_kb: Files of at most this many KB are skipped; defaults to
            ``MIN_SIZE_KB``.

    Returns:
        None. Progress and errors are printed.
    """
    import tempfile  # local import: only this function needs it

    input_docx = INPUT_DOCX if input_docx is None else input_docx
    output_docx = OUTPUT_DOCX if output_docx is None else output_docx
    temp_folder = TEMP_FOLDER if temp_folder is None else temp_folder
    libreoffice = LIBREOFFICE if libreoffice is None else libreoffice
    min_size_kb = MIN_SIZE_KB if min_size_kb is None else min_size_kb

    print("=== macOS DOCX WMF/EMF→PNG 上下边距=0 全自动 ===")
    print(f"⚙️ 仅处理大于 {min_size_kb}KB 图片")
    print("⚙️ 图片:上下无边距(0),左右无边距")
    print("⚙️ 未转换图片 XML 不修改\n")

    work_dir = Path(temp_folder)
    if work_dir.exists():
        shutil.rmtree(work_dir)
    work_dir.mkdir(parents=True)

    print("🔓 解压 DOCX...")
    try:
        with zipfile.ZipFile(input_docx, "r") as zf:
            zf.extractall(work_dir)
    except Exception as exc:
        # Top-level boundary of the script: report any failure and stop.
        print(f"❌ 解压失败:{exc}")
        return

    media_dir = work_dir / "word" / "media"
    if not media_dir.exists():
        print("✅ 无图片,直接打包")
        _pack_folder(work_dir, output_docx)
        return

    # Match the extension case-insensitively so ".Wmf" etc. are found too.
    image_files = sorted(
        p for p in media_dir.iterdir()
        if p.is_file() and p.suffix.lower() in _VECTOR_SUFFIXES
    )
    total = len(image_files)
    print(f"🖼️ 找到 {total} 张矢量图\n")

    taken = {p.name.lower() for p in media_dir.iterdir()}
    renamed = {}  # original file name -> PNG file name
    min_size_bytes = min_size_kb * 1024

    # Convert into a scratch directory so a PNG already present in
    # word/media can neither fake a success nor be overwritten.
    with tempfile.TemporaryDirectory() as scratch:
        for idx, img_path in enumerate(image_files, 1):
            size = img_path.stat().st_size
            kb = size / 1024
            if size <= min_size_bytes:
                print(f"[{idx}/{total}] ⏭️ 跳过:{img_path.name} ({kb:.1f}KB)")
                continue
            print(f"[{idx}/{total}] 处理:{img_path.name} ({kb:.1f}KB)")

            produced = _convert_to_png(libreoffice, img_path, scratch)
            if produced is None:
                print(" ❌ 转换失败")
                continue

            png_name = _unused_png_name(img_path.stem, taken)
            png_path = media_dir / png_name
            shutil.move(str(produced), str(png_path))
            taken.add(png_name.lower())
            img_path.unlink()
            renamed[img_path.name] = png_name

            if _trim_png(png_path):
                print(" ✅ 上下边距 0 处理完成")

    print("\n🔧 仅更新已转换图片XML引用...")
    if renamed:
        _rewrite_references(work_dir, renamed)
        _declare_png_content_type(work_dir)

    print("\n📦 打包生成新文档...")
    _pack_folder(work_dir, output_docx)

    print("\n🎉 处理完成!")
    print(f"✅ 已转换:{len(renamed)} 张")
    print(f"⏭️ 保留原图:{total - len(renamed)} 张")
    print(f"📄 输出文件:{output_docx}")


if __name__ == "__main__":
    main()