64 lines
2 KiB
Python
64 lines
2 KiB
Python
#!/usr/bin/env python3
|
|
# make_text_invisible.py
|
|
# Usage: python make_text_invisible.py text.pdf sig.png out.pdf
|
|
# Produces a single-page PDF showing the supplied image stretched to the page size, with the first page's text from the input PDF overlaid as invisible (selectable/searchable) text.
|
|
# Requires: pip install pymupdf
|
|
|
|
import sys
|
|
import fitz # pymupdf
|
|
import os
|
|
|
|
def _span_anchor(span):
    """Return the (x, y) point at which a span's text should be inserted.

    Uses the span's "origin" entry when present; otherwise falls back to
    ``(bbox[0], bbox[3])`` of its "bbox" entry (all zeros if that is missing
    as well).
    """
    origin = span.get("origin")
    if origin:
        return origin[0], origin[1]
    bbox = span.get("bbox", [0, 0, 0, 0])
    return bbox[0], bbox[3]


def _overlay_invisible_text(src_page, dst_page):
    """Re-insert every non-empty text span of *src_page* into *dst_page*.

    The text is written with ``render_mode=3`` (neither fill nor stroke), so
    it is invisible but remains selectable/searchable.

    Insertion is best-effort: a span that cannot be inserted is reported on
    stderr and skipped so the remaining spans are still processed.

    Returns:
        The number of spans that could not be inserted.
    """
    # NOTE(review): insert_text is called without a font name, so the
    # library's default font is used -- confirm it covers the input's
    # character set.
    failures = 0
    text_dict = src_page.get_text("dict")
    for block in text_dict.get("blocks", []):
        for line in block.get("lines", []):
            for span in line.get("spans", []):
                txt = span.get("text", "")
                if not txt:
                    continue

                anchor = _span_anchor(span)
                size = span.get("size", 12)
                try:
                    dst_page.insert_text(
                        anchor,
                        txt,
                        fontsize=size,
                        color=(0, 0, 0, 0),
                        render_mode=3,
                    )
                except Exception as exc:  # keep going: one bad span must not abort the run
                    failures += 1
                    print(f"Failed to insert text: '{txt}' ({exc})", file=sys.stderr)
    return failures


def main(argv=None):
    """Build a one-page PDF: an image background plus invisible text.

    Args:
        argv: Full argument vector ``[prog, text.pdf, sig.png, out.pdf]``.
            Defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 1 if the input PDF has no pages,
        2 on a usage error.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 4:
        print(
            "Usage: python make_text_invisible.py text.pdf sig.png out.pdf",
            file=sys.stderr,
        )
        return 2

    in_path, sig_path, out_path = argv[1], argv[2], argv[3]

    # Context managers close both documents on every path, including the
    # early return below and any exception raised while building the output.
    with fitz.open(in_path) as doc:
        if doc.page_count < 1:
            print("Input has no pages", file=sys.stderr)
            return 1

        # Operate only on the first page (single-page input assumed).
        page = doc.load_page(0)

        with fitz.open() as out:
            # Blank page with the same size as the source page.
            newp = out.new_page(width=page.rect.width, height=page.rect.height)

            # The visible content is the supplied image stretched over the
            # whole page; the source page itself is not rasterized.
            newp.insert_image(newp.rect, filename=sig_path, keep_proportion=False)

            failed = _overlay_invisible_text(page, newp)

            out.save(out_path, garbage=4, deflate=True)

    if failed:
        print(f"{failed} text span(s) could not be inserted", file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main())