From f5e639f59ca0ea39d1bb7b77e9528c8f46538985 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rn-Michael=20Miehe?=
Date: Sun, 11 May 2025 20:06:35 +0000
Subject: [PATCH] create redacted pdf version

---
 Makefile  | 16 +++++++++++-----
 redact.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 5 deletions(-)
 create mode 100644 redact.py

diff --git a/Makefile b/Makefile
index f476d60..359cdeb 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@ pdfFilesInt := $(call suffixSubst,$(mdFiles),md,pdf,$(suffixInt))
 texFilesInt := $(call suffixSubst,$(mdFiles),md,tex,$(suffixInt))
 
 .PHONY: all pdf pdfPub pdfInt tex texPub texInt
-all: pdfPub
+all: pdf
 
 pdf: pdfPub pdfInt
 pdfPub: $(pdfFilesPub)
@@ -47,8 +47,14 @@ compile = pandoc \
 	--variable lang:de-DE \
 	--output $(2)
 
-%.pdf: %.md lenaisten-defs.sty
-	$(call compile,$<,$@)
+%$(suffixPub).pdf: %.md lenaisten-defs.sty
+	python3 ./redact.py $< | $(call compile,-,$@)
 
-%.tex: %.md lenaisten-defs.sty
-	$(call compile,$<,$@)
+%$(suffixPub).tex: %.md lenaisten-defs.sty
+	python3 ./redact.py $< | $(call compile,-,$@)
+
+%$(suffixInt).pdf: %.md lenaisten-defs.sty
+	python3 ./redact.py -i $< | $(call compile,-,$@)
+
+%$(suffixInt).tex: %.md lenaisten-defs.sty
+	python3 ./redact.py -i $< | $(call compile,-,$@)
diff --git a/redact.py b/redact.py
new file mode 100644
index 0000000..4489be5
--- /dev/null
+++ b/redact.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""Redact marked passages of a Markdown file and print the result to stdout.
+
+Inline syntax: (((reason::: content)))            ("reason:::" is optional)
+Block syntax:  [redact:::reason]content[/redact]  (":::reason" is optional)
+
+The public version replaces each passage with "REASON [intern]"; with
+-i/--internal the content is kept and set in emphasis instead.
+"""
+
+import argparse
+import re
+from pathlib import Path
+
+# parse CLI arguments
+parser = argparse.ArgumentParser()
+parser.add_argument("file", help="File to open")
+parser.add_argument("-i", "--internal", help="Output internal version", action="store_true")
+
+args = parser.parse_args()
+
+# read file
+txt = Path(args.file).read_text()
+
+# redaction syntax; both patterns yield the groups (_, reason, content)
+# The inline reason must not run past a closing ")))": otherwise a reason-less
+# "(((a)))" followed later by any ":::" is merged into one match whose "reason"
+# contains the secret "a" and is printed in the public version.
+inlinepat = re.compile(r"\(\(\((((?:(?!\)\)\))[^>])*?):::)?(.*?)\)\)\)", re.MULTILINE|re.DOTALL)
+tokenpat = re.compile(r"\[redact(:::(.*?))?\](.*?)\[\/redact\]", re.MULTILINE|re.DOTALL)
+
+
+def subredact(match):
+    """Return the replacement text for one redaction match.
+
+    Public mode gives "REASON [intern]". Internal mode keeps the content and
+    emphasises it line by line, reopening the emphasis after list markers.
+    """
+    _, reason, content = match.groups()
+    if not args.internal:
+        return (reason or "").strip() + " [intern]"
+
+    # No flags argument: the 4th positional parameter of re.sub() is `count`,
+    # so passing re.MULTILINE there silently limited this to 8 replacements.
+    body = content.lstrip(" ").rstrip()
+    return re.sub(r"\n(\s*[+\-\*]\s+)?", r"*\n\1*", f"*{body}*")
+
+
+# matching
+txt = tokenpat.sub(subredact, txt)
+txt = inlinepat.sub(subredact, txt)
+
+# pipe to stdout
+print(txt)