From 332e62b764e514c537e506bca598c20b6232e48d Mon Sep 17 00:00:00 2001 From: Frank Conrads Date: Thu, 9 Apr 2026 09:40:19 +0200 Subject: [PATCH] =?UTF-8?q?Funktionalit=C3=A4t=20min=5Fsavings=20der=20cae?= =?UTF-8?q?siumclt=201.3.0=20implementiert=20(default:=202%),=20log-Datei?= =?UTF-8?q?=20um=20"Bild=20in=20Folien=20Nr."=20erg=C3=A4nzt.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pptx_image_compress.py | 80 +++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/pptx_image_compress.py b/pptx_image_compress.py index 044e67a..da20fdd 100644 --- a/pptx_image_compress.py +++ b/pptx_image_compress.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, Batch, sauberes Cleanup) -Version: 1.1.4 +Version: 1.1.6 Highlights: @@ -12,8 +12,8 @@ Highlights: - Log: image_name,size_before,size_after,saving,saving_percent - Summary inkl. Zeit benötigt -Änderungen in 1.1.4: -- Libcaesium 1.1.0 kann nun auch gif verkleinern +Änderungen in 1.1.6: +- Libcaesium 1.3.0 kann nun auch files ignorieren, wenn die Ersparnis kleiner als --min-savings (Standard: 2%) ist """ import argparse @@ -35,11 +35,12 @@ from threading import Lock from typing import List, Optional -__version__ = "1.1.4" +__version__ = "1.1.6" ALLOWED_EXT = {".jpg", ".jpeg", ".png", ".webp", ".gif"} PROGRESS_BAR_LEN = 40 TEMP_PREFIX = "pptx_compress_" +DEFAULT_MIN_SAVINGS = "2%" # -------------------- Utilities -------------------- @@ -89,7 +90,7 @@ def zip_dir_to_pptx(src_dir: Path, out_pptx: Path): def which(cmd: str): return shutil.which(cmd) -def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int | None, quality: int) -> Path | None: +def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int | None, quality: int, min_savings: str) -> Path | None: exe = which("caesiumclt") if not exe: raise RuntimeError("[ERROR] 'caesiumclt' wurde nicht gefunden. 
Bitte CaesiumCLT installieren und in PATH verfügbar machen.") @@ -97,7 +98,7 @@ def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int | ext = original.suffix.lower() if ext not in {".jpg", ".jpeg", ".png", ".webp", ".gif"}: return None - cmd = [exe, "-q", str(quality), "-O", "bigger", "-o", str(out_dir)] + cmd = [exe, "-q", str(quality), "-O", "bigger", "--min-savings", min_savings, "-o", str(out_dir)] if caesium_threads is not None: cmd += ["--threads", str(caesium_threads)] cmd += [str(original)] @@ -121,40 +122,45 @@ def format_duration(seconds: float) -> str: return f"{hms}.{frac[:2]}" return base -def get_slide_numbers_for_image(rels_dir: str, image_filename: str) -> Optional[List[int]]: - """ - Durchsucht alle .rels-Dateien im angegebenen Verzeichnis und gibt die Slide-Nummern zurück, - in denen die angegebene Bilddatei referenziert wird. +def build_image_slide_index(rels_dir: Path) -> dict[str, List[int]]: + if not rels_dir.exists() or not rels_dir.is_dir(): + return {} - :param rels_dir: Pfad zum Verzeichnis ppt/slides/_rels - :param image_filename: z. B. 
'image80.png' - :return: Liste von Slide-Nummern oder None - """ - slide_numbers = [] + image_to_slides: dict[str, set[int]] = {} - for rels_file in os.listdir(rels_dir): - if rels_file.startswith("slide") and rels_file.endswith(".xml.rels"): - rels_path = os.path.join(rels_dir, rels_file) + for rels_path in rels_dir.iterdir(): + rels_file = rels_path.name + if rels_file.startswith("slide") and rels_file.endswith(".xml.rels") and rels_path.is_file(): + match = re.search(r"slide(\d+)\.xml\.rels$", rels_file) + if not match: + continue + slide_number = int(match.group(1)) try: tree = ET.parse(rels_path) root = tree.getroot() for rel in root.findall(".//{http://schemas.openxmlformats.org/package/2006/relationships}Relationship"): target = rel.attrib.get("Target", "") - if image_filename in target: - match = re.search(r"slide(\d+).xml.rels", rels_file) - if match: - slide_number = int(match.group(1)) - slide_numbers.append(slide_number) - except ET.ParseError: - print(f"Fehler beim Parsen von {rels_file}") + image_name = Path(target).name + if image_name: + if image_name not in image_to_slides: + image_to_slides[image_name] = set() + image_to_slides[image_name].add(slide_number) + except (ET.ParseError, OSError): + print(f"Fehler beim Lesen von {rels_file}") - return slide_numbers if slide_numbers else None + return {img: sorted(slides) for img, slides in image_to_slides.items()} + + +def get_slide_numbers_for_image(rels_dir: Path, image_filename: str) -> Optional[List[int]]: + image_to_slides = build_image_slide_index(rels_dir) + slides = image_to_slides.get(image_filename) + return slides if slides else None # -------------------- Core per-deck processing -------------------- -def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quality: int) -> dict: +def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quality: int, min_savings: str) -> dict: start_time = time.perf_counter() result = { "input": str(input_pptx), @@ -188,6 
+194,7 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali z.extractall(work_dir) slides_dir = work_dir / "ppt" / "slides" + rels_dir = slides_dir / "_rels" media_dir = work_dir / "ppt" / "media" images = [] @@ -207,23 +214,23 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali caesium_threads = 1 if threads and threads > 1 else None lock = Lock() done_count = 0 + image_to_slides = build_image_slide_index(rels_dir) def worker(idx: int, img_path: Path): nonlocal done_count - ext = img_path.suffix.lower() orig_size = img_path.stat().st_size chosen_size = orig_size - found_in_slide=None - slide_nr="" + found_in_slide = None + slide_nr = "" try: - found_in_slide = get_slide_numbers_for_image(slides_dir.name, img_path.name) + found_in_slide = image_to_slides.get(img_path.name) if found_in_slide is None: slide_nr = "NOT_USED" else: slide_nr = str(found_in_slide) out_sub = scratch_dir / f"img_{idx:06d}" - caesium_out = compress_with_caesium(img_path, out_sub, caesium_threads, quality) + caesium_out = compress_with_caesium(img_path, out_sub, caesium_threads, quality, min_savings) if caesium_out and caesium_out.exists(): s = caesium_out.stat().st_size if s < orig_size: @@ -359,15 +366,16 @@ def main(): formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument('-i','--input', nargs='*', help='Input-PPTX (eine oder mehrere, Wildcards erlaubt). 
Bei mehreren: -O erforderlich.') + parser.add_argument('--input-dir', help='Eingabe-Verzeichnis (optional, für Batch)') parser.add_argument('-o','--output', help='Output-PPTX (nur Single-Mode)') parser.add_argument('-O','--output-dir', help='Output-Verzeichnis (erforderlich für Batch)') - parser.add_argument('--input-dir', help='Eingabe-Verzeichnis (optional, für Batch)') parser.add_argument('--pattern', default='*.pptx', help='Dateimuster für --input-dir') parser.add_argument('--recursive', action='store_true', help='Rekursiv in --input-dir suchen') #parser.add_argument('-t','--threads', type=int, default=min(32, os.cpu_count() or 4), help='Anzahl paralleler Threads pro Datei') parser.add_argument('-t','--threads', type=int, default=16, help='Anzahl paralleler Threads pro Datei') parser.add_argument('-q','--quality', type=int, default=90, help='Qualität für caesiumclt (0..100), höher = bessere Qualität / größere Datei') - parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}') + parser.add_argument('--min-savings', default=DEFAULT_MIN_SAVINGS, help="Mindestersparnis für caesiumclt (z. B. 
2%%, 100KB, 1MB oder Bytes als Zahl)") + parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}', help="Zeigt die Versionsnummer an" ) args = parser.parse_args() @@ -412,7 +420,7 @@ def main(): failures += 1 continue dst = out_dir / f"{src.stem}_compressed.pptx" - res = process_single_deck(src, dst, args.threads, args.quality) + res = process_single_deck(src, dst, args.threads, args.quality, args.min_savings) if res['ok']: successes += 1 overall_before += res['size_before'] @@ -427,7 +435,7 @@ def main(): dst = Path(args.output_dir) / f"{src.stem}_compressed.pptx" else: dst = Path(args.output).resolve() if args.output else src.with_name(f"{src.stem}_compressed.pptx") - res = process_single_deck(src, dst, args.threads, args.quality) + res = process_single_deck(src, dst, args.threads, args.quality, args.min_savings) if res['ok']: successes += 1 overall_before += res['size_before']