From 38390362e65d1d15ac65c5044276512e328e7805 Mon Sep 17 00:00:00 2001 From: "Conrads, Frank-MGB" Date: Mon, 8 Sep 2025 16:39:09 +0200 Subject: [PATCH] Version 1.0.0 --- pptx_image_compress.py | 333 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 333 insertions(+) create mode 100644 pptx_image_compress.py diff --git a/pptx_image_compress.py b/pptx_image_compress.py new file mode 100644 index 0000000..e23ad1d --- /dev/null +++ b/pptx_image_compress.py @@ -0,0 +1,333 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, sauberes Cleanup) +Version: 1.0.0 + +Highlights: +- Caesium-Scratch außerhalb des PPTX-Arbeitsverzeichnisses -> keine Tempfiles in finaler PPTX +- Safety-Cleanup: entfernt 'caesium*' Ordner und '*.tmp' in ppt/media, bevor gezippt wird +- Overwrite Policy: -O bigger +- Log: image_name,size_before,size_after,saving,saving_percent +- Summary inkl. Zeit benötigt + +Benutzung: + python pptx_image_compress.py -i input.pptx [-o output.pptx] [-t THREADS] [--version] +""" + +import argparse +import os +import sys +import zipfile +import tempfile +import shutil +import subprocess +import time +from pathlib import Path +from datetime import timedelta +from concurrent.futures import ThreadPoolExecutor, as_completed +from threading import Lock + +# -------------------- Version -------------------- +__version__ = "1.0.0" + +# -------------------- Konfiguration -------------------- +ALLOWED_EXT = {".jpg", ".jpeg", ".png", ".webp", ".gif"} # GIF wird übersprungen +CAESIUM_QUALITY = 90 # -q 90 +PROGRESS_BAR_LEN = 40 +TEMP_PREFIX = "pptx_compress_" + + +# -------------------- Utilities -------------------- +def human_mb(nbytes: int) -> float: + return round(nbytes / (1024 * 1024), 2) + + +def ensure_clean_file(path: Path): + if path.exists(): + try: + if path.is_file(): + path.unlink() + else: + shutil.rmtree(path, ignore_errors=True) + except Exception: + pass + + +def cleanup_old_temps(): + tmp_root = Path(tempfile.gettempdir()) + for p in tmp_root.glob(f"{TEMP_PREFIX}*"): + try: + if p.is_dir(): + shutil.rmtree(p, ignore_errors=True) + else: + p.unlink(missing_ok=True) + except Exception: + pass + + +def print_progress(i: int, total: int): + if total <= 0: + return + done = int(PROGRESS_BAR_LEN * i / total) + bar = "█" * done + "-" * (PROGRESS_BAR_LEN - done) + pct = int(i * 100 / total) + print(f"\rBilder: |{bar}| {i}/{total} ({pct}%)", end="", flush=True) + + +def zip_dir_to_pptx(src_dir: Path, out_pptx: Path): + with zipfile.ZipFile(out_pptx, "w", compression=zipfile.ZIP_DEFLATED) as z: + for root, _, files in os.walk(src_dir): + for f in files: + full = Path(root) / f + rel = full.relative_to(src_dir) + z.write(full, arcname=str(rel)) + + +def which(cmd: str) -> str | None: + return shutil.which(cmd) + + +def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int | None) -> Path | None: + """ + Ruft caesiumclt auf, um eine komprimierte Version zu erzeugen. + Output wird ins out_dir geschrieben (gleicher Filename). + Gibt Pfad zur erzeugten Datei zurück oder None bei Fehler. + """ + exe = which("caesiumclt") + if not exe: + raise RuntimeError( + "❌ 'caesiumclt' wurde nicht gefunden. Bitte CaesiumCLT installieren und in PATH verfügbar machen." + ) + + out_dir.mkdir(parents=True, exist_ok=True) + + # Nur Formate an Caesium geben, die es unterstützt: JPG/JPEG, PNG, WEBP + ext = original.suffix.lower() + if ext not in {".jpg", ".jpeg", ".png", ".webp"}: + return None # GIF & andere werden übersprungen + + cmd = [ + exe, + "-q", str(CAESIUM_QUALITY), + "-O", "bigger", # <<< nur überschreiben, wenn Ziel größer ist + "-o", str(out_dir), + ] + if caesium_threads is not None: + cmd += ["--threads", str(caesium_threads)] + cmd += [str(original)] + + try: + r = subprocess.run(cmd, capture_output=True, text=True) + if r.returncode != 0: + sys.stderr.write(f"\n[caesiumclt] Fehler bei {original.name}:\n{r.stderr}\n") + return None + + out_file = out_dir / original.name + return out_file if out_file.exists() else None + except Exception as ex: + sys.stderr.write(f"\n[caesiumclt] Ausnahme bei {original.name}: {ex}\n") + return None + + +def format_duration(seconds: float) -> str: + total_ms = int(round(seconds * 1000)) + td = timedelta(milliseconds=total_ms) + base = str(td) + if "." in base: + hms, frac = base.split(".", 1) + return f"{hms}.{frac[:2]}" + return base + + +def main(): + start_time = time.perf_counter() + + parser = argparse.ArgumentParser( + description="PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, sauberes Cleanup)", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("-i", "--input", help="Input-PPTX", required=False) + parser.add_argument("-o", "--output", help="Output-PPTX", required=False) + parser.add_argument( + "-t", "--threads", + type=int, + default=min(32, os.cpu_count() or 4), + help="Anzahl paralleler Threads für die Bildverarbeitung" + ) + parser.add_argument( + "--version", + action="version", + version=f"%(prog)s {__version__}" + ) + + args = parser.parse_args() + + if not args.input: + parser.print_help() + sys.exit(1) + + input_pptx = Path(args.input).resolve() + if not input_pptx.exists() or input_pptx.suffix.lower() != ".pptx": + print("❌ Eingabedatei existiert nicht oder ist keine .pptx") + sys.exit(2) + + if args.output: + output_pptx = Path(args.output).resolve() + else: + output_pptx = input_pptx.with_name(f"{input_pptx.stem}_compressed.pptx") + + # Vorherige Temp-Files & existierendes Output löschen + cleanup_old_temps() + ensure_clean_file(output_pptx) + + # --- Zwei getrennte Temp-Verzeichnisse --- + work_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "work_")) # entpackte PPTX + scratch_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "scratch_")) # Caesium-Ausgaben (außerhalb!) + + # Logdatei neben Output + log_file = output_pptx.with_suffix(".log") + ensure_clean_file(log_file) + log_lines = ["image_name,size_before,size_after,saving,saving_percent\n"] + + size_before = input_pptx.stat().st_size + + try: + # Entpacken + with zipfile.ZipFile(input_pptx, "r") as z: + z.extractall(work_dir) + + media_dir = work_dir / "ppt" / "media" + images = [] + if media_dir.exists(): + for f in sorted(media_dir.iterdir()): + if f.is_file() and f.suffix.lower() in ALLOWED_EXT: + images.append(f) + + total = len(images) + print(f"🔧 Finde Bilder in {media_dir} ... {total} Kandidaten") + print_progress(0, total) + + # Vorab prüfen, ob caesiumclt verfügbar ist + if not which("caesiumclt"): + print("\n❌ 'caesiumclt' nicht gefunden. Bitte installieren und in PATH verfügbar machen.") + sys.exit(3) + + # Oversubscription vermeiden: viele Python-Threads -> caesium intern 1 Thread + caesium_threads = 1 if args.threads and args.threads > 1 else None + + # Thread-sichere Fortschritts- & Log-Verwaltung + lock = Lock() + done_count = 0 + + def worker(idx: int, img_path: Path): + nonlocal done_count + ext = img_path.suffix.lower() + orig_size = img_path.stat().st_size + + # GIF überspringen + if ext == ".gif": + with lock: + done_count += 1 + log_lines.append(f"{img_path.name},{orig_size},{orig_size},0,0.0\n") + print_progress(done_count, total) + return + + chosen_size = orig_size + try: + # Eigener Output-Unterordner pro Bild, um Kollisionen zu vermeiden + out_sub = scratch_dir / f"img_{idx:06d}" + caesium_out = compress_with_caesium(img_path, out_sub, caesium_threads) + + if caesium_out and caesium_out.exists(): + s = caesium_out.stat().st_size + if s < orig_size: + # kleineren ersetzen (atomar) + tmp_target = img_path.with_suffix(img_path.suffix + ".tmp") + shutil.copy2(caesium_out, tmp_target) + tmp_target.replace(img_path) + chosen_size = s + + except Exception: + chosen_size = orig_size # Original beibehalten + + finally: + saving = orig_size - chosen_size + saving_percent = round((saving / orig_size) * 100, 2) if orig_size > 0 else 0.0 + with lock: + log_lines.append(f"{img_path.name},{orig_size},{chosen_size},{saving},{saving_percent}\n") + done_count += 1 + print_progress(done_count, total) + + # Parallel ausführen + if total > 0: + with ThreadPoolExecutor(max_workers=max(1, args.threads)) as ex: + futures = [ex.submit(worker, i, p) for i, p in enumerate(images, start=1)] + for _ in as_completed(futures): + pass # Fortschritt wird im Worker gezeichnet + + print() # newline nach Progressbar + + # --- Safety-Cleanup innerhalb des Arbeitsverzeichnisses --- + # 1) Entferne evtl. vorhandene caesium*-Ordner (aus alten Runs) + for p in work_dir.rglob("*"): + try: + if p.is_dir() and p.name.lower().startswith("caesium"): + shutil.rmtree(p, ignore_errors=True) + except Exception: + pass + + # 2) Lösche eventuelle .tmp-Dateien in ppt/media + media_dir = work_dir / "ppt" / "media" + if media_dir.exists(): + for f in media_dir.iterdir(): + if f.is_file() and f.suffix.lower() == ".tmp": + try: + f.unlink(missing_ok=True) + except Exception: + pass + + # Neue PPTX bauen (nur work_dir -> scratch_dir liegt außerhalb und ist damit sicher ausgeschlossen) + zip_dir_to_pptx(work_dir, output_pptx) + size_after = output_pptx.stat().st_size + + # Log schreiben + try: + with open(log_file, "w", encoding="utf-8") as f: + f.writelines(log_lines) + except Exception: + pass + # Summary + savings_pct = 0.0 + if size_before > 0: + savings_pct = round(100.0 * (size_before - size_after) / size_before, 2) + + elapsed = time.perf_counter() - start_time + + print("\n✅ Fertig!") + print("Summary") + print("-------") + print(f"Version: {__version__}") + print(f"Name: {output_pptx.name}") + print(f"Datei-Größe vorher: {human_mb(size_before)} MB") + print(f"Datei-Größe nachher: {human_mb(size_after)} MB") + print(f"Ersparnis: {savings_pct}%") + print(f"Zeit benötigt: {format_duration(elapsed)}") + print(f"Log-Datei: {log_file}") + + finally: + # Aufräumen ALLER temporären Dateien/Ordner + try: + shutil.rmtree(work_dir, ignore_errors=True) + except Exception: + pass + try: + shutil.rmtree(scratch_dir, ignore_errors=True) + except Exception: + pass + # Zusätzlich: ältere Reste entfernen + cleanup_old_temps() + + +if __name__ == "__main__": + main()