#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, sauberes Cleanup) Version: 1.0.0 Highlights: - Caesium-Scratch außerhalb des PPTX-Arbeitsverzeichnisses -> keine Tempfiles in finaler PPTX - Safety-Cleanup: entfernt 'caesium*' Ordner und '*.tmp' in ppt/media, bevor gezippt wird - Overwrite Policy: -O bigger - Log: image_name,size_before,size_after,saving,saving_percent - Summary inkl. Zeit benötigt Benutzung: python pptx_image_compress.py -i input.pptx [-o output.pptx] [-t THREADS] [--version] """ import argparse import os import sys import zipfile import tempfile import shutil import subprocess import time from pathlib import Path from datetime import timedelta from concurrent.futures import ThreadPoolExecutor, as_completed from threading import Lock # -------------------- Version -------------------- __version__ = "1.0.0" # -------------------- Konfiguration -------------------- ALLOWED_EXT = {".jpg", ".jpeg", ".png", ".webp", ".gif"} # GIF wird übersprungen CAESIUM_QUALITY = 90 # -q 90 PROGRESS_BAR_LEN = 40 TEMP_PREFIX = "pptx_compress_" # -------------------- Utilities -------------------- def human_mb(nbytes: int) -> float: return round(nbytes / (1024 * 1024), 2) def ensure_clean_file(path: Path): if path.exists(): try: if path.is_file(): path.unlink() else: shutil.rmtree(path, ignore_errors=True) except Exception: pass def cleanup_old_temps(): tmp_root = Path(tempfile.gettempdir()) for p in tmp_root.glob(f"{TEMP_PREFIX}*"): try: if p.is_dir(): shutil.rmtree(p, ignore_errors=True) else: p.unlink(missing_ok=True) except Exception: pass def print_progress(i: int, total: int): if total <= 0: return done = int(PROGRESS_BAR_LEN * i / total) bar = "█" * done + "-" * (PROGRESS_BAR_LEN - done) pct = int(i * 100 / total) print(f"\rBilder: |{bar}| {i}/{total} ({pct}%)", end="", flush=True) def zip_dir_to_pptx(src_dir: Path, out_pptx: Path): with zipfile.ZipFile(out_pptx, "w", compression=zipfile.ZIP_DEFLATED) as z: for root, _, files in os.walk(src_dir): for f in files: full = Path(root) / f rel = full.relative_to(src_dir) z.write(full, arcname=str(rel)) def which(cmd: str) -> str | None: return shutil.which(cmd) def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int | None) -> Path | None: """ Ruft caesiumclt auf, um eine komprimierte Version zu erzeugen. Output wird ins out_dir geschrieben (gleicher Filename). Gibt Pfad zur erzeugten Datei zurück oder None bei Fehler. """ exe = which("caesiumclt") if not exe: raise RuntimeError( "❌ 'caesiumclt' wurde nicht gefunden. Bitte CaesiumCLT installieren und in PATH verfügbar machen." ) out_dir.mkdir(parents=True, exist_ok=True) # Nur Formate an Caesium geben, die es unterstützt: JPG/JPEG, PNG, WEBP ext = original.suffix.lower() if ext not in {".jpg", ".jpeg", ".png", ".webp"}: return None # GIF & andere werden übersprungen cmd = [ exe, "-q", str(CAESIUM_QUALITY), "-O", "bigger", # <<< nur überschreiben, wenn Ziel größer ist "-o", str(out_dir), ] if caesium_threads is not None: cmd += ["--threads", str(caesium_threads)] cmd += [str(original)] try: r = subprocess.run(cmd, capture_output=True, text=True) if r.returncode != 0: sys.stderr.write(f"\n[caesiumclt] Fehler bei {original.name}:\n{r.stderr}\n") return None out_file = out_dir / original.name return out_file if out_file.exists() else None except Exception as ex: sys.stderr.write(f"\n[caesiumclt] Ausnahme bei {original.name}: {ex}\n") return None def format_duration(seconds: float) -> str: total_ms = int(round(seconds * 1000)) td = timedelta(milliseconds=total_ms) base = str(td) if "." in base: hms, frac = base.split(".", 1) return f"{hms}.{frac[:2]}" return base def main(): start_time = time.perf_counter() parser = argparse.ArgumentParser( description="PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, sauberes Cleanup)", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument("-i", "--input", help="Input-PPTX", required=False) parser.add_argument("-o", "--output", help="Output-PPTX", required=False) parser.add_argument( "-t", "--threads", type=int, default=min(32, os.cpu_count() or 4), help="Anzahl paralleler Threads für die Bildverarbeitung" ) parser.add_argument( "--version", action="version", version=f"%(prog)s {__version__}" ) args = parser.parse_args() if not args.input: parser.print_help() sys.exit(1) input_pptx = Path(args.input).resolve() if not input_pptx.exists() or input_pptx.suffix.lower() != ".pptx": print("❌ Eingabedatei existiert nicht oder ist keine .pptx") sys.exit(2) if args.output: output_pptx = Path(args.output).resolve() else: output_pptx = input_pptx.with_name(f"{input_pptx.stem}_compressed.pptx") # Vorherige Temp-Files & existierendes Output löschen cleanup_old_temps() ensure_clean_file(output_pptx) # --- Zwei getrennte Temp-Verzeichnisse --- work_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "work_")) # entpackte PPTX scratch_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "scratch_")) # Caesium-Ausgaben (außerhalb!) # Logdatei neben Output log_file = output_pptx.with_suffix(".log") ensure_clean_file(log_file) log_lines = ["image_name,size_before,size_after,saving,saving_percent\n"] size_before = input_pptx.stat().st_size try: # Entpacken with zipfile.ZipFile(input_pptx, "r") as z: z.extractall(work_dir) media_dir = work_dir / "ppt" / "media" images = [] if media_dir.exists(): for f in sorted(media_dir.iterdir()): if f.is_file() and f.suffix.lower() in ALLOWED_EXT: images.append(f) total = len(images) print(f"🔧 Finde Bilder in {media_dir} ... {total} Kandidaten") print_progress(0, total) # Vorab prüfen, ob caesiumclt verfügbar ist if not which("caesiumclt"): print("\n❌ 'caesiumclt' nicht gefunden. Bitte installieren und in PATH verfügbar machen.") sys.exit(3) # Oversubscription vermeiden: viele Python-Threads -> caesium intern 1 Thread caesium_threads = 1 if args.threads and args.threads > 1 else None # Thread-sichere Fortschritts- & Log-Verwaltung lock = Lock() done_count = 0 def worker(idx: int, img_path: Path): nonlocal done_count ext = img_path.suffix.lower() orig_size = img_path.stat().st_size # GIF überspringen if ext == ".gif": with lock: done_count += 1 log_lines.append(f"{img_path.name},{orig_size},{orig_size},0,0.0\n") print_progress(done_count, total) return chosen_size = orig_size try: # Eigener Output-Unterordner pro Bild, um Kollisionen zu vermeiden out_sub = scratch_dir / f"img_{idx:06d}" caesium_out = compress_with_caesium(img_path, out_sub, caesium_threads) if caesium_out and caesium_out.exists(): s = caesium_out.stat().st_size if s < orig_size: # kleineren ersetzen (atomar) tmp_target = img_path.with_suffix(img_path.suffix + ".tmp") shutil.copy2(caesium_out, tmp_target) tmp_target.replace(img_path) chosen_size = s except Exception: chosen_size = orig_size # Original beibehalten finally: saving = orig_size - chosen_size saving_percent = round((saving / orig_size) * 100, 2) if orig_size > 0 else 0.0 with lock: log_lines.append(f"{img_path.name},{orig_size},{chosen_size},{saving},{saving_percent}\n") done_count += 1 print_progress(done_count, total) # Parallel ausführen if total > 0: with ThreadPoolExecutor(max_workers=max(1, args.threads)) as ex: futures = [ex.submit(worker, i, p) for i, p in enumerate(images, start=1)] for _ in as_completed(futures): pass # Fortschritt wird im Worker gezeichnet print() # newline nach Progressbar # --- Safety-Cleanup innerhalb des Arbeitsverzeichnisses --- # 1) Entferne evtl. vorhandene caesium*-Ordner (aus alten Runs) for p in work_dir.rglob("*"): try: if p.is_dir() and p.name.lower().startswith("caesium"): shutil.rmtree(p, ignore_errors=True) except Exception: pass # 2) Lösche eventuelle .tmp-Dateien in ppt/media media_dir = work_dir / "ppt" / "media" if media_dir.exists(): for f in media_dir.iterdir(): if f.is_file() and f.suffix.lower() == ".tmp": try: f.unlink(missing_ok=True) except Exception: pass # Neue PPTX bauen (nur work_dir -> scratch_dir liegt außerhalb und ist damit sicher ausgeschlossen) zip_dir_to_pptx(work_dir, output_pptx) size_after = output_pptx.stat().st_size # Log schreiben try: with open(log_file, "w", encoding="utf-8") as f: f.writelines(log_lines) except Exception: pass # Summary savings_pct = 0.0 if size_before > 0: savings_pct = round(100.0 * (size_before - size_after) / size_before, 2) elapsed = time.perf_counter() - start_time print("\n✅ Fertig!") print("Summary") print("-------") print(f"Version: {__version__}") print(f"Name: {output_pptx.name}") print(f"Datei-Größe vorher: {human_mb(size_before)} MB") print(f"Datei-Größe nachher: {human_mb(size_after)} MB") print(f"Ersparnis: {savings_pct}%") print(f"Zeit benötigt: {format_duration(elapsed)}") print(f"Log-Datei: {log_file}") finally: # Aufräumen ALLER temporären Dateien/Ordner try: shutil.rmtree(work_dir, ignore_errors=True) except Exception: pass try: shutil.rmtree(scratch_dir, ignore_errors=True) except Exception: pass # Zusätzlich: ältere Reste entfernen cleanup_old_temps() if __name__ == "__main__": main()