#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, sauberes Cleanup) Version: 1.0.0 Highlights: - Caesium-Scratch außerhalb des PPTX-Arbeitsverzeichnisses -> keine Tempfiles in finaler PPTX - Safety-Cleanup: entfernt 'caesium*' Ordner und '*.tmp' in ppt/media, bevor gezippt wird - Overwrite Policy: -O bigger - Log: image_name,size_before,size_after,saving,saving_percent - Summary inkl. Zeit benötigt Benutzung: python pptx_image_compress.py -i input.pptx [-o output.pptx] [-t THREADS] [--version] """ import argparse import os import sys import zipfile import tempfile import shutil import subprocess import time from pathlib import Path from datetime import timedelta from concurrent.futures import ThreadPoolExecutor, as_completed from threading import Lock __version__ = "1.0.0" ALLOWED_EXT = {".jpg", ".jpeg", ".png", ".webp", ".gif"} CAESIUM_QUALITY = 90 PROGRESS_BAR_LEN = 40 TEMP_PREFIX = "pptx_compress_" def human_mb(nbytes: int) -> float: return round(nbytes / (1024 * 1024), 2) def ensure_clean_file(path: Path): if path.exists(): try: if path.is_file(): path.unlink() else: shutil.rmtree(path, ignore_errors=True) except Exception: pass def cleanup_old_temps(): tmp_root = Path(tempfile.gettempdir()) for p in tmp_root.glob(f"{TEMP_PREFIX}*"): try: if p.is_dir(): shutil.rmtree(p, ignore_errors=True) else: p.unlink(missing_ok=True) except Exception: pass def print_progress(i: int, total: int): if total <= 0: return done = int(PROGRESS_BAR_LEN * i / total) bar = "█" * done + "-" * (PROGRESS_BAR_LEN - done) pct = int(i * 100 / total) print(f" Bilder: |{bar}| {i}/{total} ({pct}%)", end="", flush=True) def zip_dir_to_pptx(src_dir: Path, out_pptx: Path): with zipfile.ZipFile(out_pptx, "w", compression=zipfile.ZIP_DEFLATED) as z: for root, _, files in os.walk(src_dir): for f in files: full = Path(root) / f rel = full.relative_to(src_dir) z.write(full, arcname=str(rel)) def which(cmd: str): return shutil.which(cmd) def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int | None) -> Path | None: exe = which("caesiumclt") if not exe: raise RuntimeError("❌ 'caesiumclt' wurde nicht gefunden. Bitte CaesiumCLT installieren und in PATH verfügbar machen.") out_dir.mkdir(parents=True, exist_ok=True) ext = original.suffix.lower() if ext not in {".jpg", ".jpeg", ".png", ".webp"}: return None cmd = [exe, "-q", str(CAESIUM_QUALITY), "-O", "bigger", "-o", str(out_dir)] if caesium_threads is not None: cmd += ["--threads", str(caesium_threads)] cmd += [str(original)] try: r = subprocess.run(cmd, capture_output=True, text=True) if r.returncode != 0: sys.stderr.write(f" [caesiumclt] Fehler bei {original.name}: {r.stderr} ") return None out_file = out_dir / original.name return out_file if out_file.exists() else None except Exception as ex: sys.stderr.write(f" [caesiumclt] Ausnahme bei {original.name}: {ex} ") return None def format_duration(seconds: float) -> str: total_ms = int(round(seconds * 1000)) td = timedelta(milliseconds=total_ms) base = str(td) if "." in base: hms, frac = base.split(".", 1) return f"{hms}.{frac[:2]}" return base def main(): start_time = time.perf_counter() parser = argparse.ArgumentParser(description="PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, sauberes Cleanup)", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("-i", "--input", help="Input-PPTX", required=False) parser.add_argument("-o", "--output", help="Output-PPTX", required=False) parser.add_argument("-t", "--threads", type=int, default=min(32, os.cpu_count() or 4), help="Anzahl paralleler Threads für die Bildverarbeitung") parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}") args = parser.parse_args() if not args.input: parser.print_help() sys.exit(1) input_pptx = Path(args.input).resolve() if not input_pptx.exists() or input_pptx.suffix.lower() != ".pptx": print("❌ Eingabedatei existiert nicht oder ist keine .pptx") sys.exit(2) output_pptx = Path(args.output).resolve() if args.output else input_pptx.with_name(f"{input_pptx.stem}_compressed.pptx") cleanup_old_temps() ensure_clean_file(output_pptx) work_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "work_")) scratch_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "scratch_")) log_file = output_pptx.with_suffix(".log") ensure_clean_file(log_file) log_lines = ["image_name,size_before,size_after,saving,saving_percent "] size_before = input_pptx.stat().st_size try: with zipfile.ZipFile(input_pptx, "r") as z: z.extractall(work_dir) media_dir = work_dir / "ppt" / "media" images = [] if media_dir.exists(): for f in sorted(media_dir.iterdir()): if f.is_file() and f.suffix.lower() in ALLOWED_EXT: images.append(f) total = len(images) print(f"🔧 Finde Bilder in {media_dir} ... {total} Kandidaten") print_progress(0, total) if not which("caesiumclt"): print(" ❌ 'caesiumclt' nicht gefunden. Bitte installieren und in PATH verfügbar machen.") sys.exit(3) caesium_threads = 1 if args.threads and args.threads > 1 else None lock = Lock() done_count = 0 def worker(idx: int, img_path: Path): nonlocal done_count ext = img_path.suffix.lower() orig_size = img_path.stat().st_size if ext == ".gif": with lock: done_count += 1 log_lines.append(f"{img_path.name},{orig_size},{orig_size},0,0.0 ") print_progress(done_count, total) return chosen_size = orig_size try: out_sub = scratch_dir / f"img_{idx:06d}" caesium_out = compress_with_caesium(img_path, out_sub, caesium_threads) if caesium_out and caesium_out.exists(): s = caesium_out.stat().st_size if s < orig_size: tmp_target = img_path.with_suffix(img_path.suffix + ".tmp") shutil.copy2(caesium_out, tmp_target) tmp_target.replace(img_path) chosen_size = s except Exception: chosen_size = orig_size finally: saving = orig_size - chosen_size saving_percent = round((saving / orig_size) * 100, 2) if orig_size > 0 else 0.0 with lock: log_lines.append(f"{img_path.name},{orig_size},{chosen_size},{saving},{saving_percent} ") done_count += 1 print_progress(done_count, total) if total > 0: with ThreadPoolExecutor(max_workers=max(1, args.threads)) as ex: futures = [ex.submit(worker, i, p) for i, p in enumerate(images, start=1)] for _ in as_completed(futures): pass print() for p in work_dir.rglob("*"): try: if p.is_dir() and p.name.lower().startswith("caesium"): shutil.rmtree(p, ignore_errors=True) except Exception: pass media_dir = work_dir / "ppt" / "media" if media_dir.exists(): for f in media_dir.iterdir(): if f.is_file() and f.suffix.lower() == ".tmp": try: f.unlink(missing_ok=True) except Exception: pass zip_dir_to_pptx(work_dir, output_pptx) size_after = output_pptx.stat().st_size try: with open(log_file, "w", encoding="utf-8") as f: f.writelines(log_lines) except Exception: pass savings_pct = 0.0 if size_before > 0: savings_pct = round(100.0 * (size_before - size_after) / size_before, 2) elapsed = time.perf_counter() - start_time print(" ✅ Fertig!") print("Summary") print("-------") print(f"Version: {__version__}") print(f"Name: {output_pptx.name}") print(f"Datei-Größe vorher: {human_mb(size_before)} MB") print(f"Datei-Größe nachher: {human_mb(size_after)} MB") print(f"Ersparnis: {savings_pct}%") print(f"Zeit benötigt: {format_duration(elapsed)}") print(f"Log-Datei: {log_file}") finally: try: shutil.rmtree(work_dir, ignore_errors=True) except Exception: pass try: shutil.rmtree(scratch_dir, ignore_errors=True) except Exception: pass cleanup_old_temps() if __name__ == "__main__": main()