#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, Batch, sauberes Cleanup) Version: 1.1.2 Highlights: - Caesium-Scratch außerhalb des PPTX-Arbeitsverzeichnisses -> keine Tempfiles in finaler PPTX - Safety-Cleanup: entfernt 'caesium*' Ordner und '*.tmp' in ppt/media, bevor gezippt wird - Overwrite Policy: -O bigger - Log: image_name,size_before,size_after,saving,saving_percent - Summary inkl. Zeit benötigt Änderungen in 1.1.2: - Batch Mode hinzugefügt - Shellscript-Fix (Store-Alias vermeiden) und Doku-Anpassungen """ import argparse import os import sys import zipfile import tempfile import shutil import subprocess import time import fnmatch from glob import glob from pathlib import Path from datetime import timedelta from concurrent.futures import ThreadPoolExecutor, as_completed from threading import Lock __version__ = "1.1.2" ALLOWED_EXT = {".jpg", ".jpeg", ".png", ".webp", ".gif"} # GIF wird übersprungen PROGRESS_BAR_LEN = 40 TEMP_PREFIX = "pptx_compress_" # -------------------- Utilities -------------------- def human_mb(nbytes: int) -> float: return round(nbytes / (1024 * 1024), 2) def human_kb(nbytes: int) -> float: return round(nbytes / 1024,2) def ensure_clean_file(path: Path): if path.exists(): try: if path.is_file(): path.unlink() else: shutil.rmtree(path, ignore_errors=True) except Exception: pass def cleanup_old_temps(): tmp_root = Path(tempfile.gettempdir()) for p in tmp_root.glob(f"{TEMP_PREFIX}*"): try: if p.is_dir(): shutil.rmtree(p, ignore_errors=True) else: p.unlink(missing_ok=True) except Exception: pass def print_progress(i: int, total: int): if total <= 0: return done = int(PROGRESS_BAR_LEN * i / total) bar = "█" * done + "-" * (PROGRESS_BAR_LEN - done) pct = int(i * 100 / total) print(f"\rBilder: |{bar}| {i}/{total} ({pct}%)", end="", flush=True) def zip_dir_to_pptx(src_dir: Path, out_pptx: Path): with zipfile.ZipFile(out_pptx, "w", compression=zipfile.ZIP_DEFLATED) as z: for root, _, files in os.walk(src_dir): for f in files: full = Path(root) / f rel = full.relative_to(src_dir) z.write(full, arcname=str(rel)) def which(cmd: str): return shutil.which(cmd) def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int | None, quality: int) -> Path | None: exe = which("caesiumclt") if not exe: raise RuntimeError("❌ 'caesiumclt' wurde nicht gefunden. Bitte CaesiumCLT installieren und in PATH verfügbar machen.") out_dir.mkdir(parents=True, exist_ok=True) ext = original.suffix.lower() if ext not in {".jpg", ".jpeg", ".png", ".webp"}: return None cmd = [exe, "-q", str(quality), "-O", "bigger", "-o", str(out_dir)] if caesium_threads is not None: cmd += ["--threads", str(caesium_threads)] cmd += [str(original)] try: r = subprocess.run(cmd, capture_output=True, text=True) if r.returncode != 0: sys.stderr.write(f"[caesiumclt] Fehler bei {original.name}:{r.stderr}") return None out_file = out_dir / original.name return out_file if out_file.exists() else None except Exception as ex: sys.stderr.write(f"[caesiumclt] Ausnahme bei {original.name}: {ex}") return None def format_duration(seconds: float) -> str: total_ms = int(round(seconds * 1000)) td = timedelta(milliseconds=total_ms) base = str(td) if "." in base: hms, frac = base.split(".", 1) return f"{hms}.{frac[:2]}" return base # -------------------- Core per-deck processing -------------------- def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quality: int) -> dict: start_time = time.perf_counter() result = { "input": str(input_pptx), "output": str(output_pptx), "ok": False, "size_before": 0, "size_after": 0, "elapsed_sec": 0.0, "error": None, "log_file": None, } try: if not input_pptx.exists() or input_pptx.suffix.lower() != ".pptx": raise ValueError("Eingabedatei existiert nicht oder ist keine .pptx") cleanup_old_temps() ensure_clean_file(output_pptx) work_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "work_")) scratch_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "scratch_")) log_file = output_pptx.with_suffix(".log.csv") ensure_clean_file(log_file) log_lines = ["image_name;size_before(kb);size_after(kb);saving(kb);saving_percent(%)\n"] size_before = input_pptx.stat().st_size result["size_before"] = size_before with zipfile.ZipFile(input_pptx, "r") as z: z.extractall(work_dir) media_dir = work_dir / "ppt" / "media" images = [] if media_dir.exists(): for f in sorted(media_dir.iterdir()): if f.is_file() and f.suffix.lower() in ALLOWED_EXT: images.append(f) total = len(images) print(f"🔧 {input_pptx.name}: {total} Bild(er) gefunden") print_progress(0, total) if not which("caesiumclt"): raise RuntimeError("'caesiumclt' nicht gefunden. Bitte installieren und in PATH verfügbar machen.") caesium_threads = 1 if threads and threads > 1 else None lock = Lock() done_count = 0 def worker(idx: int, img_path: Path): nonlocal done_count ext = img_path.suffix.lower() orig_size = img_path.stat().st_size if ext == ".gif": with lock: done_count += 1 log_lines.append(f"{img_path.name};{human_kb(orig_size)};{human_kb(orig_size)};0;0.0\n") print_progress(done_count, total) return chosen_size = orig_size try: out_sub = scratch_dir / f"img_{idx:06d}" caesium_out = compress_with_caesium(img_path, out_sub, caesium_threads, quality) if caesium_out and caesium_out.exists(): s = caesium_out.stat().st_size if s < orig_size: tmp_target = img_path.with_suffix(img_path.suffix + ".tmp") shutil.copy2(caesium_out, tmp_target) tmp_target.replace(img_path) chosen_size = s except Exception: chosen_size = orig_size finally: saving = orig_size - chosen_size saving_percent = round((saving / orig_size) * 100, 2) if orig_size > 0 else 0.0 with lock: log_lines.append(f"{img_path.name};{human_kb(orig_size)};{human_kb(chosen_size)};{human_kb(saving)};{saving_percent}\n") done_count += 1 print_progress(done_count, total) if total > 0: with ThreadPoolExecutor(max_workers=max(1, threads)) as ex: futures = [ex.submit(worker, i, p) for i, p in enumerate(images, start=1)] for _ in as_completed(futures): pass print() # newline # Safety cleanup inside work_dir for p in work_dir.rglob("*"): try: if p.is_dir() and p.name.lower().startswith("caesium"): shutil.rmtree(p, ignore_errors=True) except Exception: pass if media_dir.exists(): for f in media_dir.iterdir(): if f.is_file() and f.suffix.lower() == ".tmp": try: f.unlink(missing_ok=True) except Exception: pass zip_dir_to_pptx(work_dir, output_pptx) size_after = output_pptx.stat().st_size result["size_after"] = size_after try: with open(log_file, "w", encoding="utf-8") as f: f.writelines(log_lines) except Exception: pass elapsed = time.perf_counter() - start_time result["elapsed_sec"] = elapsed result["log_file"] = str(log_file) result["ok"] = True savings_pct = 0.0 if size_before == 0 else round(100.0 * (size_before - size_after) / size_before, 2) print(f"✅ Fertig! ({input_pptx.name})") print("📋 Zusammenfassung ----------------") print(" Vorher: ", human_mb(size_before), "MB") print(" Nachher: ", human_mb(size_after), "MB") print(" Ersparnis: ", f"{savings_pct}%") print(" Zeit: ", format_duration(elapsed)) print(" Log: ", log_file) except Exception as e: result["error"] = str(e) finally: try: shutil.rmtree(work_dir, ignore_errors=True) # type: ignore[name-defined] except Exception: pass try: shutil.rmtree(scratch_dir, ignore_errors=True) # type: ignore[name-defined] except Exception: pass cleanup_old_temps() return result # -------------------- Input helpers -------------------- def expand_inputs(inputs: list[str]) -> list[Path]: files: list[Path] = [] for inp in inputs: p = Path(inp) if any(ch in inp for ch in ['*', '?']): for g in glob(inp): if g.lower().endswith('.pptx'): files.append(Path(g).resolve()) else: if p.is_dir(): for g in p.glob('*.pptx'): files.append(g.resolve()) else: if p.suffix.lower() == '.pptx': files.append(p.resolve()) seen = set() uniq = [] for f in files: if str(f) not in seen: uniq.append(f) seen.add(str(f)) return uniq def collect_from_dir(input_dir: Path, pattern: str, recursive: bool) -> list[Path]: files: list[Path] = [] if recursive: for root, _, names in os.walk(input_dir): for n in names: if fnmatch.fnmatch(n, pattern): p = Path(root) / n if p.suffix.lower() == '.pptx': files.append(p.resolve()) else: for p in input_dir.glob(pattern): if p.suffix.lower() == '.pptx': files.append(p.resolve()) seen = set() out = [] for f in files: s = str(f) if s not in seen: out.append(f) seen.add(s) return out # -------------------- CLI -------------------- def main(): parser = argparse.ArgumentParser( description="PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, Batch, sauberes Cleanup)", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument('-i','--input', nargs='*', help='Input-PPTX (eine oder mehrere, Wildcards erlaubt). Bei mehreren: -O erforderlich.') parser.add_argument('-o','--output', help='Output-PPTX (nur Single-Mode)') parser.add_argument('-O','--output-dir', help='Output-Verzeichnis (erforderlich für Batch)') parser.add_argument('--input-dir', help='Eingabe-Verzeichnis (optional, für Batch)') parser.add_argument('--pattern', default='*.pptx', help='Dateimuster für --input-dir') parser.add_argument('--recursive', action='store_true', help='Rekursiv in --input-dir suchen') #parser.add_argument('-t','--threads', type=int, default=min(32, os.cpu_count() or 4), help='Anzahl paralleler Threads pro Datei') parser.add_argument('-t','--threads', type=int, default=16, help='Anzahl paralleler Threads pro Datei') parser.add_argument('-q','--quality', type=int, default=90, help='Qualität für caesiumclt (0..100), höher = bessere Qualität / größere Datei') parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}') args = parser.parse_args() print("Threads used: ", args.threads," Threads") if args.quality < 0 or args.quality > 100: print('❌ Ungültige Qualität. Erlaubt: 0..100') sys.exit(1) input_files: list[Path] = [] if args.input: input_files.extend(expand_inputs(args.input)) if args.input_dir: input_files.extend(collect_from_dir(Path(args.input_dir), args.pattern, args.recursive)) if len(input_files) == 0: parser.print_help() sys.exit(1) batch_mode = len(input_files) > 1 if batch_mode and not args.output_dir: print('❌ Batch-Modus erkannt. Bitte -O/--output-dir angeben.') sys.exit(2) if not which('caesiumclt'): print("❌ 'caesiumclt' nicht gefunden. Bitte installieren und in PATH verfügbar machen.") sys.exit(3) overall_before = 0 overall_after = 0 successes = 0 failures = 0 if batch_mode: out_dir = Path(args.output_dir).resolve() out_dir.mkdir(parents=True, exist_ok=True) print(f"🗂️ Batch: {len(input_files)} Datei(en). Output-Verzeichnis: {out_dir}") for src in input_files: if not src.exists(): print(f"- Übersprungen (nicht gefunden): {src}") failures += 1 continue dst = out_dir / f"{src.stem}_compressed.pptx" res = process_single_deck(src, dst, args.threads, args.quality) if res['ok']: successes += 1 overall_before += res['size_before'] overall_after += res['size_after'] else: failures += 1 print(f" Fehler: {src.name} -> {res['error']}") else: src = input_files[0] if args.output_dir: Path(args.output_dir).mkdir(parents=True, exist_ok=True) dst = Path(args.output_dir) / f"{src.stem}_compressed.pptx" else: dst = Path(args.output).resolve() if args.output else src.with_name(f"{src.stem}_compressed.pptx") res = process_single_deck(src, dst, args.threads, args.quality) if res['ok']: successes += 1 overall_before += res['size_before'] overall_after += res['size_after'] else: failures += 1 print(f" Fehler: {src.name} -> {res['error']}") if batch_mode: print(f"====== 📊 Gesamt-Summary ======") print(f"✅ Dateien erfolgreich: {successes}") if failures > 0: print(f"❌Dateien fehlgeschlagen: {failures}") if overall_before > 0: pct = round(100.0 * (overall_before - overall_after) / overall_before, 2) else: pct = 0.0 print(f"Gesamtgröße vorher: {human_mb(overall_before)} MB") print(f"Gesamtgröße nachher: {human_mb(overall_after)} MB") print(f"Gesamt-Ersparnis: {pct}%") if __name__ == '__main__': main()