#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, Batch, sauberes Cleanup) Version: 1.1.7 Highlights: - Caesium-Scratch außerhalb des PPTX-Arbeitsverzeichnisses -> keine Tempfiles in finaler PPTX - Safety-Cleanup: entfernt 'caesium*' Ordner und '*.tmp' in ppt/media, bevor gezippt wird - Overwrite Policy: -O bigger - Log: image_name,size_before,size_after,saving,saving_percent,in_slide_number,image_type_changed - Summary inkl. Zeit benötigt Änderungen in 1.1.7: - PNG->JPG Fallback für große PNGs hinzugefügt (wenn nach Kompression weiterhin > 500 KB) - Logging erweitert: neue Spalte image_type_changed mit Wert png_jpg bei Typwechsel Änderungen in 1.1.6: - Libcaesium 1.3.0 kann nun auch files ignorieren, wenn die Kompression kleiner als ist """ import argparse import inspect import os import re import xml.etree.ElementTree as ET import sys import zipfile import tempfile import shutil import subprocess import time import fnmatch from glob import glob from pathlib import Path from datetime import timedelta from concurrent.futures import ThreadPoolExecutor, as_completed from threading import Lock from dataclasses import dataclass from typing import Callable, List, Optional __version__ = "1.1.7" ALLOWED_EXT = {".jpg", ".jpeg", ".png", ".webp", ".gif"} PROGRESS_BAR_LEN = 40 TEMP_PREFIX = "pptx_compress_" DEFAULT_MIN_SAVINGS = "2%" PNG_TO_JPEG_THRESHOLD_BYTES = 500 * 1024 @dataclass class DeckResult: input: str output: str ok: bool = False size_before: int = 0 size_after: int = 0 elapsed_sec: float = 0.0 error: Optional[str] = None log_file: Optional[str] = None @dataclass class ImageProcessResult: image_name: str orig_size: int chosen_size: int slide_nr: str image_type_changed: str = "" def discover_images(media_dir: Path) -> list[Path]: images: list[Path] = [] if media_dir.exists(): for f in sorted(media_dir.iterdir()): if f.is_file() and f.suffix.lower() in ALLOWED_EXT: images.append(f) return images def image_result_to_log_line(image_result: ImageProcessResult) -> str: saving = image_result.orig_size - image_result.chosen_size saving_percent = round((saving / image_result.orig_size) * 100, 2) if image_result.orig_size > 0 else 0.0 return f"{image_result.image_name};{human_kb(image_result.orig_size)};{human_kb(image_result.chosen_size)};{human_kb(saving)};{saving_percent};{image_result.slide_nr};{image_result.image_type_changed}\n" # -------------------- Utilities -------------------- def human_mb(nbytes: int) -> float: return round(nbytes / (1024 * 1024), 2) def human_kb(nbytes: int) -> float: return round(nbytes / 1024,2) def ensure_clean_file(path: Path): if path.exists(): try: if path.is_file(): path.unlink() else: shutil.rmtree(path, ignore_errors=True) except Exception: pass def cleanup_old_temps(): tmp_root = Path(tempfile.gettempdir()) for p in tmp_root.glob(f"{TEMP_PREFIX}*"): try: if p.is_dir(): shutil.rmtree(p, ignore_errors=True) else: p.unlink(missing_ok=True) except Exception: pass def print_progress(i: int, total: int): if total <= 0: return done = int(PROGRESS_BAR_LEN * i / total) bar = "█" * done + "-" * (PROGRESS_BAR_LEN - done) pct = int(i * 100 / total) print(f"\rBilder: |{bar}| {i}/{total} ({pct}%)", end="", flush=True) def zip_dir_to_pptx(src_dir: Path, out_pptx: Path): all_files: list[Path] = [] for root, _, files in os.walk(src_dir): for f in files: all_files.append(Path(root) / f) content_types = [f for f in all_files if f.name == "[Content_Types].xml"] rest = [f for f in all_files if f.name != "[Content_Types].xml"] with zipfile.ZipFile(out_pptx, "w", compression=zipfile.ZIP_DEFLATED) as z: for full in content_types + rest: rel = full.relative_to(src_dir) z.write(full, arcname=str(rel)) def which(cmd: str): return shutil.which(cmd) def compress_with_caesium( original: Path, out_dir: Path, caesium_threads: int | None, quality: int, min_savings: str, output_format: str = "original", ) -> Path | None: exe = which("caesiumclt") if not exe: raise RuntimeError("[ERROR] 'caesiumclt' wurde nicht gefunden. Bitte CaesiumCLT installieren und in PATH verfügbar machen.") out_dir.mkdir(parents=True, exist_ok=True) ext = original.suffix.lower() if ext not in ALLOWED_EXT: return None cmd = [ exe, "-q", str(quality), "-O", "bigger", "--min-savings", min_savings, "--format", output_format, "-o", str(out_dir), ] if caesium_threads is not None: cmd += ["--threads", str(caesium_threads)] cmd += [str(original)] try: r = subprocess.run(cmd, capture_output=True, text=True) if r.returncode != 0: sys.stderr.write(f"[caesiumclt] Fehler bei {original.name}:{r.stderr}") return None if output_format == "jpeg": jpg_out = out_dir / f"{original.stem}.jpg" jpeg_out = out_dir / f"{original.stem}.jpeg" if jpg_out.exists(): return jpg_out if jpeg_out.exists(): return jpeg_out return None out_file = out_dir / original.name return out_file if out_file.exists() else None except Exception as ex: sys.stderr.write(f"[caesiumclt] Ausnahme bei {original.name}: {ex}") return None def compressor_accepts_output_format(compressor: Callable[..., Path | None]) -> bool: if compressor is compress_with_caesium: return True try: signature = inspect.signature(compressor) except (TypeError, ValueError): return False return "output_format" in signature.parameters def run_compressor( compressor: Callable[..., Path | None], original: Path, out_dir: Path, caesium_threads: int | None, quality: int, min_savings: str, output_format: str = "original", ) -> Path | None: if output_format != "original" and not compressor_accepts_output_format(compressor): return None if compressor is compress_with_caesium: return compressor(original, out_dir, caesium_threads, quality, min_savings, output_format) if compressor_accepts_output_format(compressor): return compressor(original, out_dir, caesium_threads, quality, min_savings, output_format) return compressor(original, out_dir, caesium_threads, quality, min_savings) def compress_raster_image( compressor: Callable[..., Path | None], original: Path, out_dir: Path, caesium_threads: int | None, quality: int, min_savings: str, ) -> Path | None: return run_compressor( compressor=compressor, original=original, out_dir=out_dir, caesium_threads=caesium_threads, quality=quality, min_savings=min_savings, ) def compress_image_with_routing( compressor: Callable[..., Path | None], original: Path, out_dir: Path, caesium_threads: int | None, quality: int, min_savings: str, ) -> Path | None: return compress_raster_image( compressor=compressor, original=original, out_dir=out_dir, caesium_threads=caesium_threads, quality=quality, min_savings=min_savings, ) def update_relationship_targets(work_dir: Path, old_name: str, new_name: str) -> None: rels_namespace = "{http://schemas.openxmlformats.org/package/2006/relationships}Relationship" for rels_file in work_dir.rglob("*.rels"): try: tree = ET.parse(rels_file) root = tree.getroot() changed = False for rel in root.findall(f".//{rels_namespace}"): target = rel.attrib.get("Target", "") if Path(target).name == old_name: rel.attrib["Target"] = re.sub(r"[^/\\]+$", new_name, target) changed = True if changed: tree.write(rels_file, encoding="utf-8", xml_declaration=True) except (ET.ParseError, OSError): continue def ensure_jpg_content_type(work_dir: Path) -> None: content_types_path = work_dir / "[Content_Types].xml" if not content_types_path.exists(): return content_ns = "{http://schemas.openxmlformats.org/package/2006/content-types}" try: tree = ET.parse(content_types_path) root = tree.getroot() has_jpg_default = False for default in root.findall(f"{content_ns}Default"): ext = default.attrib.get("Extension", "").lower() if ext == "jpg": has_jpg_default = True break if not has_jpg_default: ET.SubElement( root, f"{content_ns}Default", { "Extension": "jpg", "ContentType": "image/jpeg", }, ) tree.write(content_types_path, encoding="utf-8", xml_declaration=True) except (ET.ParseError, OSError): return def format_duration(seconds: float) -> str: total_ms = int(round(seconds * 1000)) td = timedelta(milliseconds=total_ms) base = str(td) if "." in base: hms, frac = base.split(".", 1) return f"{hms}.{frac[:2]}" return base def build_image_slide_index(rels_dir: Path) -> dict[str, List[int]]: if not rels_dir.exists() or not rels_dir.is_dir(): return {} image_to_slides: dict[str, set[int]] = {} for rels_path in rels_dir.iterdir(): rels_file = rels_path.name if rels_file.startswith("slide") and rels_file.endswith(".xml.rels") and rels_path.is_file(): match = re.search(r"slide(\d+)\.xml\.rels$", rels_file) if not match: continue slide_number = int(match.group(1)) try: tree = ET.parse(rels_path) root = tree.getroot() for rel in root.findall(".//{http://schemas.openxmlformats.org/package/2006/relationships}Relationship"): target = rel.attrib.get("Target", "") image_name = Path(target).name if image_name: if image_name not in image_to_slides: image_to_slides[image_name] = set() image_to_slides[image_name].add(slide_number) except (ET.ParseError, OSError): print(f"Fehler beim Lesen von {rels_file}") return {img: sorted(slides) for img, slides in image_to_slides.items()} def process_image_file( idx: int, img_path: Path, scratch_dir: Path, image_to_slides: dict[str, List[int]], caesium_threads: int | None, quality: int, min_savings: str, compressor: Callable[..., Path | None], ) -> ImageProcessResult: orig_size = img_path.stat().st_size chosen_size = orig_size chosen_name = img_path.name image_type_changed = "" found_in_slide = image_to_slides.get(img_path.name) slide_nr = "NOT_USED" if found_in_slide is None else str(found_in_slide) try: out_sub = scratch_dir / f"img_{idx:06d}" caesium_out = compress_image_with_routing( compressor=compressor, original=img_path, out_dir=out_sub, caesium_threads=caesium_threads, quality=quality, min_savings=min_savings, ) if caesium_out and caesium_out.exists(): compressed_size = caesium_out.stat().st_size if compressed_size < orig_size: tmp_target = img_path.with_suffix(img_path.suffix + ".tmp") shutil.copy2(caesium_out, tmp_target) tmp_target.replace(img_path) chosen_size = compressed_size if img_path.suffix.lower() == ".png" and chosen_size > PNG_TO_JPEG_THRESHOLD_BYTES: jpg_candidate = run_compressor( compressor=compressor, original=img_path, out_dir=out_sub, caesium_threads=caesium_threads, quality=quality, min_savings="0%", output_format="jpeg", ) if jpg_candidate and jpg_candidate.exists(): jpg_size = jpg_candidate.stat().st_size if jpg_size < chosen_size: jpg_target = img_path.with_suffix(".jpg") tmp_target = jpg_target.with_suffix(".jpg.tmp") shutil.copy2(jpg_candidate, tmp_target) tmp_target.replace(jpg_target) img_path.unlink(missing_ok=True) chosen_size = jpg_size chosen_name = jpg_target.name image_type_changed = "png_jpg" except Exception: chosen_size = orig_size chosen_name = img_path.name image_type_changed = "" return ImageProcessResult( image_name=chosen_name, orig_size=orig_size, chosen_size=chosen_size, slide_nr=slide_nr, image_type_changed=image_type_changed, ) # -------------------- Core per-deck processing -------------------- def process_single_deck( input_pptx: Path, output_pptx: Path, threads: int, quality: int, min_savings: str, compressor: Callable[..., Path | None] = compress_with_caesium, ) -> DeckResult: start_time = time.perf_counter() result = DeckResult( input=str(input_pptx), output=str(output_pptx), ) work_dir: Optional[Path] = None scratch_dir: Optional[Path] = None try: if not input_pptx.exists() or input_pptx.suffix.lower() != ".pptx": raise ValueError("Eingabedatei existiert nicht oder ist keine .pptx") cleanup_old_temps() ensure_clean_file(output_pptx) work_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "work_")) scratch_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "scratch_")) log_file = output_pptx.with_suffix(".log.csv") ensure_clean_file(log_file) log_lines = ["image_name;size_before(kb);size_after(kb);saving(kb);saving_percent(%);in_slide_number;image_type_changed\n"] size_before = input_pptx.stat().st_size result.size_before = size_before with zipfile.ZipFile(input_pptx, "r") as z: z.extractall(work_dir) slides_dir = work_dir / "ppt" / "slides" rels_dir = slides_dir / "_rels" media_dir = work_dir / "ppt" / "media" images = discover_images(media_dir) total = len(images) print(f"[Processing] {input_pptx.name}: {total} Bild(er) gefunden") print_progress(0, total) if not which("caesiumclt") and compressor is compress_with_caesium: raise RuntimeError("'caesiumclt' nicht gefunden. Bitte installieren und in PATH verfügbar machen.") caesium_threads = 1 if threads > 1 else None lock = Lock() done_count = 0 image_to_slides = build_image_slide_index(rels_dir) renamed_images: list[tuple[str, str]] = [] def worker(idx: int, img_path: Path): nonlocal done_count image_result = process_image_file( idx=idx, img_path=img_path, scratch_dir=scratch_dir, image_to_slides=image_to_slides, caesium_threads=caesium_threads, quality=quality, min_savings=min_savings, compressor=compressor, ) with lock: if image_result.image_name != img_path.name: renamed_images.append((img_path.name, image_result.image_name)) log_lines.append(image_result_to_log_line(image_result)) done_count += 1 print_progress(done_count, total) if total > 0: with ThreadPoolExecutor(max_workers=max(1, threads)) as ex: futures = [ex.submit(worker, i, p) for i, p in enumerate(images, start=1)] for fut in as_completed(futures): try: fut.result() except Exception as exc: sys.stderr.write(f"[worker] Unerwarteter Fehler: {exc}\n") if renamed_images: for old_name, new_name in renamed_images: update_relationship_targets(work_dir, old_name, new_name) ensure_jpg_content_type(work_dir) print() # newline # Safety cleanup inside work_dir for p in work_dir.rglob("*"): try: if p.is_dir() and p.name.lower().startswith("caesium"): shutil.rmtree(p, ignore_errors=True) except Exception: pass if media_dir.exists(): for f in media_dir.iterdir(): if f.is_file() and f.suffix.lower() == ".tmp": try: f.unlink(missing_ok=True) except Exception: pass zip_dir_to_pptx(work_dir, output_pptx) size_after = output_pptx.stat().st_size result.size_after = size_after try: with open(log_file, "w", encoding="utf-8") as f: f.writelines(log_lines) except Exception: pass elapsed = time.perf_counter() - start_time result.elapsed_sec = elapsed result.log_file = str(log_file) result.ok = True savings_pct = 0.0 if size_before == 0 else round(100.0 * (size_before - size_after) / size_before, 2) print(f"[OK] Fertig! ({input_pptx.name})") print("Zusammenfassung ----------------") print(" Vorher: ", human_mb(size_before), "MB") print(" Nachher: ", human_mb(size_after), "MB") print(" Ersparnis: ", f"{savings_pct}%") print(" Zeit: ", format_duration(elapsed)) print(" Log: ", log_file) except Exception as e: result.error = str(e) finally: if work_dir is not None: shutil.rmtree(work_dir, ignore_errors=True) if scratch_dir is not None: shutil.rmtree(scratch_dir, ignore_errors=True) cleanup_old_temps() return result # -------------------- Input helpers -------------------- def expand_inputs(inputs: list[str]) -> list[Path]: files: list[Path] = [] for inp in inputs: p = Path(inp) if any(ch in inp for ch in ['*', '?']): for g in glob(inp): if g.lower().endswith('.pptx'): files.append(Path(g).resolve()) else: if p.is_dir(): for g in p.glob('*.pptx'): files.append(g.resolve()) else: if p.suffix.lower() == '.pptx': files.append(p.resolve()) seen = set() uniq = [] for f in files: if str(f) not in seen: uniq.append(f) seen.add(str(f)) return uniq def collect_from_dir(input_dir: Path, pattern: str, recursive: bool) -> list[Path]: files: list[Path] = [] if recursive: for root, _, names in os.walk(input_dir): for n in names: if fnmatch.fnmatch(n, pattern): p = Path(root) / n if p.suffix.lower() == '.pptx': files.append(p.resolve()) else: for p in input_dir.glob(pattern): if p.suffix.lower() == '.pptx': files.append(p.resolve()) seen = set() out = [] for f in files: s = str(f) if s not in seen: out.append(f) seen.add(s) return out # -------------------- CLI -------------------- def main(): parser, args = extractParserArguments() input_files = validateParserArguments(parser, args) batch_mode = len(input_files) > 1 if batch_mode and not args.output_dir: print('[ERROR] Batch-Modus erkannt. Bitte -O/--output-dir angeben.') sys.exit(2) if not which('caesiumclt'): print("[ERROR] 'caesiumclt' nicht gefunden. Bitte installieren und in PATH verfügbar machen.") sys.exit(3) overall_before = 0 overall_after = 0 successes = 0 failures = 0 if batch_mode: out_dir = Path(args.output_dir).resolve() out_dir.mkdir(parents=True, exist_ok=True) print(f"Batch: {len(input_files)} Datei(en). Output-Verzeichnis: {out_dir}") for src in input_files: if not src.exists(): print(f"- Übersprungen (nicht gefunden): {src}") failures += 1 continue dst = out_dir / f"{src.stem}_compressed.pptx" res = process_single_deck(src, dst, args.threads, args.quality, args.min_savings) if res.ok: successes += 1 overall_before += res.size_before overall_after += res.size_after else: failures += 1 print(f" Fehler: {src.name} -> {res.error}") else: src = input_files[0] if args.output_dir: Path(args.output_dir).mkdir(parents=True, exist_ok=True) dst = Path(args.output_dir) / f"{src.stem}_compressed.pptx" else: dst = Path(args.output).resolve() if args.output else src.with_name(f"{src.stem}_compressed.pptx") res = process_single_deck(src, dst, args.threads, args.quality, args.min_savings) if res.ok: successes += 1 overall_before += res.size_before overall_after += res.size_after else: failures += 1 print(f" Fehler: {src.name} -> {res.error}") if batch_mode: print(f"====== Gesamt-Summary ======") print(f"[SUCCESS] Dateien erfolgreich: {successes}") if failures > 0: print(f"[FAILED] Dateien fehlgeschlagen: {failures}") if overall_before > 0: pct = round(100.0 * (overall_before - overall_after) / overall_before, 2) else: pct = 0.0 print(f"Gesamtgröße vorher: {human_mb(overall_before)} MB") print(f"Gesamtgröße nachher: {human_mb(overall_after)} MB") print(f"Gesamt-Ersparnis: {pct}%") def validateParserArguments(parser, args): print("Threads used: ", args.threads," Threads") if args.quality < 0 or args.quality > 100: print('[ERROR] Ungültige Qualität. Erlaubt: 0..100') sys.exit(1) input_files: list[Path] = [] if args.input: input_files.extend(expand_inputs(args.input)) if args.input_dir: input_files.extend(collect_from_dir(Path(args.input_dir), args.pattern, args.recursive)) if len(input_files) == 0: parser.print_help() sys.exit(1) return input_files def extractParserArguments(): parser = argparse.ArgumentParser( description="PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, Batch, sauberes Cleanup)", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument('-i','--input', nargs='*', help='Input-PPTX (eine oder mehrere, Wildcards erlaubt). Bei mehreren: -O erforderlich.') parser.add_argument('--input-dir', help='Eingabe-Verzeichnis (optional, für Batch)') parser.add_argument('-o','--output', help='Output-PPTX (nur Single-Mode)') parser.add_argument('-O','--output-dir', help='Output-Verzeichnis (erforderlich für Batch)') parser.add_argument('--pattern', default='*.pptx', help='Dateimuster für --input-dir') parser.add_argument('--recursive', action='store_true', help='Rekursiv in --input-dir suchen') #parser.add_argument('-t','--threads', type=int, default=min(32, os.cpu_count() or 4), help='Anzahl paralleler Threads pro Datei') parser.add_argument('-t','--threads', type=int, default=16, help='Anzahl paralleler Threads pro Datei') parser.add_argument('-q','--quality', type=int, default=90, help='Qualität für caesiumclt (0..100), höher = bessere Qualität / größere Datei') parser.add_argument('--min-savings', default=DEFAULT_MIN_SAVINGS, help="Mindestersparnis für caesiumclt (z. B. 2%%, 100KB, 1MB oder Bytes als Zahl)") parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}', help="Zeigt die Versionsnummer an" ) args = parser.parse_args() return parser,args if __name__ == '__main__': main()