#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
PPTX image compression tool (CaesiumCLT only, multi-threaded, clean cleanup)
Version: 1.0.0

Highlights:
- Caesium scratch directory lives outside the PPTX working directory -> no temp files in the final PPTX
- Safety cleanup: removes 'caesium*' folders and '*.tmp' in ppt/media before zipping
- Overwrite policy: -O bigger
- Log: image_name,size_before,size_after,saving,saving_percent
- Summary including the time needed

Usage:
python pptx_image_compress.py -i input.pptx [-o output.pptx] [-t THREADS] [--version]
"""
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
import zipfile
|
|
import tempfile
|
|
import shutil
|
|
import subprocess
|
|
import time
|
|
from pathlib import Path
|
|
from datetime import timedelta
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from threading import Lock
|
|
|
|
# -------------------- Version --------------------
__version__ = "1.0.0"

# -------------------- Configuration --------------------
# File extensions collected from ppt/media (GIF is listed but skipped later).
ALLOWED_EXT = {
    ".jpg",
    ".jpeg",
    ".png",
    ".webp",
    ".gif",
}
CAESIUM_QUALITY = 90  # handed to caesiumclt as "-q 90"
PROGRESS_BAR_LEN = 40  # width of the console progress bar in characters
TEMP_PREFIX = "pptx_compress_"  # name prefix of every temp entry this tool creates
|
|
|
|
|
|
# -------------------- Utilities --------------------
|
|
def human_mb(nbytes: int) -> float:
    """Convert a byte count to mebibytes, rounded to two decimal places."""
    bytes_per_mib = 1024 * 1024
    mebibytes = nbytes / bytes_per_mib
    return round(mebibytes, 2)
|
|
|
|
|
|
def ensure_clean_file(path: Path):
    """Remove *path* so that a fresh file can be created in its place.

    Real directories are removed recursively. Everything else -- regular
    files and symlinks, including dangling ones and symlinks that point at a
    directory -- is unlinked without touching a link's target. A path that
    does not exist is a no-op.

    The call is best effort: it never raises for filesystem errors, but a
    failed removal is reported on stderr instead of being silently dropped.
    """
    try:
        if path.is_dir() and not path.is_symlink():
            shutil.rmtree(path)
        else:
            # missing_ok also covers a file that vanished in the meantime.
            path.unlink(missing_ok=True)
    except FileNotFoundError:
        # Already gone, which is exactly the state we want.
        pass
    except OSError as err:
        sys.stderr.write(f"\n[cleanup] Konnte {path} nicht entfernen: {err}\n")
|
|
|
|
|
|
def cleanup_old_temps():
    """Delete leftover temp entries of this tool from the system temp dir.

    Every entry directly inside ``tempfile.gettempdir()`` whose name starts
    with ``TEMP_PREFIX`` is removed: directories recursively, anything else
    by unlinking. Removal errors are ignored.
    """
    leftovers = Path(tempfile.gettempdir()).glob(f"{TEMP_PREFIX}*")
    for entry in leftovers:
        try:
            if not entry.is_dir():
                entry.unlink(missing_ok=True)
                continue
            shutil.rmtree(entry, ignore_errors=True)
        except Exception:
            # Best effort: an entry that cannot be removed is simply skipped.
            pass
|
|
|
|
|
|
def print_progress(i: int, total: int):
    """Redraw the one-line console progress bar for *i* of *total* images.

    Prints nothing when *total* is zero or negative. The line starts with a
    carriage return and has no trailing newline, so successive calls
    overwrite each other in place.
    """
    if total <= 0:
        return

    filled = int(PROGRESS_BAR_LEN * i / total)
    pct = int(i * 100 / total)
    bar = "".join(("█" * filled, "-" * (PROGRESS_BAR_LEN - filled)))
    print(f"\rBilder: |{bar}| {i}/{total} ({pct}%)", end="", flush=True)
|
|
|
|
|
|
def zip_dir_to_pptx(src_dir: Path, out_pptx: Path):
    """Pack every file below *src_dir* into the deflate-compressed zip *out_pptx*.

    Archive names are the paths relative to *src_dir*, written with forward
    slashes. Entries are stored in a deterministic order: ``[Content_Types].xml``
    first (if present), then all remaining files sorted by archive name, so
    the same directory tree always yields the same entry sequence regardless
    of the order in which the filesystem lists files.

    Args:
        src_dir: Root of the unpacked package.
        out_pptx: Path of the archive to create (overwritten if it exists).
    """
    content_types = "[Content_Types].xml"

    members = []
    for root, _, files in os.walk(src_dir):
        for name in files:
            full = Path(root) / name
            members.append((full.relative_to(src_dir).as_posix(), full))

    # False sorts before True, so the content-types part leads the archive.
    members.sort(key=lambda member: (member[0] != content_types, member[0]))

    with zipfile.ZipFile(out_pptx, "w", compression=zipfile.ZIP_DEFLATED) as z:
        for arcname, full in members:
            z.write(full, arcname=arcname)
|
|
|
|
|
|
def which(cmd: str) -> str | None:
|
|
return shutil.which(cmd)
|
|
|
|
|
|
def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int | None) -> Path | None:
    """Run ``caesiumclt`` on one image and return the compressed copy.

    The output is written into *out_dir* under the same file name as
    *original*; the original file itself is not modified here.

    Args:
        original: Image to compress.
        out_dir: Directory receiving the output (created if missing).
        caesium_threads: Value for ``--threads``; the option is omitted when None.

    Returns:
        Path of the produced file, or None when the format is not handed to
        Caesium (anything but JPG/JPEG, PNG, WEBP), when the tool exits with
        a non-zero status, when no output file exists afterwards, or when
        launching the tool fails.

    Raises:
        RuntimeError: If ``caesiumclt`` cannot be found on PATH.
    """
    exe = which("caesiumclt")
    if not exe:
        raise RuntimeError(
            "❌ 'caesiumclt' wurde nicht gefunden. Bitte CaesiumCLT installieren und in PATH verfügbar machen."
        )

    out_dir.mkdir(parents=True, exist_ok=True)

    # Only hand Caesium the formats it supports: JPG/JPEG, PNG, WEBP.
    ext = original.suffix.lower()
    if ext not in {".jpg", ".jpeg", ".png", ".webp"}:
        return None  # GIF and everything else is skipped

    cmd = [
        exe,
        "-q", str(CAESIUM_QUALITY),
        "-O", "bigger",  # only overwrite when the target is bigger
        "-o", str(out_dir),
    ]
    if caesium_threads is not None:
        cmd += ["--threads", str(caesium_threads)]
    cmd += [str(original)]

    try:
        # errors="replace": tool output that is not valid in the locale
        # encoding must not raise UnicodeDecodeError, which would otherwise
        # be reported as a failed compression even though the file was written.
        r = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
        if r.returncode != 0:
            sys.stderr.write(f"\n[caesiumclt] Fehler bei {original.name}:\n{r.stderr}\n")
            return None

        out_file = out_dir / original.name
        return out_file if out_file.exists() else None
    except Exception as ex:
        sys.stderr.write(f"\n[caesiumclt] Ausnahme bei {original.name}: {ex}\n")
        return None
|
|
|
|
|
|
def format_duration(seconds: float) -> str:
    """Render *seconds* as ``H:MM:SS`` with at most two fractional digits.

    The value is rounded to whole milliseconds and formatted via
    ``str(timedelta)``. If that text carries a fractional part, it is cut
    (not rounded) to two digits; otherwise the text is returned unchanged.
    """
    millis = int(round(seconds * 1000))
    text = str(timedelta(milliseconds=millis))
    whole, dot, fraction = text.partition(".")
    if not dot:
        return text
    return f"{whole}.{fraction[:2]}"
|
|
|
|
|
|
def main():
    """Command-line entry point: compress the images inside a PPTX file.

    Flow: parse arguments, validate input/output paths, unpack the PPTX into
    a temp working directory, compress every candidate image in ppt/media in
    parallel through ``compress_with_caesium`` (keeping the original whenever
    the result is not smaller), strip stray artefacts, re-zip to the output
    path, write a CSV log next to the output and print a summary. Both temp
    directories are removed in all cases.

    Exit codes:
        1: no input file given (help is printed).
        2: input missing / not a .pptx, or unusable output path.
        3: ``caesiumclt`` not found on PATH.
    """
    start_time = time.perf_counter()

    parser = argparse.ArgumentParser(
        description="PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, sauberes Cleanup)",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("-i", "--input", help="Input-PPTX", required=False)
    parser.add_argument("-o", "--output", help="Output-PPTX", required=False)
    parser.add_argument(
        "-t", "--threads",
        type=int,
        default=min(32, os.cpu_count() or 4),
        help="Anzahl paralleler Threads für die Bildverarbeitung",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )

    args = parser.parse_args()

    if not args.input:
        parser.print_help()
        sys.exit(1)

    input_pptx = Path(args.input).resolve()
    if not input_pptx.exists() or input_pptx.suffix.lower() != ".pptx":
        print("❌ Eingabedatei existiert nicht oder ist keine .pptx")
        sys.exit(2)

    if args.output:
        output_pptx = Path(args.output).resolve()
    else:
        output_pptx = input_pptx.with_name(f"{input_pptx.stem}_compressed.pptx")

    # The existing output is deleted below before the input is read, so the
    # output must never be a directory (it would be removed recursively) ...
    if output_pptx.is_dir():
        print("❌ Ausgabepfad ist ein Verzeichnis, keine Datei.")
        sys.exit(2)
    # ... and must never be the input itself (it would be lost).
    if output_pptx == input_pptx or (output_pptx.exists() and output_pptx.samefile(input_pptx)):
        print("❌ Ausgabedatei darf nicht mit der Eingabedatei identisch sein.")
        sys.exit(2)

    # Log file next to the output; never let it coincide with the output.
    log_file = output_pptx.with_suffix(".log")
    if log_file == output_pptx:
        log_file = output_pptx.with_name(output_pptx.name + ".log")

    size_before = input_pptx.stat().st_size

    # Remove earlier temp files and an existing output/log.
    # NOTE(review): cleanup_old_temps() removes every temp entry carrying the
    # tool's prefix, which presumably includes those of another instance
    # running at the same time -- confirm whether parallel runs must be supported.
    cleanup_old_temps()
    ensure_clean_file(output_pptx)
    ensure_clean_file(log_file)
    log_lines = ["image_name,size_before,size_after,saving,saving_percent\n"]

    # --- Two separate temp directories ---
    # Created last so nothing can raise between creation and the try/finally
    # that removes them.
    work_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "work_"))  # unpacked PPTX
    scratch_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "scratch_"))  # Caesium output (outside!)

    try:
        # Unpack
        with zipfile.ZipFile(input_pptx, "r") as z:
            z.extractall(work_dir)

        media_dir = work_dir / "ppt" / "media"
        images = []
        if media_dir.exists():
            images = [
                f
                for f in sorted(media_dir.iterdir())
                if f.is_file() and f.suffix.lower() in ALLOWED_EXT
            ]

        total = len(images)
        print(f"🔧 Finde Bilder in {media_dir} ... {total} Kandidaten")
        print_progress(0, total)

        # Check up front that caesiumclt is available.
        if not which("caesiumclt"):
            print("\n❌ 'caesiumclt' nicht gefunden. Bitte installieren und in PATH verfügbar machen.")
            sys.exit(3)

        # Avoid oversubscription: many Python threads -> one thread inside caesium.
        caesium_threads = 1 if args.threads and args.threads > 1 else None

        # Thread-safe progress and log bookkeeping.
        lock = Lock()
        done_count = 0

        def worker(idx: int, img_path: Path):
            """Compress one image in place and record its log line."""
            nonlocal done_count
            ext = img_path.suffix.lower()
            orig_size = img_path.stat().st_size

            # Skip GIF: logged as unchanged.
            if ext == ".gif":
                with lock:
                    done_count += 1
                    log_lines.append(f"{img_path.name},{orig_size},{orig_size},0,0.0\n")
                    print_progress(done_count, total)
                return

            chosen_size = orig_size
            try:
                # Separate output sub-folder per image to avoid collisions.
                out_sub = scratch_dir / f"img_{idx:06d}"
                caesium_out = compress_with_caesium(img_path, out_sub, caesium_threads)

                if caesium_out and caesium_out.exists():
                    s = caesium_out.stat().st_size
                    if s < orig_size:
                        # Copy next to the original first, then swap it in
                        # with a single rename.
                        tmp_target = img_path.with_suffix(img_path.suffix + ".tmp")
                        shutil.copy2(caesium_out, tmp_target)
                        tmp_target.replace(img_path)
                        chosen_size = s
            except Exception as ex:
                # Keep the original, but do not hide why compression failed.
                chosen_size = orig_size
                sys.stderr.write(f"\n[worker] Fehler bei {img_path.name}: {ex}\n")
            finally:
                saving = orig_size - chosen_size
                saving_percent = round((saving / orig_size) * 100, 2) if orig_size > 0 else 0.0
                with lock:
                    log_lines.append(f"{img_path.name},{orig_size},{chosen_size},{saving},{saving_percent}\n")
                    done_count += 1
                    print_progress(done_count, total)

        # Run in parallel; progress is drawn inside the worker.
        if total > 0:
            with ThreadPoolExecutor(max_workers=max(1, args.threads)) as ex:
                futures = [ex.submit(worker, i, p) for i, p in enumerate(images, start=1)]
                for fut in as_completed(futures):
                    # Surface errors raised outside the worker's own try block.
                    err = fut.exception()
                    if err is not None:
                        sys.stderr.write(f"\n[worker] Unerwarteter Fehler: {err}\n")

        print()  # newline after the progress bar

        # --- Safety cleanup inside the working directory ---
        # 1) Remove any caesium* folders (from old runs). The listing is
        #    materialised first because directories are deleted while looping.
        for p in list(work_dir.rglob("*")):
            try:
                if p.is_dir() and p.name.lower().startswith("caesium"):
                    shutil.rmtree(p, ignore_errors=True)
            except Exception:
                pass

        # 2) Delete any leftover .tmp files in ppt/media.
        if media_dir.exists():
            for f in list(media_dir.iterdir()):
                if f.is_file() and f.suffix.lower() == ".tmp":
                    try:
                        f.unlink(missing_ok=True)
                    except Exception:
                        pass

        # Build the new PPTX (only work_dir; scratch_dir lies outside and is
        # therefore excluded).
        zip_dir_to_pptx(work_dir, output_pptx)
        size_after = output_pptx.stat().st_size

        # Write the log; a failure here must not invalidate the finished PPTX.
        try:
            with open(log_file, "w", encoding="utf-8") as f:
                f.writelines(log_lines)
        except OSError as err:
            sys.stderr.write(f"\n[log] Konnte {log_file} nicht schreiben: {err}\n")

        # Summary
        savings_pct = 0.0
        if size_before > 0:
            savings_pct = round(100.0 * (size_before - size_after) / size_before, 2)

        elapsed = time.perf_counter() - start_time

        print("\n✅ Fertig!")
        print("Summary")
        print("-------")
        print(f"Version: {__version__}")
        print(f"Name: {output_pptx.name}")
        print(f"Datei-Größe vorher: {human_mb(size_before)} MB")
        print(f"Datei-Größe nachher: {human_mb(size_after)} MB")
        print(f"Ersparnis: {savings_pct}%")
        print(f"Zeit benötigt: {format_duration(elapsed)}")
        print(f"Log-Datei: {log_file}")

    finally:
        # Remove ALL temporary files/folders (also on sys.exit and errors).
        shutil.rmtree(work_dir, ignore_errors=True)
        shutil.rmtree(scratch_dir, ignore_errors=True)
        # Additionally remove older leftovers.
        cleanup_old_temps()
|
|
|
|
|
|
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|