Refactor and UnitTest

Funktionalität min_savings der caesiumclt 1.3.0 implementiert (default: 2%), log-Datei um "Bild in Folien Nr." ergänzt.
2026-04-09 10:10:57 +02:00 · 2026-04-09 09:40:19 +02:00
4 changed files with 309 additions and 94 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ python-3.*-embed-amd64.zip
 python-embed/*
 .vscode/launch.json
 logs/*.log
 __pycache__/*
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -0,0 +1,11 @@
 {
    "python.testing.unittestArgs": [
        "-v",
        "-s",
        ".",
        "-p",
        "test_*.py"
    ],
    "python.testing.pytestEnabled": false,
    "python.testing.unittestEnabled": true
 }
--- a/pptx_image_compress.py
+++ b/pptx_image_compress.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 """
 PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, Batch, sauberes Cleanup)
-Version: 1.1.4
+Version: 1.1.6
 Highlights:
@@ -12,8 +12,8 @@ Highlights:
 - Log: image_name,size_before,size_after,saving,saving_percent
 - Summary inkl. Zeit benötigt
-Änderungen in 1.1.4:
+Änderungen in 1.1.6:
- Libcaesium 1.1.0 kann nun auch gif verkleinern
+- Libcaesium 1.3.0 kann nun auch files ignorieren, wenn die Kompression kleiner als <MIN_SAVING> ist
 """
 import argparse
@@ -32,14 +32,51 @@ from pathlib import Path
 from datetime import timedelta
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from threading import Lock
-from typing import List, Optional
+from dataclasses import dataclass
 from typing import Callable, List, Optional
-__version__ = "1.1.4"
+__version__ = "1.1.6"
 # Lower-case file suffixes that count as compressible images; discover_images
 # filters the media directory against this set.
 ALLOWED_EXT = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
 # NOTE(review): presumably the character width of the console progress bar
 # drawn by print_progress -- its use is not visible in this hunk; confirm.
 PROGRESS_BAR_LEN = 40
 # NOTE(review): presumably the name prefix of the temporary working
 # directory -- its use is not visible in this hunk; confirm.
 TEMP_PREFIX = "pptx_compress_"
 # Default of the --min-savings CLI option; the value is handed unchanged to
 # caesiumclt's --min-savings flag.
 DEFAULT_MIN_SAVINGS = "2%"
@dataclass
 class DeckResult:
    """Outcome record for one PPTX deck, filled in step by step by process_single_deck."""
    # Path of the source deck, stored as a string.
    input: str
    # Path of the compressed deck to be written, stored as a string.
    output: str
    # Set to True only after the whole deck was processed without an exception.
    ok: bool = False
    # Size in bytes of the input file (taken from its stat()).
    size_before: int = 0
    # Size in bytes of the written output file (taken from its stat()).
    size_after: int = 0
    # Wall-clock duration of the run in seconds (perf_counter difference).
    elapsed_sec: float = 0.0
    # str() of the exception that aborted processing, otherwise None.
    error: Optional[str] = None
    # Path of the per-deck log file as a string, otherwise None.
    log_file: Optional[str] = None
@dataclass
 class ImageProcessResult:
    """Per-image outcome produced by process_image_file and rendered by image_result_to_log_line."""
    # File name (without directory) of the processed image.
    image_name: str
    # Size in bytes of the image before compression.
    orig_size: int
    # Size in bytes that ended up in the deck; equals orig_size when the
    # original was kept.
    chosen_size: int
    # str() of the list of slide numbers that reference the image, or the
    # literal "NOT_USED" when no slide references it.
    slide_nr: str
 def discover_images(media_dir: Path) -> list[Path]:
    """Return the image files located directly inside *media_dir*.

    Entries are visited in sorted path order; only regular files whose
    lower-cased suffix is contained in ``ALLOWED_EXT`` are kept. A
    non-existent directory yields an empty list.
    """
    if not media_dir.exists():
        return []
    return [
        entry
        for entry in sorted(media_dir.iterdir())
        if entry.is_file() and entry.suffix.lower() in ALLOWED_EXT
    ]
 def image_result_to_log_line(image_result: ImageProcessResult) -> str:
    """Render one image result as a semicolon-separated log line.

    Columns: image name, size before, size after, absolute saving (all three
    sizes formatted by ``human_kb``), saving in percent rounded to two
    decimals (0.0 for an empty original), and the slide reference. The line
    ends with a newline.
    """
    before = image_result.orig_size
    after = image_result.chosen_size
    saving = before - after
    if before > 0:
        saving_percent = round((saving / before) * 100, 2)
    else:
        # Avoid dividing by zero for an empty source file.
        saving_percent = 0.0
    return f"{image_result.image_name};{human_kb(before)};{human_kb(after)};{human_kb(saving)};{saving_percent};{image_result.slide_nr}\n"
 # -------------------- Utilities --------------------
@@ -89,7 +126,7 @@ def zip_dir_to_pptx(src_dir: Path, out_pptx: Path):
 def which(cmd: str):
    """Look up *cmd* with ``shutil.which`` and hand back its result unchanged.

    The result is the resolved executable path, or ``None`` when the command
    cannot be found.
    """
    resolved = shutil.which(cmd)
    return resolved
-def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int | None, quality: int) -> Path | None:
+def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int | None, quality: int, min_savings: str) -> Path | None:
    exe = which("caesiumclt")
    if not exe:
        raise RuntimeError("[ERROR] 'caesiumclt' wurde nicht gefunden. Bitte CaesiumCLT installieren und in PATH verfügbar machen.")
@@ -97,7 +134,7 @@ def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int |
    ext = original.suffix.lower()
    if ext not in {".jpg", ".jpeg", ".png", ".webp", ".gif"}:
        return None
-    cmd = [exe, "-q", str(quality), "-O", "bigger", "-o", str(out_dir)]
+    cmd = [exe, "-q", str(quality), "-O", "bigger", "--min-savings", min_savings, "-o", str(out_dir)]
    if caesium_threads is not None:
        cmd += ["--threads", str(caesium_threads)]
    cmd += [str(original)]
@@ -121,51 +158,91 @@ def format_duration(seconds: float) -> str:
        return f"{hms}.{frac[:2]}"
    return base
-def get_slide_numbers_for_image(rels_dir: str, image_filename: str) -> Optional[List[int]]:
+def build_image_slide_index(rels_dir: Path) -> dict[str, List[int]]:
-    """
+    if not rels_dir.exists() or not rels_dir.is_dir():
-    Durchsucht alle .rels-Dateien im angegebenen Verzeichnis und gibt die Slide-Nummern zurück,
+        return {}
    in denen die angegebene Bilddatei referenziert wird.
-    :param rels_dir: Pfad zum Verzeichnis ppt/slides/_rels
+    image_to_slides: dict[str, set[int]] = {}
    :param image_filename: z. B. 'image80.png'
    :return: Liste von Slide-Nummern oder None
    """
    slide_numbers = []
-    for rels_file in os.listdir(rels_dir):
+    for rels_path in rels_dir.iterdir():
-        if rels_file.startswith("slide") and rels_file.endswith(".xml.rels"):
+        rels_file = rels_path.name
-            rels_path = os.path.join(rels_dir, rels_file)
+        if rels_file.startswith("slide") and rels_file.endswith(".xml.rels") and rels_path.is_file():
            match = re.search(r"slide(\d+)\.xml\.rels$", rels_file)
            if not match:
                continue
            slide_number = int(match.group(1))
            try:
                tree = ET.parse(rels_path)
                root = tree.getroot()
                for rel in root.findall(".//{http://schemas.openxmlformats.org/package/2006/relationships}Relationship"):
                    target = rel.attrib.get("Target", "")
-                    if image_filename in target:
+                    image_name = Path(target).name
-                        match = re.search(r"slide(\d+).xml.rels", rels_file)
+                    if image_name:
-                        if match:
+                        if image_name not in image_to_slides:
-                            slide_number = int(match.group(1))
+                            image_to_slides[image_name] = set()
-                            slide_numbers.append(slide_number)
+                        image_to_slides[image_name].add(slide_number)
-            except ET.ParseError:
+            except (ET.ParseError, OSError):
-                print(f"Fehler beim Parsen von {rels_file}")
+                print(f"Fehler beim Lesen von {rels_file}")
-    return slide_numbers if slide_numbers else None
+    return {img: sorted(slides) for img, slides in image_to_slides.items()}
 def get_slide_numbers_for_image(rels_dir: Path, image_filename: str) -> Optional[List[int]]:
    """Return the slide numbers that reference *image_filename*, or ``None``.

    The index is rebuilt from *rels_dir* through ``build_image_slide_index``
    on every call. A missing entry and an empty list both yield ``None``.
    """
    slide_index = build_image_slide_index(rels_dir)
    return slide_index.get(image_filename) or None
 def process_image_file(
    idx: int,
    img_path: Path,
    scratch_dir: Path,
    image_to_slides: dict[str, List[int]],
    caesium_threads: int | None,
    quality: int,
    min_savings: str,
    compressor: Callable[[Path, Path, int | None, int, str], Path | None],
 ) -> ImageProcessResult:
    """Compress one media image in place and report the size outcome.

    The compressor writes into a per-image sub-directory of *scratch_dir*
    (``img_<idx>``). The original file is replaced only when the compressor
    produced an existing file that is strictly smaller. Any failure during
    compression or replacement is treated as "keep the original".

    :param idx: Running number used to name the scratch sub-directory.
    :param img_path: Image file to compress; it is overwritten on success.
    :param scratch_dir: Parent directory for the compressor output.
    :param image_to_slides: Maps image file names to the slide numbers that
        reference them.
    :param caesium_threads: Passed through to the compressor.
    :param quality: Passed through to the compressor.
    :param min_savings: Passed through to the compressor.
    :param compressor: Callable that performs the compression and returns the
        output path, or ``None`` when it produced nothing.
    :return: Name, original size, resulting size and slide reference of the
        image; the slide reference is ``"NOT_USED"`` when the image is not in
        *image_to_slides*.
    :raises OSError: If *img_path* cannot be stat'ed before compression.
    """
    orig_size = img_path.stat().st_size
    chosen_size = orig_size
    found_in_slide = image_to_slides.get(img_path.name)
    slide_nr = "NOT_USED" if found_in_slide is None else str(found_in_slide)
    tmp_target = None
    try:
        out_sub = scratch_dir / f"img_{idx:06d}"
        caesium_out = compressor(img_path, out_sub, caesium_threads, quality, min_savings)
        if caesium_out and caesium_out.exists():
            s = caesium_out.stat().st_size
            if s < orig_size:
                # Stage next to the target, then swap it in with a single
                # replace() so img_path never holds a half-written file.
                tmp_target = img_path.with_suffix(img_path.suffix + ".tmp")
                shutil.copy2(caesium_out, tmp_target)
                tmp_target.replace(img_path)
                chosen_size = s
    except Exception:
        # Best effort: a failed compression must not abort the deck, the
        # original image simply stays in place.
        chosen_size = orig_size
        if tmp_target is not None:
            # A failed copy/replace would otherwise leave "<name>.tmp" behind
            # in the media directory.
            try:
                tmp_target.unlink(missing_ok=True)
            except OSError:
                pass
    return ImageProcessResult(
        image_name=img_path.name,
        orig_size=orig_size,
        chosen_size=chosen_size,
        slide_nr=slide_nr,
    )
 # -------------------- Core per-deck processing --------------------
-def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quality: int) -> dict:
+def process_single_deck(
    input_pptx: Path,
    output_pptx: Path,
    threads: int,
    quality: int,
    min_savings: str,
    compressor: Callable[[Path, Path, int | None, int, str], Path | None] = compress_with_caesium,
 ) -> DeckResult:
    start_time = time.perf_counter()
-    result = {
+    result = DeckResult(
-        "input": str(input_pptx),
+        input=str(input_pptx),
-        "output": str(output_pptx),
+        output=str(output_pptx),
-        "ok": False,
+    )
        "size_before": 0,
        "size_after": 0,
        "elapsed_sec": 0.0,
        "error": None,
        "log_file": None,
    }
    try:
        if not input_pptx.exists() or input_pptx.suffix.lower() != ".pptx":
@@ -182,63 +259,44 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
        log_lines = ["image_name;size_before(kb);size_after(kb);saving(kb);saving_percent(%);in_slide_number\n"]
        size_before = input_pptx.stat().st_size
-        result["size_before"] = size_before
+        result.size_before = size_before
        with zipfile.ZipFile(input_pptx, "r") as z:
            z.extractall(work_dir)
        slides_dir  = work_dir / "ppt" / "slides"
        rels_dir = slides_dir / "_rels"
        media_dir = work_dir / "ppt" / "media"
-        images = []
+        images = discover_images(media_dir)
        if media_dir.exists():
            for f in sorted(media_dir.iterdir()):
                if f.is_file() and f.suffix.lower() in ALLOWED_EXT:
                    images.append(f)
        total = len(images)
        print(f"[Processing] {input_pptx.name}: {total} Bild(er) gefunden")
        print_progress(0, total)
-        if not which("caesiumclt"):
+        if not which("caesiumclt") and compressor is compress_with_caesium:
            raise RuntimeError("'caesiumclt' nicht gefunden. Bitte installieren und in PATH verfügbar machen.")
        caesium_threads = 1 if threads and threads > 1 else None
        lock = Lock()
        done_count = 0
        image_to_slides = build_image_slide_index(rels_dir)
        def worker(idx: int, img_path: Path):
            nonlocal done_count
-            ext = img_path.suffix.lower()
+            image_result = process_image_file(
-            orig_size = img_path.stat().st_size
+                idx=idx,
-            chosen_size = orig_size
+                img_path=img_path,
-            found_in_slide=None
+                scratch_dir=scratch_dir,
-            slide_nr=""
+                image_to_slides=image_to_slides,
-
+                caesium_threads=caesium_threads,
-            try:
+                quality=quality,
-                found_in_slide = get_slide_numbers_for_image(slides_dir.name, img_path.name) 
+                min_savings=min_savings,
-                if found_in_slide is None:
+                compressor=compressor,
-                    slide_nr = "NOT_USED"
+            )
                else:
                    slide_nr = str(found_in_slide)
                out_sub = scratch_dir / f"img_{idx:06d}"
                caesium_out = compress_with_caesium(img_path, out_sub, caesium_threads, quality)
                if caesium_out and caesium_out.exists():
                    s = caesium_out.stat().st_size
                    if s < orig_size:
                        tmp_target = img_path.with_suffix(img_path.suffix + ".tmp")
                        shutil.copy2(caesium_out, tmp_target)
                        tmp_target.replace(img_path)
                        chosen_size = s
            except Exception:
                chosen_size = orig_size
            finally:
                saving = orig_size - chosen_size
                saving_percent = round((saving / orig_size) * 100, 2) if orig_size > 0 else 0.0
            with lock:
-                    log_lines.append(f"{img_path.name};{human_kb(orig_size)};{human_kb(chosen_size)};{human_kb(saving)};{saving_percent};{slide_nr}\n")
+                log_lines.append(image_result_to_log_line(image_result))
                done_count += 1
                print_progress(done_count, total)
@@ -268,7 +326,7 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
        zip_dir_to_pptx(work_dir, output_pptx)
        size_after = output_pptx.stat().st_size
-        result["size_after"] = size_after
+        result.size_after = size_after
        try:
            with open(log_file, "w", encoding="utf-8") as f:
@@ -277,9 +335,9 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
            pass
        elapsed = time.perf_counter() - start_time
-        result["elapsed_sec"] = elapsed
+        result.elapsed_sec = elapsed
-        result["log_file"] = str(log_file)
+        result.log_file = str(log_file)
-        result["ok"] = True
+        result.ok = True
        savings_pct = 0.0 if size_before == 0 else round(100.0 * (size_before - size_after) / size_before, 2)
        print(f"[OK] Fertig!  ({input_pptx.name})")
@@ -291,7 +349,7 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
        print("  Log:       ", log_file)
    except Exception as e:
-        result["error"] = str(e)
+        result.error = str(e)
    finally:
        try:
            shutil.rmtree(work_dir, ignore_errors=True)  # type: ignore[name-defined]
@@ -359,15 +417,16 @@ def main():
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('-i','--input', nargs='*', help='Input-PPTX (eine oder mehrere, Wildcards erlaubt). Bei mehreren: -O erforderlich.')
    parser.add_argument('--input-dir', help='Eingabe-Verzeichnis (optional, für Batch)')
    parser.add_argument('-o','--output', help='Output-PPTX (nur Single-Mode)')
    parser.add_argument('-O','--output-dir', help='Output-Verzeichnis (erforderlich für Batch)')
    parser.add_argument('--input-dir', help='Eingabe-Verzeichnis (optional, für Batch)')
    parser.add_argument('--pattern', default='*.pptx', help='Dateimuster für --input-dir')
    parser.add_argument('--recursive', action='store_true', help='Rekursiv in --input-dir suchen')
    #parser.add_argument('-t','--threads', type=int, default=min(32, os.cpu_count() or 4), help='Anzahl paralleler Threads pro Datei')
    parser.add_argument('-t','--threads', type=int, default=16, help='Anzahl paralleler Threads pro Datei')
    parser.add_argument('-q','--quality', type=int, default=90, help='Qualität für caesiumclt (0..100), höher = bessere Qualität / größere Datei')
-    parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}')
+    parser.add_argument('--min-savings', default=DEFAULT_MIN_SAVINGS, help="Mindestersparnis für caesiumclt (z. B. 2%%, 100KB, 1MB oder Bytes als Zahl)")
    parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}',  help="Zeigt die Versionsnummer an" )
    args = parser.parse_args()
@@ -412,14 +471,14 @@ def main():
                failures += 1
                continue
            dst = out_dir / f"{src.stem}_compressed.pptx"
-            res = process_single_deck(src, dst, args.threads, args.quality)
+            res = process_single_deck(src, dst, args.threads, args.quality, args.min_savings)
-            if res['ok']:
+            if res.ok:
                successes += 1
-                overall_before += res['size_before']
+                overall_before += res.size_before
-                overall_after += res['size_after']
+                overall_after += res.size_after
            else:
                failures += 1
-                print(f"  Fehler: {src.name} -> {res['error']}")
+                print(f"  Fehler: {src.name} -> {res.error}")
    else:
        src = input_files[0]
        if args.output_dir:
@@ -427,14 +486,14 @@ def main():
            dst = Path(args.output_dir) / f"{src.stem}_compressed.pptx"
        else:
            dst = Path(args.output).resolve() if args.output else src.with_name(f"{src.stem}_compressed.pptx")
-        res = process_single_deck(src, dst, args.threads, args.quality)
+        res = process_single_deck(src, dst, args.threads, args.quality, args.min_savings)
-        if res['ok']:
+        if res.ok:
            successes += 1
-            overall_before += res['size_before']
+            overall_before += res.size_before
-            overall_after += res['size_after']
+            overall_after += res.size_after
        else:
            failures += 1
-            print(f"  Fehler: {src.name} -> {res['error']}")
+            print(f"  Fehler: {src.name} -> {res.error}")
    if batch_mode:
--- a/test_pptx_image_compress.py
+++ b/test_pptx_image_compress.py
@@ -0,0 +1,144 @@
 import tempfile
 import unittest
 import zipfile
 from pathlib import Path
 import pptx_image_compress as pic
 class TestPptxImageCompress(unittest.TestCase):
    """Unit tests for the helpers and the per-deck pipeline of pptx_image_compress.

    Every test that reaches the compression step injects a fake compressor,
    so no ``caesiumclt`` binary is required.
    """

    @staticmethod
    def _make_fake_compressor(output_size: int):
        """Build a compressor stand-in that always writes *output_size* bytes.

        The returned callable has the same positional signature as the real
        compressor, creates *out_dir* on demand and returns the written path.
        """
        def fake_compressor(original: Path, out_dir: Path, caesium_threads: int | None, quality: int, min_savings: str):
            out_dir.mkdir(parents=True, exist_ok=True)
            out = out_dir / original.name
            out.write_bytes(b"B" * output_size)
            return out
        return fake_compressor

    def test_discover_images_filters_extensions(self):
        """Only allowed suffixes are returned, case-insensitively and sorted."""
        with tempfile.TemporaryDirectory() as td:
            media_dir = Path(td)
            (media_dir / "a.jpg").write_bytes(b"1")
            (media_dir / "b.png").write_bytes(b"1")
            (media_dir / "c.txt").write_bytes(b"1")
            (media_dir / "d.GIF").write_bytes(b"1")
            images = pic.discover_images(media_dir)
            self.assertEqual([p.name for p in images], ["a.jpg", "b.png", "d.GIF"])

    def test_image_result_to_log_line(self):
        """The log line carries the name, the slide reference and the percentage."""
        image_result = pic.ImageProcessResult(
            image_name="image1.png",
            orig_size=1000,
            chosen_size=800,
            slide_nr="[1, 2]",
        )
        line = pic.image_result_to_log_line(image_result)
        self.assertIn("image1.png", line)
        self.assertIn("[1, 2]", line)
        self.assertIn("20.0", line)

    def test_process_image_file_replaces_when_smaller(self):
        """A smaller compressor output replaces the original file."""
        with tempfile.TemporaryDirectory() as td:
            root = Path(td)
            img = root / "image1.png"
            img.write_bytes(b"A" * 100)
            scratch = root / "scratch"
            result = pic.process_image_file(
                idx=1,
                img_path=img,
                scratch_dir=scratch,
                image_to_slides={"image1.png": [1]},
                caesium_threads=1,
                quality=90,
                min_savings="2%",
                compressor=self._make_fake_compressor(40),
            )
            self.assertEqual(result.chosen_size, 40)
            self.assertEqual(img.stat().st_size, 40)
            self.assertEqual(result.slide_nr, "[1]")

    def test_process_image_file_keeps_original_when_bigger(self):
        """A larger compressor output is discarded and the original kept."""
        with tempfile.TemporaryDirectory() as td:
            root = Path(td)
            img = root / "image1.png"
            img.write_bytes(b"A" * 100)
            scratch = root / "scratch"
            result = pic.process_image_file(
                idx=1,
                img_path=img,
                scratch_dir=scratch,
                image_to_slides={},
                caesium_threads=1,
                quality=90,
                min_savings="2%",
                compressor=self._make_fake_compressor(120),
            )
            self.assertEqual(result.chosen_size, 100)
            self.assertEqual(img.stat().st_size, 100)
            self.assertEqual(result.slide_nr, "NOT_USED")

    def test_process_single_deck_with_injected_compressor(self):
        """End-to-end run over a minimal deck: one slide relationship, one image."""
        with tempfile.TemporaryDirectory() as td:
            root = Path(td)
            input_pptx = root / "input.pptx"
            output_pptx = root / "output.pptx"
            source_tree = root / "src"
            rels_dir = source_tree / "ppt" / "slides" / "_rels"
            media_dir = source_tree / "ppt" / "media"
            rels_dir.mkdir(parents=True, exist_ok=True)
            media_dir.mkdir(parents=True, exist_ok=True)
            rels_xml = (
                "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>"
                "<Relationships xmlns=\"http://schemas.openxmlformats.org/package/2006/relationships\">"
                "<Relationship Id=\"rId2\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/image\" Target=\"../media/image1.png\"/>"
                "</Relationships>"
            )
            (rels_dir / "slide1.xml.rels").write_text(rels_xml, encoding="utf-8")
            (media_dir / "image1.png").write_bytes(b"A" * 100)
            # Pack the tree into a zip container that stands in for a real deck.
            with zipfile.ZipFile(input_pptx, "w", compression=zipfile.ZIP_DEFLATED) as z:
                for p in source_tree.rglob("*"):
                    if p.is_file():
                        z.write(p, arcname=str(p.relative_to(source_tree)))
            result = pic.process_single_deck(
                input_pptx=input_pptx,
                output_pptx=output_pptx,
                threads=2,
                quality=90,
                min_savings="2%",
                compressor=self._make_fake_compressor(50),
            )
            self.assertTrue(result.ok)
            self.assertIsNone(result.error)
            self.assertTrue(output_pptx.exists())
            self.assertIsNotNone(result.log_file)
            with zipfile.ZipFile(output_pptx, "r") as z:
                out_image = z.read("ppt/media/image1.png")
            self.assertEqual(len(out_image), 50)
            log_file = result.log_file
            # Explicit narrowing so static type checkers accept Path(log_file).
            if log_file is None:
                self.fail("log_file should not be None")
            log_text = Path(log_file).read_text(encoding="utf-8")
            self.assertIn("image1.png", log_text)
            self.assertIn("[1]", log_text)
 # Run the test suite when this file is executed directly as a script.
 if __name__ == "__main__":
    unittest.main()
Author	SHA1	Message	Date
Frank Conrads	698aac0aba	Refactor and UnitTest	2026-04-09 10:10:57 +02:00
Frank Conrads	332e62b764	Funktionalität min_savings der caesiumclt 1.3.0 implementiert (default: 2%), log-Datei um "Bild in Folien Nr." ergänzt.	2026-04-09 09:40:19 +02:00