Refactor and UnitTest

2026-04-09 10:10:57 +02:00
parent 332e62b764
commit 698aac0aba
4 changed files with 270 additions and 63 deletions


@@ -32,7 +32,8 @@ from pathlib import Path
 from datetime import timedelta
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from threading import Lock
-from typing import List, Optional
+from dataclasses import dataclass
+from typing import Callable, List, Optional

 __version__ = "1.1.6"
@@ -43,6 +44,41 @@ TEMP_PREFIX = "pptx_compress_"
 DEFAULT_MIN_SAVINGS = "2%"

+@dataclass
+class DeckResult:
+    input: str
+    output: str
+    ok: bool = False
+    size_before: int = 0
+    size_after: int = 0
+    elapsed_sec: float = 0.0
+    error: Optional[str] = None
+    log_file: Optional[str] = None
+
+
+@dataclass
+class ImageProcessResult:
+    image_name: str
+    orig_size: int
+    chosen_size: int
+    slide_nr: str
+
+
+def discover_images(media_dir: Path) -> list[Path]:
+    images: list[Path] = []
+    if media_dir.exists():
+        for f in sorted(media_dir.iterdir()):
+            if f.is_file() and f.suffix.lower() in ALLOWED_EXT:
+                images.append(f)
+    return images
+
+
+def image_result_to_log_line(image_result: ImageProcessResult) -> str:
+    saving = image_result.orig_size - image_result.chosen_size
+    saving_percent = round((saving / image_result.orig_size) * 100, 2) if image_result.orig_size > 0 else 0.0
+    return f"{image_result.image_name};{human_kb(image_result.orig_size)};{human_kb(image_result.chosen_size)};{human_kb(saving)};{saving_percent};{image_result.slide_nr}\n"
+

 # -------------------- Utilities --------------------
 def human_mb(nbytes: int) -> float:
     return round(nbytes / (1024 * 1024), 2)
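Extracting discover_images and image_result_to_log_line as pure helpers is what enables the "UnitTest" half of this commit: both can now be exercised without a real deck or caesiumclt. A minimal test sketch, assuming the script is importable as a module (pptx_compress is a hypothetical name here) and that ALLOWED_EXT includes .png and .jpg:

import tempfile
import unittest
from pathlib import Path

from pptx_compress import ImageProcessResult, discover_images, image_result_to_log_line  # hypothetical module name

class HelperTests(unittest.TestCase):
    def test_discover_images_filters_and_sorts(self):
        with tempfile.TemporaryDirectory() as tmp:
            media = Path(tmp)
            (media / "b.png").write_bytes(b"x")
            (media / "a.jpg").write_bytes(b"x")
            (media / "notes.txt").write_bytes(b"x")  # extension not in ALLOWED_EXT, should be skipped
            self.assertEqual([p.name for p in discover_images(media)], ["a.jpg", "b.png"])

    def test_log_line_reports_saving_percent(self):
        r = ImageProcessResult(image_name="a.png", orig_size=2048, chosen_size=1024, slide_nr="3")
        line = image_result_to_log_line(r)
        self.assertTrue(line.startswith("a.png;"))
        self.assertIn(";50.0;3", line)  # 1024 of 2048 bytes saved -> 50.0 percent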
@@ -157,21 +193,56 @@ def get_slide_numbers_for_image(rels_dir: Path, image_filename: str) -> Optional
     return slides if slides else None

+
+def process_image_file(
+    idx: int,
+    img_path: Path,
+    scratch_dir: Path,
+    image_to_slides: dict[str, List[int]],
+    caesium_threads: int | None,
+    quality: int,
+    min_savings: str,
+    compressor: Callable[[Path, Path, int | None, int, str], Path | None],
+) -> ImageProcessResult:
+    orig_size = img_path.stat().st_size
+    chosen_size = orig_size
+    found_in_slide = image_to_slides.get(img_path.name)
+    slide_nr = "NOT_USED" if found_in_slide is None else str(found_in_slide)
+    try:
+        out_sub = scratch_dir / f"img_{idx:06d}"
+        caesium_out = compressor(img_path, out_sub, caesium_threads, quality, min_savings)
+        if caesium_out and caesium_out.exists():
+            s = caesium_out.stat().st_size
+            if s < orig_size:
+                tmp_target = img_path.with_suffix(img_path.suffix + ".tmp")
+                shutil.copy2(caesium_out, tmp_target)
+                tmp_target.replace(img_path)
+                chosen_size = s
+    except Exception:
+        chosen_size = orig_size
+    return ImageProcessResult(
+        image_name=img_path.name,
+        orig_size=orig_size,
+        chosen_size=chosen_size,
+        slide_nr=slide_nr,
+    )
+
+
 # -------------------- Core per-deck processing --------------------
-def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quality: int, min_savings: str) -> dict:
+def process_single_deck(
+    input_pptx: Path,
+    output_pptx: Path,
+    threads: int,
+    quality: int,
+    min_savings: str,
+    compressor: Callable[[Path, Path, int | None, int, str], Path | None] = compress_with_caesium,
+) -> DeckResult:
     start_time = time.perf_counter()
-    result = {
-        "input": str(input_pptx),
-        "output": str(output_pptx),
-        "ok": False,
-        "size_before": 0,
-        "size_after": 0,
-        "elapsed_sec": 0.0,
-        "error": None,
-        "log_file": None,
-    }
+    result = DeckResult(
+        input=str(input_pptx),
+        output=str(output_pptx),
+    )

     try:
         if not input_pptx.exists() or input_pptx.suffix.lower() != ".pptx":
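The new compressor parameter turns the caesiumclt invocation into an injectable dependency, so process_image_file can be tested with a stub instead of the real CLI. A sketch under the same hypothetical module name; the stub mirrors the call contract visible above (image path, output directory, thread count, quality, and min-savings in, compressed file path or None out):

import tempfile
import unittest
from pathlib import Path

from pptx_compress import process_image_file  # hypothetical module name

class ProcessImageFileTest(unittest.TestCase):
    def test_smaller_output_replaces_original(self):
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            img = root / "image1.png"
            img.write_bytes(b"\x89PNG" + b"\x00" * 1000)  # stand-in for a large image

            def fake_compressor(src, out_sub, threads, quality, min_savings):
                # Stands in for compress_with_caesium: write a smaller file under out_sub and return its path.
                out_sub.mkdir(parents=True, exist_ok=True)
                out = out_sub / src.name
                out.write_bytes(b"\x89PNG" + b"\x00" * 10)
                return out

            result = process_image_file(
                idx=0,
                img_path=img,
                scratch_dir=root / "scratch",
                image_to_slides={"image1.png": [1, 3]},
                caesium_threads=None,
                quality=80,
                min_savings="2%",
                compressor=fake_compressor,
            )
            self.assertLess(result.chosen_size, result.orig_size)
            self.assertEqual(img.stat().st_size, result.chosen_size)  # original was swapped in place
            self.assertEqual(result.slide_nr, "[1, 3]")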
@@ -188,7 +259,7 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
         log_lines = ["image_name;size_before(kb);size_after(kb);saving(kb);saving_percent(%);in_slide_number\n"]
         size_before = input_pptx.stat().st_size
-        result["size_before"] = size_before
+        result.size_before = size_before

         with zipfile.ZipFile(input_pptx, "r") as z:
             z.extractall(work_dir)
@@ -196,19 +267,14 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
         slides_dir = work_dir / "ppt" / "slides"
         rels_dir = slides_dir / "_rels"
         media_dir = work_dir / "ppt" / "media"

-        images = []
-        if media_dir.exists():
-            for f in sorted(media_dir.iterdir()):
-                if f.is_file() and f.suffix.lower() in ALLOWED_EXT:
-                    images.append(f)
+        images = discover_images(media_dir)
         total = len(images)
         print(f"[Processing] {input_pptx.name}: {total} Bild(er) gefunden")
         print_progress(0, total)

-        if not which("caesiumclt"):
+        if not which("caesiumclt") and compressor is compress_with_caesium:
             raise RuntimeError("'caesiumclt' nicht gefunden. Bitte installieren und in PATH verfügbar machen.")

         caesium_threads = 1 if threads and threads > 1 else None
@@ -218,36 +284,21 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
         def worker(idx: int, img_path: Path):
             nonlocal done_count
-            orig_size = img_path.stat().st_size
-            chosen_size = orig_size
-            found_in_slide = None
-            slide_nr = ""
+            image_result = process_image_file(
+                idx=idx,
+                img_path=img_path,
+                scratch_dir=scratch_dir,
+                image_to_slides=image_to_slides,
+                caesium_threads=caesium_threads,
+                quality=quality,
+                min_savings=min_savings,
+                compressor=compressor,
+            )

-            try:
-                found_in_slide = image_to_slides.get(img_path.name)
-                if found_in_slide is None:
-                    slide_nr = "NOT_USED"
-                else:
-                    slide_nr = str(found_in_slide)
-                out_sub = scratch_dir / f"img_{idx:06d}"
-                caesium_out = compress_with_caesium(img_path, out_sub, caesium_threads, quality, min_savings)
-                if caesium_out and caesium_out.exists():
-                    s = caesium_out.stat().st_size
-                    if s < orig_size:
-                        tmp_target = img_path.with_suffix(img_path.suffix + ".tmp")
-                        shutil.copy2(caesium_out, tmp_target)
-                        tmp_target.replace(img_path)
-                        chosen_size = s
-            except Exception:
-                chosen_size = orig_size
-            finally:
-                saving = orig_size - chosen_size
-                saving_percent = round((saving / orig_size) * 100, 2) if orig_size > 0 else 0.0
-                with lock:
-                    log_lines.append(f"{img_path.name};{human_kb(orig_size)};{human_kb(chosen_size)};{human_kb(saving)};{saving_percent};{slide_nr}\n")
-                    done_count += 1
-                    print_progress(done_count, total)
+            with lock:
+                log_lines.append(image_result_to_log_line(image_result))
+                done_count += 1
+                print_progress(done_count, total)

         if total > 0:
             with ThreadPoolExecutor(max_workers=max(1, threads)) as ex:
@@ -275,7 +326,7 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
         zip_dir_to_pptx(work_dir, output_pptx)
         size_after = output_pptx.stat().st_size
-        result["size_after"] = size_after
+        result.size_after = size_after

         try:
             with open(log_file, "w", encoding="utf-8") as f:
@@ -284,9 +335,9 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
             pass

         elapsed = time.perf_counter() - start_time
-        result["elapsed_sec"] = elapsed
-        result["log_file"] = str(log_file)
-        result["ok"] = True
+        result.elapsed_sec = elapsed
+        result.log_file = str(log_file)
+        result.ok = True

         savings_pct = 0.0 if size_before == 0 else round(100.0 * (size_before - size_after) / size_before, 2)
         print(f"[OK] Fertig! ({input_pptx.name})")
@@ -298,7 +349,7 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
print(" Log: ", log_file)
except Exception as e:
result["error"] = str(e)
result.error = str(e)
finally:
try:
shutil.rmtree(work_dir, ignore_errors=True) # type: ignore[name-defined]
@@ -421,13 +472,13 @@ def main():
                 continue
             dst = out_dir / f"{src.stem}_compressed.pptx"
             res = process_single_deck(src, dst, args.threads, args.quality, args.min_savings)
-            if res['ok']:
+            if res.ok:
                 successes += 1
-                overall_before += res['size_before']
-                overall_after += res['size_after']
+                overall_before += res.size_before
+                overall_after += res.size_after
             else:
                 failures += 1
-                print(f" Fehler: {src.name} -> {res['error']}")
+                print(f" Fehler: {src.name} -> {res.error}")
     else:
         src = input_files[0]
         if args.output_dir:
@@ -436,13 +487,13 @@ def main():
         else:
             dst = Path(args.output).resolve() if args.output else src.with_name(f"{src.stem}_compressed.pptx")
         res = process_single_deck(src, dst, args.threads, args.quality, args.min_savings)
-        if res['ok']:
+        if res.ok:
             successes += 1
-            overall_before += res['size_before']
-            overall_after += res['size_after']
+            overall_before += res.size_before
+            overall_after += res.size_after
         else:
             failures += 1
-            print(f" Fehler: {src.name} -> {res['error']}")
+            print(f" Fehler: {src.name} -> {res.error}")

     if batch_mode:
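Because compressor now defaults to compress_with_caesium but can be overridden, and process_single_deck reports through DeckResult instead of a dict, even the deck-level error path is testable without caesiumclt installed. A sketch under the same hypothetical module name, assuming the validation branch shown earlier raises on a missing input and that the cleanup in the finally block tolerates the early exit (as its # type: ignore[name-defined] comment suggests):

import unittest
from pathlib import Path

from pptx_compress import process_single_deck  # hypothetical module name

class ProcessSingleDeckTest(unittest.TestCase):
    def test_missing_input_yields_error_result(self):
        res = process_single_deck(
            Path("/nonexistent/deck.pptx"),
            Path("/tmp/out.pptx"),
            threads=1,
            quality=80,
            min_savings="2%",
            compressor=lambda *args: None,  # stub; the existence check raises before any compression happens
        )
        self.assertFalse(res.ok)
        self.assertIsNotNone(res.error)
        self.assertEqual((res.size_before, res.size_after), (0, 0))  # DeckResult defaults survive the failure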