Version 1.1.7 fügt PNG to JPEG Funktion hinzu. Ist das komprimierte PNG immer noch >500 KB wird versucht, dies als JPEG mit quality=90 zu konvertieren, das ist oft sehr viel besser komprimiert

This commit is contained in:
2026-06-03 14:24:25 +02:00
parent d779821198
commit 337bf1e97b
4 changed files with 319 additions and 50 deletions
+2 -1
View File
@@ -2,4 +2,5 @@ python-3.*-embed-amd64.zip
python-embed/*
.vscode/launch.json
logs/*.log
__pycache__/*
__pycache__/*
.coverage
+7 -2
View File
@@ -1,6 +1,6 @@
# PPTX Image Compressor (CaesiumCLT only)
**Version 1.1.6**
**Version 1.1.7**
Dieses Paket enthält:
@@ -32,11 +32,16 @@ Die Batch lädt bei Bedarf automatisch das **Windows Embeddable Python Package**
- Entpackt die PPTX in einen TempOrdner
- Komprimiert **JPG/JPEG, PNG, WebP, GIF** mit **CaesiumCLT** (Default `-q 90`, `-O bigger`)
- Ersetzt Bilder nur, wenn die komprimierte Datei kleiner ist
- Versucht bei PNG zusätzlich einen PNG->JPG Wechsel, wenn das Bild nach Kompression noch größer als 500 KB ist
- Ersetzt Bilder nur, wenn sei mindestens 2% kleiner sind (verhindert *doppelte Komprimierung*)
- Schreibt ein CSVLog (`.log` neben der OutputPPTX)
- Baut eine neue PPTX und zeigt eine Summary (Name, Größe vorher/nachher, Ersparnis %, Zeit)
Baut eine neue PPTX und zeigt eine Summary (Name, Größe vorher/nachher, Ersparnis %, Zeit)
- Räumt alle temporären Dateien auf (keine CaesiumTempfiles in der finalen PPTX)
## Änderungen in 1.1.7
- PNG->JPG Fallback für große PNGs (> 500 KB nach Kompression) hinzugefügt
- CSV-Logging um `image_type_changed` erweitert (`png_jpg` bei Typwechsel)
## Hinweise
- `-t` steuert die Parallelität der PythonThreads; intern wird `caesiumclt --threads 1` gesetzt, sobald `-t > 1`, um Oversubscription zu vermeiden. Default ist 16
- `-q` steuert das Qualitätslevel; intern wird `caesiumclt -q` mit diesem Wert von `0..100` benutzt, Default ist 90
+206 -47
View File
@@ -2,21 +2,26 @@
# -*- coding: utf-8 -*-
"""
PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, Batch, sauberes Cleanup)
Version: 1.1.6
Version: 1.1.7
Highlights:
- Caesium-Scratch außerhalb des PPTX-Arbeitsverzeichnisses -> keine Tempfiles in finaler PPTX
- Safety-Cleanup: entfernt 'caesium*' Ordner und '*.tmp' in ppt/media, bevor gezippt wird
- Overwrite Policy: -O bigger
- Log: image_name,size_before,size_after,saving,saving_percent
- Log: image_name,size_before,size_after,saving,saving_percent,in_slide_number,image_type_changed
- Summary inkl. Zeit benötigt
Änderungen in 1.1.7:
- PNG->JPG Fallback für große PNGs hinzugefügt (wenn nach Kompression weiterhin > 500 KB)
- Logging erweitert: neue Spalte image_type_changed mit Wert png_jpg bei Typwechsel
Änderungen in 1.1.6:
- Libcaesium 1.3.0 kann nun auch files ignorieren, wenn die Kompression kleiner als <MIN_SAVING> ist
"""
import argparse
import inspect
import os
import re
import xml.etree.ElementTree as ET
@@ -36,12 +41,14 @@ from dataclasses import dataclass
from typing import Callable, List, Optional
__version__ = "1.1.6"
__version__ = "1.1.7"
ALLOWED_EXT = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
PROGRESS_BAR_LEN = 40
TEMP_PREFIX = "pptx_compress_"
DEFAULT_MIN_SAVINGS = "2%"
PNG_TO_JPEG_THRESHOLD_BYTES = 500 * 1024
@dataclass
@@ -62,6 +69,7 @@ class ImageProcessResult:
orig_size: int
chosen_size: int
slide_nr: str
image_type_changed: str = ""
def discover_images(media_dir: Path) -> list[Path]:
@@ -76,7 +84,7 @@ def discover_images(media_dir: Path) -> list[Path]:
def image_result_to_log_line(image_result: ImageProcessResult) -> str:
saving = image_result.orig_size - image_result.chosen_size
saving_percent = round((saving / image_result.orig_size) * 100, 2) if image_result.orig_size > 0 else 0.0
return f"{image_result.image_name};{human_kb(image_result.orig_size)};{human_kb(image_result.chosen_size)};{human_kb(saving)};{saving_percent};{image_result.slide_nr}\n"
return f"{image_result.image_name};{human_kb(image_result.orig_size)};{human_kb(image_result.chosen_size)};{human_kb(saving)};{saving_percent};{image_result.slide_nr};{image_result.image_type_changed}\n"
# -------------------- Utilities --------------------
@@ -132,7 +140,14 @@ def zip_dir_to_pptx(src_dir: Path, out_pptx: Path):
def which(cmd: str):
return shutil.which(cmd)
def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int | None, quality: int, min_savings: str) -> Path | None:
def compress_with_caesium(
original: Path,
out_dir: Path,
caesium_threads: int | None,
quality: int,
min_savings: str,
output_format: str = "original",
) -> Path | None:
exe = which("caesiumclt")
if not exe:
raise RuntimeError("[ERROR] 'caesiumclt' wurde nicht gefunden. Bitte CaesiumCLT installieren und in PATH verfügbar machen.")
@@ -140,7 +155,19 @@ def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int |
ext = original.suffix.lower()
if ext not in ALLOWED_EXT:
return None
cmd = [exe, "-q", str(quality), "-O", "bigger", "--min-savings", min_savings, "-o", str(out_dir)]
cmd = [
exe,
"-q",
str(quality),
"-O",
"bigger",
"--min-savings",
min_savings,
"--format",
output_format,
"-o",
str(out_dir),
]
if caesium_threads is not None:
cmd += ["--threads", str(caesium_threads)]
cmd += [str(original)]
@@ -149,12 +176,94 @@ def compress_with_caesium(original: Path, out_dir: Path, caesium_threads: int |
if r.returncode != 0:
sys.stderr.write(f"[caesiumclt] Fehler bei {original.name}:{r.stderr}")
return None
if output_format == "jpeg":
jpg_out = out_dir / f"{original.stem}.jpg"
jpeg_out = out_dir / f"{original.stem}.jpeg"
if jpg_out.exists():
return jpg_out
if jpeg_out.exists():
return jpeg_out
return None
out_file = out_dir / original.name
return out_file if out_file.exists() else None
except Exception as ex:
sys.stderr.write(f"[caesiumclt] Ausnahme bei {original.name}: {ex}")
return None
def compressor_accepts_output_format(compressor: Callable[..., Path | None]) -> bool:
if compressor is compress_with_caesium:
return True
try:
signature = inspect.signature(compressor)
except (TypeError, ValueError):
return False
return "output_format" in signature.parameters
def run_compressor(
compressor: Callable[..., Path | None],
original: Path,
out_dir: Path,
caesium_threads: int | None,
quality: int,
min_savings: str,
output_format: str = "original",
) -> Path | None:
if output_format != "original" and not compressor_accepts_output_format(compressor):
return None
if compressor is compress_with_caesium:
return compressor(original, out_dir, caesium_threads, quality, min_savings, output_format)
if compressor_accepts_output_format(compressor):
return compressor(original, out_dir, caesium_threads, quality, min_savings, output_format)
return compressor(original, out_dir, caesium_threads, quality, min_savings)
def update_relationship_targets(work_dir: Path, old_name: str, new_name: str) -> None:
rels_namespace = "{http://schemas.openxmlformats.org/package/2006/relationships}Relationship"
for rels_file in work_dir.rglob("*.rels"):
try:
tree = ET.parse(rels_file)
root = tree.getroot()
changed = False
for rel in root.findall(f".//{rels_namespace}"):
target = rel.attrib.get("Target", "")
if Path(target).name == old_name:
rel.attrib["Target"] = re.sub(r"[^/\\]+$", new_name, target)
changed = True
if changed:
tree.write(rels_file, encoding="utf-8", xml_declaration=True)
except (ET.ParseError, OSError):
continue
def ensure_jpg_content_type(work_dir: Path) -> None:
content_types_path = work_dir / "[Content_Types].xml"
if not content_types_path.exists():
return
content_ns = "{http://schemas.openxmlformats.org/package/2006/content-types}"
try:
tree = ET.parse(content_types_path)
root = tree.getroot()
has_jpg_default = False
for default in root.findall(f"{content_ns}Default"):
ext = default.attrib.get("Extension", "").lower()
if ext == "jpg":
has_jpg_default = True
break
if not has_jpg_default:
ET.SubElement(
root,
f"{content_ns}Default",
{
"Extension": "jpg",
"ContentType": "image/jpeg",
},
)
tree.write(content_types_path, encoding="utf-8", xml_declaration=True)
except (ET.ParseError, OSError):
return
def format_duration(seconds: float) -> str:
total_ms = int(round(seconds * 1000))
td = timedelta(milliseconds=total_ms)
@@ -201,34 +310,69 @@ def process_image_file(
caesium_threads: int | None,
quality: int,
min_savings: str,
compressor: Callable[[Path, Path, int | None, int, str], Path | None],
compressor: Callable[..., Path | None],
) -> ImageProcessResult:
orig_size = img_path.stat().st_size
chosen_size = orig_size
chosen_name = img_path.name
image_type_changed = ""
found_in_slide = image_to_slides.get(img_path.name)
slide_nr = "NOT_USED" if found_in_slide is None else str(found_in_slide)
try:
out_sub = scratch_dir / f"img_{idx:06d}"
caesium_out = compressor(img_path, out_sub, caesium_threads, quality, min_savings)
caesium_out = run_compressor(
compressor=compressor,
original=img_path,
out_dir=out_sub,
caesium_threads=caesium_threads,
quality=quality,
min_savings=min_savings,
)
if caesium_out and caesium_out.exists():
s = caesium_out.stat().st_size
if s < orig_size:
compressed_size = caesium_out.stat().st_size
if compressed_size < orig_size:
tmp_target = img_path.with_suffix(img_path.suffix + ".tmp")
shutil.copy2(caesium_out, tmp_target)
tmp_target.replace(img_path)
chosen_size = s
chosen_size = compressed_size
if img_path.suffix.lower() == ".png" and chosen_size > PNG_TO_JPEG_THRESHOLD_BYTES:
jpg_candidate = run_compressor(
compressor=compressor,
original=img_path,
out_dir=out_sub,
caesium_threads=caesium_threads,
quality=quality,
min_savings="0%",
output_format="jpeg",
)
if jpg_candidate and jpg_candidate.exists():
jpg_size = jpg_candidate.stat().st_size
if jpg_size < chosen_size:
jpg_target = img_path.with_suffix(".jpg")
tmp_target = jpg_target.with_suffix(".jpg.tmp")
shutil.copy2(jpg_candidate, tmp_target)
tmp_target.replace(jpg_target)
img_path.unlink(missing_ok=True)
chosen_size = jpg_size
chosen_name = jpg_target.name
image_type_changed = "png_jpg"
except Exception:
chosen_size = orig_size
chosen_name = img_path.name
image_type_changed = ""
return ImageProcessResult(
image_name=img_path.name,
image_name=chosen_name,
orig_size=orig_size,
chosen_size=chosen_size,
slide_nr=slide_nr,
image_type_changed=image_type_changed,
)
# -------------------- Core per-deck processing --------------------
def process_single_deck(
input_pptx: Path,
@@ -236,7 +380,7 @@ def process_single_deck(
threads: int,
quality: int,
min_savings: str,
compressor: Callable[[Path, Path, int | None, int, str], Path | None] = compress_with_caesium,
compressor: Callable[..., Path | None] = compress_with_caesium,
) -> DeckResult:
start_time = time.perf_counter()
result = DeckResult(
@@ -257,7 +401,7 @@ def process_single_deck(
scratch_dir = Path(tempfile.mkdtemp(prefix=TEMP_PREFIX + "scratch_"))
log_file = output_pptx.with_suffix(".log.csv")
ensure_clean_file(log_file)
log_lines = ["image_name;size_before(kb);size_after(kb);saving(kb);saving_percent(%);in_slide_number\n"]
log_lines = ["image_name;size_before(kb);size_after(kb);saving(kb);saving_percent(%);in_slide_number;image_type_changed\n"]
size_before = input_pptx.stat().st_size
result.size_before = size_before
@@ -282,6 +426,7 @@ def process_single_deck(
lock = Lock()
done_count = 0
image_to_slides = build_image_slide_index(rels_dir)
renamed_images: list[tuple[str, str]] = []
def worker(idx: int, img_path: Path):
nonlocal done_count
@@ -297,6 +442,8 @@ def process_single_deck(
)
with lock:
if image_result.image_name != img_path.name:
renamed_images.append((img_path.name, image_result.image_name))
log_lines.append(image_result_to_log_line(image_result))
done_count += 1
print_progress(done_count, total)
@@ -310,6 +457,11 @@ def process_single_deck(
except Exception as exc:
sys.stderr.write(f"[worker] Unerwarteter Fehler: {exc}\n")
if renamed_images:
for old_name, new_name in renamed_images:
update_relationship_targets(work_dir, old_name, new_name)
ensure_jpg_content_type(work_dir)
print() # newline
# Safety cleanup inside work_dir
@@ -412,39 +564,8 @@ def collect_from_dir(input_dir: Path, pattern: str, recursive: bool) -> list[Pat
# -------------------- CLI --------------------
def main():
parser = argparse.ArgumentParser(
description="PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, Batch, sauberes Cleanup)",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument('-i','--input', nargs='*', help='Input-PPTX (eine oder mehrere, Wildcards erlaubt). Bei mehreren: -O erforderlich.')
parser.add_argument('--input-dir', help='Eingabe-Verzeichnis (optional, für Batch)')
parser.add_argument('-o','--output', help='Output-PPTX (nur Single-Mode)')
parser.add_argument('-O','--output-dir', help='Output-Verzeichnis (erforderlich für Batch)')
parser.add_argument('--pattern', default='*.pptx', help='Dateimuster für --input-dir')
parser.add_argument('--recursive', action='store_true', help='Rekursiv in --input-dir suchen')
#parser.add_argument('-t','--threads', type=int, default=min(32, os.cpu_count() or 4), help='Anzahl paralleler Threads pro Datei')
parser.add_argument('-t','--threads', type=int, default=16, help='Anzahl paralleler Threads pro Datei')
parser.add_argument('-q','--quality', type=int, default=90, help='Qualität für caesiumclt (0..100), höher = bessere Qualität / größere Datei')
parser.add_argument('--min-savings', default=DEFAULT_MIN_SAVINGS, help="Mindestersparnis für caesiumclt (z. B. 2%%, 100KB, 1MB oder Bytes als Zahl)")
parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}', help="Zeigt die Versionsnummer an" )
args = parser.parse_args()
print("Threads used: ", args.threads," Threads")
if args.quality < 0 or args.quality > 100:
print('[ERROR] Ungültige Qualität. Erlaubt: 0..100')
sys.exit(1)
input_files: list[Path] = []
if args.input:
input_files.extend(expand_inputs(args.input))
if args.input_dir:
input_files.extend(collect_from_dir(Path(args.input_dir), args.pattern, args.recursive))
if len(input_files) == 0:
parser.print_help()
sys.exit(1)
parser, args = extractParserArguments()
input_files = validateParserArguments(parser, args)
batch_mode = len(input_files) > 1
@@ -511,6 +632,44 @@ def main():
print(f"Gesamtgröße nachher: {human_mb(overall_after)} MB")
print(f"Gesamt-Ersparnis: {pct}%")
def validateParserArguments(parser, args):
print("Threads used: ", args.threads," Threads")
if args.quality < 0 or args.quality > 100:
print('[ERROR] Ungültige Qualität. Erlaubt: 0..100')
sys.exit(1)
input_files: list[Path] = []
if args.input:
input_files.extend(expand_inputs(args.input))
if args.input_dir:
input_files.extend(collect_from_dir(Path(args.input_dir), args.pattern, args.recursive))
if len(input_files) == 0:
parser.print_help()
sys.exit(1)
return input_files
def extractParserArguments():
parser = argparse.ArgumentParser(
description="PPTX Grafik-Komprimier-Tool (nur CaesiumCLT, Multi-Thread, Batch, sauberes Cleanup)",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument('-i','--input', nargs='*', help='Input-PPTX (eine oder mehrere, Wildcards erlaubt). Bei mehreren: -O erforderlich.')
parser.add_argument('--input-dir', help='Eingabe-Verzeichnis (optional, für Batch)')
parser.add_argument('-o','--output', help='Output-PPTX (nur Single-Mode)')
parser.add_argument('-O','--output-dir', help='Output-Verzeichnis (erforderlich für Batch)')
parser.add_argument('--pattern', default='*.pptx', help='Dateimuster für --input-dir')
parser.add_argument('--recursive', action='store_true', help='Rekursiv in --input-dir suchen')
#parser.add_argument('-t','--threads', type=int, default=min(32, os.cpu_count() or 4), help='Anzahl paralleler Threads pro Datei')
parser.add_argument('-t','--threads', type=int, default=16, help='Anzahl paralleler Threads pro Datei')
parser.add_argument('-q','--quality', type=int, default=90, help='Qualität für caesiumclt (0..100), höher = bessere Qualität / größere Datei')
parser.add_argument('--min-savings', default=DEFAULT_MIN_SAVINGS, help="Mindestersparnis für caesiumclt (z. B. 2%%, 100KB, 1MB oder Bytes als Zahl)")
parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}', help="Zeigt die Versionsnummer an" )
args = parser.parse_args()
return parser,args
+104
View File
@@ -23,11 +23,13 @@ class TestPptxImageCompress(unittest.TestCase):
orig_size=1000,
chosen_size=800,
slide_nr="[1, 2]",
image_type_changed="",
)
line = pic.image_result_to_log_line(image_result)
self.assertIn("image1.png", line)
self.assertIn("[1, 2]", line)
self.assertIn("20.0", line)
self.assertTrue(line.endswith(";\n"))
def test_process_image_file_replaces_when_smaller(self):
with tempfile.TemporaryDirectory() as td:
@@ -57,6 +59,40 @@ class TestPptxImageCompress(unittest.TestCase):
self.assertEqual(img.stat().st_size, 40)
self.assertEqual(result.slide_nr, "[1]")
def test_process_image_file_converts_large_png_to_jpg(self):
with tempfile.TemporaryDirectory() as td:
root = Path(td)
img = root / "image1.png"
img.write_bytes(b"A" * 800000)
scratch = root / "scratch"
def fake_compressor(original: Path, out_dir: Path, caesium_threads: int | None, quality: int, min_savings: str, output_format: str = "original"):
out_dir.mkdir(parents=True, exist_ok=True)
if output_format == "jpeg":
out = out_dir / "image1.jpg"
out.write_bytes(b"C" * 200000)
return out
out = out_dir / original.name
out.write_bytes(b"B" * 700000)
return out
result = pic.process_image_file(
idx=1,
img_path=img,
scratch_dir=scratch,
image_to_slides={"image1.png": [2]},
caesium_threads=1,
quality=90,
min_savings="2%",
compressor=fake_compressor,
)
self.assertEqual(result.image_name, "image1.jpg")
self.assertEqual(result.chosen_size, 200000)
self.assertEqual(result.image_type_changed, "png_jpg")
self.assertFalse(img.exists())
self.assertTrue((root / "image1.jpg").exists())
def test_process_image_file_keeps_original_when_bigger(self):
with tempfile.TemporaryDirectory() as td:
root = Path(td)
@@ -84,6 +120,7 @@ class TestPptxImageCompress(unittest.TestCase):
self.assertEqual(result.chosen_size, 100)
self.assertEqual(img.stat().st_size, 100)
self.assertEqual(result.slide_nr, "NOT_USED")
self.assertEqual(result.image_type_changed, "")
def test_process_single_deck_with_injected_compressor(self):
with tempfile.TemporaryDirectory() as td:
@@ -136,8 +173,75 @@ class TestPptxImageCompress(unittest.TestCase):
if log_file is None:
self.fail("log_file should not be None")
log_text = Path(log_file).read_text(encoding="utf-8")
self.assertIn("image_name;size_before(kb);size_after(kb);saving(kb);saving_percent(%);in_slide_number;image_type_changed", log_text)
self.assertIn("image1.png", log_text)
self.assertIn("[1]", log_text)
def test_process_single_deck_updates_rels_on_png_to_jpg(self):
with tempfile.TemporaryDirectory() as td:
root = Path(td)
input_pptx = root / "input_convert.pptx"
output_pptx = root / "output_convert.pptx"
source_tree = root / "src_convert"
rels_dir = source_tree / "ppt" / "slides" / "_rels"
media_dir = source_tree / "ppt" / "media"
rels_dir.mkdir(parents=True, exist_ok=True)
media_dir.mkdir(parents=True, exist_ok=True)
rels_xml = (
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>"
"<Relationships xmlns=\"http://schemas.openxmlformats.org/package/2006/relationships\">"
"<Relationship Id=\"rId2\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/image\" Target=\"../media/image1.png\"/>"
"</Relationships>"
)
content_types_xml = (
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>"
"<Types xmlns=\"http://schemas.openxmlformats.org/package/2006/content-types\">"
"<Default Extension=\"rels\" ContentType=\"application/vnd.openxmlformats-package.relationships+xml\"/>"
"<Default Extension=\"xml\" ContentType=\"application/xml\"/>"
"<Default Extension=\"png\" ContentType=\"image/png\"/>"
"</Types>"
)
(source_tree / "[Content_Types].xml").write_text(content_types_xml, encoding="utf-8")
(rels_dir / "slide1.xml.rels").write_text(rels_xml, encoding="utf-8")
(media_dir / "image1.png").write_bytes(b"A" * 800000)
with zipfile.ZipFile(input_pptx, "w", compression=zipfile.ZIP_DEFLATED) as z:
for p in source_tree.rglob("*"):
if p.is_file():
z.write(p, arcname=str(p.relative_to(source_tree)))
def fake_compressor(original: Path, out_dir: Path, caesium_threads: int | None, quality: int, min_savings: str, output_format: str = "original"):
out_dir.mkdir(parents=True, exist_ok=True)
if output_format == "jpeg":
out = out_dir / "image1.jpg"
out.write_bytes(b"C" * 200000)
return out
out = out_dir / original.name
out.write_bytes(b"B" * 700000)
return out
result = pic.process_single_deck(
input_pptx=input_pptx,
output_pptx=output_pptx,
threads=1,
quality=90,
min_savings="2%",
compressor=fake_compressor,
)
self.assertTrue(result.ok)
with zipfile.ZipFile(output_pptx, "r") as z:
self.assertIn("ppt/media/image1.jpg", z.namelist())
self.assertNotIn("ppt/media/image1.png", z.namelist())
rels_out = z.read("ppt/slides/_rels/slide1.xml.rels").decode("utf-8")
content_types_out = z.read("[Content_Types].xml").decode("utf-8")
self.assertIn("image1.jpg", rels_out)
self.assertIn("Extension=\"jpg\"", content_types_out)
log_file = result.log_file
if log_file is None:
self.fail("log_file should not be None")
log_text = Path(log_file).read_text(encoding="utf-8")
self.assertIn("png_jpg", log_text)
if __name__ == "__main__":