def get_slide_numbers_for_image(rels_dir: str, image_filename: str) -> Optional[List[int]]:
    """
    Return the numbers of all slides that reference the given image file.

    Scans every ``slide<N>.xml.rels`` file in ``rels_dir`` and collects the
    slide number ``N`` of each file that contains an internal relationship
    whose target file name equals ``image_filename``.

    :param rels_dir: Full path to the ``ppt/slides/_rels`` directory of the
        extracted deck (the directory itself, not just its name).
    :param image_filename: Bare file name of the media part, e.g.
        ``'image80.png'``.
    :return: Sorted list of unique slide numbers, or ``None`` when no slide
        references the image.
    :raises OSError: If ``rels_dir`` does not exist or cannot be listed
        (propagated from ``os.listdir``).
    """
    relationship_tag = (
        ".//{http://schemas.openxmlformats.org/package/2006/relationships}Relationship"
    )
    # Dots are escaped and the whole name must match, so only real
    # "slide<N>.xml.rels" parts are considered.
    slide_rels_re = re.compile(r"slide(\d+)\.xml\.rels")

    # A set avoids reporting a slide twice when it holds several
    # relationships to the same image.
    slide_numbers = set()

    for rels_file in os.listdir(rels_dir):
        match = slide_rels_re.fullmatch(rels_file)
        if match is None:
            continue

        rels_path = os.path.join(rels_dir, rels_file)
        try:
            root = ET.parse(rels_path).getroot()
        except ET.ParseError:
            # Best effort: a broken .rels file must not abort the whole scan.
            print(f"Fehler beim Parsen von {rels_file}")
            continue

        for rel in root.findall(relationship_tag):
            # External targets (e.g. hyperlinks) are URLs, not package parts.
            if rel.attrib.get("TargetMode") == "External":
                continue
            target = rel.attrib.get("Target", "")
            # Compare the exact file name; a substring test would also match
            # e.g. "bigimage1.png" when looking for "image1.png".
            if target.rsplit("/", 1)[-1] == image_filename:
                slide_numbers.add(int(match.group(1)))
                break

    # os.listdir order is arbitrary; sort so the log output is stable.
    return sorted(slide_numbers) if slide_numbers else None
found_in_slide=None + slide_nr="" + try: + found_in_slide = get_slide_numbers_for_image(slides_dir.name, img_path.name) + if found_in_slide is None: + slide_nr = "NOT_USED" + else: + slide_nr = str(found_in_slide) out_sub = scratch_dir / f"img_{idx:06d}" caesium_out = compress_with_caesium(img_path, out_sub, caesium_threads, quality) if caesium_out and caesium_out.exists(): @@ -189,8 +236,9 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali finally: saving = orig_size - chosen_size saving_percent = round((saving / orig_size) * 100, 2) if orig_size > 0 else 0.0 + with lock: - log_lines.append(f"{img_path.name};{human_kb(orig_size)};{human_kb(chosen_size)};{human_kb(saving)};{saving_percent}\n") + log_lines.append(f"{img_path.name};{human_kb(orig_size)};{human_kb(chosen_size)};{human_kb(saving)};{saving_percent};{slide_nr}\n") done_count += 1 print_progress(done_count, total) diff --git a/samples/README.txt b/samples/README.txt deleted file mode 100644 index 517e728..0000000 --- a/samples/README.txt +++ /dev/null @@ -1 +0,0 @@ -Place your PPTX files here for testing, or use -i with a full path.