@@ -18,6 +18,8 @@ Highlights:
import argparse
import os
import re
import xml . etree . ElementTree as ET
import sys
import zipfile
import tempfile
@@ -30,6 +32,8 @@ from pathlib import Path
from datetime import timedelta
from concurrent . futures import ThreadPoolExecutor , as_completed
from threading import Lock
from typing import List , Optional
__version__ = " 1.1.4 "
@@ -117,6 +121,38 @@ def format_duration(seconds: float) -> str:
return f " { hms } . { frac [ : 2 ] } "
return base
def get_slide_numbers_for_image(rels_dir: str, image_filename: str) -> Optional[List[int]]:
    """Return the numbers of the slides that reference an image file.

    Scans every ``slide<N>.xml.rels`` file in *rels_dir* and collects ``N``
    for each file that contains a ``Relationship`` element whose ``Target``
    points at *image_filename*.

    :param rels_dir: path to the directory holding the slide relationship
        files (``ppt/slides/_rels`` inside an extracted deck)
    :param image_filename: bare file name of the image, e.g. ``image80.png``
    :return: ascending list of unique slide numbers, or ``None`` when no
        slide references the image
    :raises OSError: if *rels_dir* cannot be listed
    """
    relationship_path = (
        ".//{http://schemas.openxmlformats.org/package/2006/relationships}Relationship"
    )
    # Compiled once; the dots are escaped so only real "slide<N>.xml.rels"
    # names are accepted.
    rels_name = re.compile(r"slide(\d+)\.xml\.rels")

    slide_numbers = set()
    for rels_file in os.listdir(rels_dir):
        name_match = rels_name.fullmatch(rels_file)
        if name_match is None:
            continue
        try:
            root = ET.parse(os.path.join(rels_dir, rels_file)).getroot()
        except ET.ParseError:
            # Best effort: a damaged .rels file must not abort the whole scan.
            print(f"Fehler beim Parsen von {rels_file}")
            continue
        for rel in root.findall(relationship_path):
            target = rel.attrib.get("Target", "")
            # Compare the final path component exactly; a substring test would
            # let "image1.png" match "../media/bigimage1.png".
            target_name = target.replace("\\", "/").rsplit("/", 1)[-1]
            if target_name == image_filename:
                slide_numbers.add(int(name_match.group(1)))
                break  # one hit per slide is enough
    # Sorted so the result does not depend on os.listdir() ordering.
    return sorted(slide_numbers) or None
# -------------------- Core per-deck processing --------------------
def process_single_deck ( input_pptx : Path , output_pptx : Path , threads : int , quality : int ) - > dict :
start_time = time . perf_counter ( )
@@ -143,7 +179,7 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
log_file = output_pptx . with_suffix ( " .log.csv " )
ensure_clean_file ( log_file )
log_lines = [ " image_name;size_before(kb);size_after(kb);saving(kb);saving_percent( % ) \n " ]
log_lines = [ " image_name;size_before(kb);size_after(kb);saving(kb);saving_percent( % );in_slide_number \n " ]
size_before = input_pptx . stat ( ) . st_size
result [ " size_before " ] = size_before
@@ -151,8 +187,11 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
with zipfile . ZipFile ( input_pptx , " r " ) as z :
z . extractall ( work_dir )
slides_dir = work_dir / " ppt " / " slides "
media_dir = work_dir / " ppt " / " media "
images = [ ]
if media_dir . exists ( ) :
for f in sorted ( media_dir . iterdir ( ) ) :
if f . is_file ( ) and f . suffix . lower ( ) in ALLOWED_EXT :
@@ -174,7 +213,15 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
ext = img_path . suffix . lower ( )
orig_size = img_path . stat ( ) . st_size
chosen_size = orig_size
found_in_slide = None
slide_nr = " "
try :
found_in_slide = get_slide_numbers_for_image ( slides_dir . name , img_path . name )
if found_in_slide is None :
slide_nr = " NOT_USED "
else :
slide_nr = str ( found_in_slide )
out_sub = scratch_dir / f " img_ { idx : 06d } "
caesium_out = compress_with_caesium ( img_path , out_sub , caesium_threads , quality )
if caesium_out and caesium_out . exists ( ) :
@@ -189,8 +236,9 @@ def process_single_deck(input_pptx: Path, output_pptx: Path, threads: int, quali
finally :
saving = orig_size - chosen_size
saving_percent = round ( ( saving / orig_size ) * 100 , 2 ) if orig_size > 0 else 0.0
with lock :
log_lines . append ( f " { img_path . name } ; { human_kb ( orig_size ) } ; { human_kb ( chosen_size ) } ; { human_kb ( saving ) } ; { saving_percent } \n " )
log_lines . append ( f " { img_path . name } ; { human_kb ( orig_size ) } ; { human_kb ( chosen_size ) } ; { human_kb ( saving ) } ; { saving_percent } ; { slide_nr } \n " )
done_count + = 1
print_progress ( done_count , total )