diff --git a/CHANGELOG.md b/CHANGELOG.md index b88b764..89f9f28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,38 @@ All notable changes to **h4b-image-optim** will be documented here. Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.2.0] — 2026-05-19 + +Added migrate-from-smush + Picture-tag rewriter. The plugin is now usable +end-to-end on a site that previously ran Smush — no double-processing, and +WebP / AVIF actually get served to visitors. + +### Added +- **`wp h4b-img migrate-from-smush`** — reads Smush's `wp-smpro-smush-data` + postmeta, writes equivalent `_h4b_img_optim` markers so our bulk command + skips already-optimised attachments. + - `--dry-run` to see counts only + - `--force-rescan` to mark for re-optimisation instead (use when migrating + off Smush completely) + - `--remove-smush-meta` to delete the Smush postmeta after migration +- **`Picture_Tag`** rewriter (the_content + post_thumbnail_html + widget_text + + Elementor frontend + wp_get_attachment_image filters at priority 99). Wraps + `<img>` tags with `<picture><source …><img …></picture>` + when sibling files exist on disk. + - Skips images already inside a `<picture>` block (double-wrap protection + via byte-range tracking) + - Skips images with `data-no-h4b` attribute (per-image opt-out) + - Resolves siblings either alongside the JPG (`foo.jpg.webp`) or in Smush's + `wp-content/smush-webp/` mirror tree + - Caches sibling lookups per request to keep it cheap + - Preserves srcset, sizes, alt, class, and all other original `<img>` attrs + +### Verified +- migrate-from-smush: 100 attachments migrated cleanly on dev.rds.ink +- bulk count correctly drops after migration (734 → 634) +- Picture_Tag: 8 edge-case tests pass (no siblings, multiple images, opt-out, + srcset preservation, external URLs, no `<img>`, existing `<picture>`, mixed) + ## [0.1.0] — 2026-05-19 Initial MVP. 
Replaces Smush Pro's optimisation pipeline without the
diff --git a/h4b-image-optim.php b/h4b-image-optim.php
index 694ca21..cd3abe3 100644
--- a/h4b-image-optim.php
+++ b/h4b-image-optim.php
@@ -3,7 +3,7 @@
  * Plugin Name: H4B Image Optim
  * Plugin URI: https://gitea.help4bis.com/help4bis/h4b-image-optim
  * Description: ICC-safe image optimisation with WebP + AVIF generation. Replaces Smush Pro without the grey-wash bug. No CDN.
- * Version: 0.1.0
+ * Version: 0.2.0
  * Author: help4bis (Henk + Claude)
  * Author URI: https://help4bis.com
  * License: GPL-2.0-or-later
@@ -17,7 +17,7 @@ if ( ! defined( 'ABSPATH' ) ) {
 	exit;
 }
 
-define( 'H4B_IMG_OPTIM_VERSION', '0.1.0' );
+define( 'H4B_IMG_OPTIM_VERSION', '0.2.0' );
 define( 'H4B_IMG_OPTIM_FILE', __FILE__ );
 define( 'H4B_IMG_OPTIM_DIR', plugin_dir_path( __FILE__ ) );
 define( 'H4B_IMG_OPTIM_URL', plugin_dir_url( __FILE__ ) );
diff --git a/includes/class-cli-migrate.php b/includes/class-cli-migrate.php
new file mode 100644
index 0000000..18cdf95
--- /dev/null
+++ b/includes/class-cli-migrate.php
@@ -0,0 +1,193 @@
+<?php
+/**
+ * WP-CLI command: `wp h4b-img migrate-from-smush`.
+ *
+ * Reads the per-attachment stats Smush left in postmeta and either writes an
+ * equivalent marker of our own (so the bulk command skips the attachment) or,
+ * with --force-rescan, clears our marker so the attachment is re-optimised.
+ *
+ * NOTE(review): everything from the top of this file down to the `--limit`
+ * option text was missing from the reviewed copy of this patch. The prologue,
+ * the class declaration, the SMUSH_META_KEY constant and the option docs were
+ * reconstructed from the CHANGELOG entry and from how the surviving code uses
+ * them — confirm against the repository before merging.
+ *
+ * @package H4B\ImageOptim
+ */
+
+namespace H4B\ImageOptim;
+
+if ( ! defined( 'ABSPATH' ) ) {
+	exit;
+}
+
+final class CLI_Migrate {
+
+	/** Postmeta key under which Smush stores its serialized per-attachment stats. */
+	public const SMUSH_META_KEY = 'wp-smpro-smush-data';
+
+	/**
+	 * Import Smush's optimisation history into h4b-image-optim markers.
+	 *
+	 * ## OPTIONS
+	 *
+	 * [--dry-run]
+	 * : Report counts only; nothing is written.
+	 *
+	 * [--force-rescan]
+	 * : Clear our marker instead of writing one, so bulk re-optimises the attachment.
+	 *
+	 * [--remove-smush-meta]
+	 * : Delete the Smush postmeta after migration.
+	 *
+	 * [--limit=<n>]
+	 * : Max attachments to migrate. 0 = unlimited.
+	 *
+	 * ## EXAMPLES
+	 *     # See how many Smush-processed images exist
+	 *     wp h4b-img migrate-from-smush --dry-run
+	 *
+	 *     # Trust Smush's prior work — just mark them processed
+	 *     wp h4b-img migrate-from-smush
+	 *
+	 *     # Re-optimise everything from scratch through our pipeline
+	 *     wp h4b-img migrate-from-smush --force-rescan
+	 *
+	 *     # Also strip the Smush postmeta after migration
+	 *     wp h4b-img migrate-from-smush --remove-smush-meta
+	 *
+	 * @param array $args  Positional arguments (unused).
+	 * @param array $assoc Associative flags: dry-run, force-rescan, remove-smush-meta, limit.
+	 */
+	public function __invoke( $args, $assoc ): void {
+		$dry_run      = ! empty( $assoc['dry-run'] );
+		$force_rescan = ! empty( $assoc['force-rescan'] );
+		$remove_smush = ! empty( $assoc['remove-smush-meta'] );
+		$limit        = (int) ( $assoc['limit'] ?? 0 );
+
+		global $wpdb;
+
+		// Restrict to attachments: the counts below are reported as "attachments",
+		// and our marker must never be written onto any other post type.
+		$base_sql = "SELECT pm.post_id, pm.meta_value, p.post_mime_type
+			FROM {$wpdb->postmeta} pm
+			INNER JOIN {$wpdb->posts} p ON p.ID = pm.post_id
+			WHERE pm.meta_key = %s
+			  AND p.post_type = 'attachment'
+			ORDER BY pm.post_id ASC";
+		$params   = [ self::SMUSH_META_KEY ];
+		if ( $limit > 0 ) {
+			$base_sql .= ' LIMIT %d';
+			$params[]  = $limit;
+		}
+		$rows = $wpdb->get_results( $wpdb->prepare( $base_sql, $params ) );
+
+		// get_results() yields null when the query itself failed; count( null )
+		// would be a TypeError on PHP 8, so stop with a readable message instead.
+		if ( ! is_array( $rows ) ) {
+			\WP_CLI::error( 'Could not read Smush postmeta: ' . $wpdb->last_error );
+			return;
+		}
+
+		$total = count( $rows );
+		\WP_CLI::log( "Found $total attachments with Smush optimisation history." );
+		if ( $total === 0 ) {
+			\WP_CLI::success( 'Nothing to migrate.' );
+			return;
+		}
+
+		if ( $dry_run ) {
+			$action = $force_rescan ? 'mark for re-optimisation' : 'mark as already processed';
+			$remove = $remove_smush ? ' + remove Smush meta' : '';
+			\WP_CLI::success( "Dry-run only. Would $action $total attachments$remove." );
+			return;
+		}
+
+		$progress     = \WP_CLI\Utils\make_progress_bar( 'Migrating', $total );
+		$migrated     = 0;
+		$skipped      = 0;
+		$errored      = 0;
+		$removed_meta = 0;
+
+		foreach ( $rows as $row ) {
+			$id             = (int) $row->post_id;
+			$already_marked = false;
+			try {
+				if ( $force_rescan ) {
+					// Wipe our marker so bulk treats this as unprocessed
+					delete_post_meta( $id, Attachment_Meta::META_KEY );
+				} else {
+					$existing       = Attachment_Meta::get( $id );
+					$already_marked = ! empty( $existing['processed_at'] );
+					if ( ! $already_marked ) {
+						$stats = self::translate_smush_stats( (string) $row->meta_value );
+						$meta  = [
+							'version'       => H4B_IMG_OPTIM_VERSION,
+							'processed_at'  => current_time( 'c' ),
+							'migrated_from' => 'smush',
+							'tool_chain'    => [ 'smush_legacy' ],
+							'sizes'         => $stats['sizes'],
+							'totals'        => $stats['totals'],
+							'note'          => 'Migrated from Smush — not re-processed by h4b-image-optim.',
+						];
+						Attachment_Meta::set( $id, $meta );
+					}
+				}
+
+				// Runs for already-marked attachments too: otherwise a second
+				// run with --remove-smush-meta could never clean up rows that an
+				// earlier run migrated without the flag.
+				if ( $remove_smush && delete_post_meta( $id, self::SMUSH_META_KEY ) ) {
+					$removed_meta++;
+				}
+
+				if ( $already_marked ) {
+					$skipped++;
+				} else {
+					$migrated++;
+				}
+			} catch ( \Throwable $e ) {
+				// Best-effort: keep going, but say which attachment failed and why.
+				$errored++;
+				\WP_CLI::warning( sprintf( 'Attachment %d: %s', $id, $e->getMessage() ) );
+			}
+			$progress->tick();
+		}
+		$progress->finish();
+
+		\WP_CLI::success( sprintf(
+			"Migration done.\n Migrated: %d\n Already-marked (skipped): %d\n Errors: %d\n Smush meta removed: %d",
+			$migrated, $skipped, $errored, $removed_meta
+		) );
+	}
+
+	/**
+	 * Translate Smush's serialized stats into our schema.
+	 *
+	 * Smush stores:
+	 *   a:2:{
+	 *     s:"stats" => [ time, bytes, percent, size_before, size_after, lossy, keep_exif, api_version ],
+	 *     s:"sizes" => [ <size_key> => stdClass{ time, bytes, percent, size_before, size_after }, ... ]
+	 *   }
+	 *
+	 * @param string $raw Raw meta_value exactly as stored in the postmeta table.
+	 * @return array{sizes: array, totals: array} Zeroed totals and no sizes when
+	 *                                            $raw cannot be decoded.
+	 */
+	private static function translate_smush_stats( string $raw ): array {
+		$result = [
+			'sizes'  => [],
+			'totals' => [
+				'bytes_saved'  => 0,
+				'bytes_before' => 0,
+				'bytes_after'  => 0,
+				'percent'      => 0,
+			],
+		];
+
+		// Smush uses PHP serialize. Only stdClass may be instantiated; any other
+		// class name in the payload decodes to __PHP_Incomplete_Class. The @ is
+		// deliberate: a corrupt row must degrade to "no stats", not abort the run.
+		$data = @unserialize( $raw, [ 'allowed_classes' => [ \stdClass::class ] ] );
+		if ( ! is_array( $data ) ) {
+			return $result;
+		}
+
+		// Accept the totals as either an array or a stdClass, same as the sizes.
+		$stats = $data['stats'] ?? null;
+		if ( is_object( $stats ) ) {
+			$stats = get_object_vars( $stats );
+		}
+		if ( is_array( $stats ) ) {
+			$result['totals']['bytes_saved']  = (int) ( $stats['bytes'] ?? 0 );
+			$result['totals']['bytes_before'] = (int) ( $stats['size_before'] ?? 0 );
+			$result['totals']['bytes_after']  = (int) ( $stats['size_after'] ?? 0 );
+			$result['totals']['percent']      = (float) ( $stats['percent'] ?? 0 );
+		}
+
+		if ( isset( $data['sizes'] ) && is_array( $data['sizes'] ) ) {
+			foreach ( $data['sizes'] as $size_key => $info ) {
+				// $info can be stdClass or array
+				$arr = is_object( $info ) ? get_object_vars( $info ) : (array) $info;
+
+				$result['sizes'][ (string) $size_key ] = [
+					'status'        => 'migrated_from_smush',
+					'before'        => (int) ( $arr['size_before'] ?? 0 ),
+					'after'         => (int) ( $arr['size_after'] ?? 0 ),
+					'percent'       => (float) ( $arr['percent'] ?? 0 ),
+					'icc_preserved' => false, // unknown — Smush stripped it
+					'tool_chain'    => [ 'smush_legacy' ],
+					'webp'          => null,
+					'avif'          => null,
+					'avif_status'   => 'never_generated',
+					'backup'        => null,
+					'error'         => null,
+				];
+			}
+		}
+
+		return $result;
+	}
+}
diff --git a/includes/class-cli.php b/includes/class-cli.php
index be64af1..2e28fae 100644
--- a/includes/class-cli.php
+++ b/includes/class-cli.php
@@ -15,8 +15,9 @@ final class CLI {
 
 	public static function register(): void {
 		\WP_CLI::add_command( 'h4b-img', __CLASS__ );
-		\WP_CLI::add_command( 'h4b-img bulk', CLI_Bulk::class );
-		\WP_CLI::add_command( 'h4b-img rescue', CLI_Rescue::class );
+		\WP_CLI::add_command( 'h4b-img bulk',               CLI_Bulk::class );
+		\WP_CLI::add_command( 'h4b-img rescue',             CLI_Rescue::class );
+		\WP_CLI::add_command( 'h4b-img migrate-from-smush', CLI_Migrate::class );
 	}
 
 	/**
diff --git a/includes/class-picture-tag.php b/includes/class-picture-tag.php
new file mode 100644
index 0000000..58bfd15
--- /dev/null
+++ b/includes/class-picture-tag.php
@@ -0,0 +1,284 @@
+<?php
+/**
+ * Picture_Tag — rewrites <img> tags to <picture> with WebP + AVIF sources.
+ *
+ * Design notes
+ * ============
+ * 1. We hook `the_content` (post body), `post_thumbnail_html` (featured image)
+ *    and `wp_get_attachment_image` (most theme/plugin image calls). For Elementor
+ *    we register on `elementor/frontend/the_content` as well — Elementor pipes
+ *    the_content through its own filter chain in some templates.
+ *
+ * 2. Sibling files MUST exist on disk for us to emit a <picture>.
+ *    If only WebP exists, we emit just the WebP source; same for AVIF. We never
+ *    emit a <source> pointing at a non-existent file (would 404 in browsers).
+ *
+ * 3. Sibling naming convention is `<original-filename>.webp` / `<original-filename>.avif`.
+ *    That matches what Format_Generator produces AND what Smush already
+ *    produces in `wp-content/smush-webp/<relative-path>.webp` (we mirror-resolve too).
+ *
+ * 4. We DO NOT rewrite:
+ *    - <img> already inside a <picture> (avoid double-wrapping)
+ *    - <img> with data-no-h4b attribute
+ *    - <img> with no usable src
+ *    - srcset URLs (sources list is more semantic; let the browser pick)
+ *
+ * 5. Keep ALL original <img> attributes intact (class, alt, srcset, sizes,
+ *    width, height, loading, decoding, fetchpriority, …). The <img> remains
+ *    the visible fallback for browsers that don't understand <picture>.
+ *
+ * 6. Performance: build the regex once, iterate once over each filter call.
+ *    Cache the "do the siblings exist" check per request (static array).
+ *
+ * NOTE(review): the HTML tag literals in this file ('<img', '<picture',
+ * '</picture>', the <source …> templates) and the quick-reject statement in
+ * rewrite_html() were missing from the reviewed copy of this patch and have
+ * been restored from the surrounding code and comments — confirm against the
+ * repository before merging.
+ *
+ * @package H4B\ImageOptim
+ */
+
+namespace H4B\ImageOptim;
+
+if ( ! defined( 'ABSPATH' ) ) {
+	exit;
+}
+
+final class Picture_Tag {
+
+	/** @var array<string, array{webp:?string, avif:?string}> image URL → sibling URLs */
+	private static array $sibling_cache = [];
+
+	/**
+	 * Attach the rewriter to the front-end HTML filters, unless the
+	 * `rewrite_content_images` setting is switched off.
+	 */
+	public static function register(): void {
+		if ( ! Settings::get( 'rewrite_content_images', true ) ) {
+			return;
+		}
+
+		// Run LATE so other filters get to manipulate the raw <img> first.
+		add_filter( 'the_content', [ self::class, 'rewrite_html' ], 99 );
+		add_filter( 'post_thumbnail_html', [ self::class, 'rewrite_html' ], 99 );
+		add_filter( 'widget_text', [ self::class, 'rewrite_html' ], 99 );
+
+		// Elementor's frontend content filter
+		add_filter( 'elementor/frontend/the_content', [ self::class, 'rewrite_html' ], 99 );
+
+		// Single attachment_image (commonly used by themes + WooCommerce)
+		add_filter( 'wp_get_attachment_image', [ self::class, 'rewrite_html' ], 99, 5 );
+	}
+
+	/**
+	 * Rewrite all <img> tags in $html to <picture> wrappers where siblings exist.
+	 *
+	 * Skips <img> tags that are already inside an existing <picture>.
+	 *
+	 * @param mixed $html      Filtered markup; anything that is not a string yields ''.
+	 * @param mixed ...$_extra Extra filter arguments (ignored).
+	 * @return string The markup with qualifying <img> tags wrapped.
+	 */
+	public static function rewrite_html( $html, ...$_extra ): string {
+		// Coerce to string; some filters can pass non-string in edge cases.
+		if ( ! is_string( $html ) || $html === '' ) {
+			return is_string( $html ) ? $html : '';
+		}
+
+		// Quick reject: no <img anywhere means there is nothing to rewrite.
+		if ( stripos( $html, '<img' ) === false ) {
+			return $html;
+		}
+
+		// 1. Find the byte ranges of existing <picture>…</picture> blocks
+		//    so we never touch <img> tags inside them.
+		$picture_ranges = self::find_picture_ranges( $html );
+
+		// 2. Single-pass walk: rebuild output with non-img bytes copied through
+		//    and <img> tags rewritten if they sit outside any picture range AND
+		//    have qualifying siblings.
+		$out     = '';
+		$cursor  = 0;
+		$pattern = '#<img\b[^>]*>#i';
+		if ( ! preg_match_all( $pattern, $html, $matches, PREG_OFFSET_CAPTURE ) ) {
+			return $html;
+		}
+
+		foreach ( $matches[0] as $match ) {
+			[ $img_tag, $offset ] = $match;
+			$end                  = $offset + strlen( $img_tag );
+
+			// Copy through bytes preceding this <img>
+			$out .= substr( $html, $cursor, $offset - $cursor );
+
+			if ( self::is_inside_range( $offset, $picture_ranges ) ) {
+				// Already inside an existing <picture>; leave alone.
+				$out .= $img_tag;
+			} else {
+				$out .= self::maybe_wrap( $img_tag );
+			}
+			$cursor = $end;
+		}
+		$out .= substr( $html, $cursor );
+		return $out;
+	}
+
+	/**
+	 * Build a list of [start, end_exclusive] byte ranges covering every
+	 * <picture>…</picture> block in $html.
+	 *
+	 * Scanning stops at the first <picture that has no closing tag.
+	 *
+	 * @return array<int, array{0:int, 1:int}>
+	 */
+	private static function find_picture_ranges( string $html ): array {
+		$close_tag = '</picture>';
+		$ranges    = [];
+		$offset    = 0;
+		while ( true ) {
+			$open = stripos( $html, '<picture', $offset );
+			if ( $open === false ) {
+				break;
+			}
+			$close = stripos( $html, $close_tag, $open );
+			if ( $close === false ) {
+				break;
+			}
+			$ranges[] = [ $open, $close + strlen( $close_tag ) ];
+			$offset   = $close + 1;
+		}
+		return $ranges;
+	}
+
+	/**
+	 * Tell whether a byte offset falls inside any of the given ranges.
+	 *
+	 * @param array<int, array{0:int, 1:int}> $ranges [start, end_exclusive] pairs.
+	 */
+	private static function is_inside_range( int $offset, array $ranges ): bool {
+		foreach ( $ranges as [ $start, $end ] ) {
+			if ( $offset >= $start && $offset < $end ) {
+				return true;
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Decide whether to wrap a single <img> tag.
+	 *
+	 * @param string $img_tag One complete <img …> tag.
+	 * @return string The tag wrapped in <picture> with one <source> per sibling
+	 *                found on disk, or the tag unchanged.
+	 */
+	private static function maybe_wrap( string $img_tag ): string {
+		// Skip if explicitly opted out
+		if ( strpos( $img_tag, 'data-no-h4b' ) !== false ) {
+			return $img_tag;
+		}
+
+		// Extract src. The look-behind keeps `data-src=` / `data-lazy-src=` from
+		// matching: a plain \b sits between "-" and "s", so lazy-load markup
+		// would otherwise hand us the wrong attribute.
+		if ( ! preg_match( '#(?<![\w-])src\s*=\s*([\'"])(.+?)\1#i', $img_tag, $sm ) ) {
+			return $img_tag;
+		}
+		$src = $sm[2];
+
+		// Only URLs that map to a local JPG/PNG yield siblings; everything else
+		// comes back as [ null, null ].
+		$siblings = self::resolve_siblings( $src );
+		if ( ! $siblings['webp'] && ! $siblings['avif'] ) {
+			return $img_tag;
+		}
+
+		// One <source> per format, AVIF first so browsers that support both
+		// pick it.
+		// NOTE(review): each <source> carries only the sibling of `src`. When
+		// the <img> has a responsive srcset, a browser that selects a <source>
+		// no longer consults it — per-candidate srcset mapping is still to do.
+		$sources = '';
+		foreach ( [ 'avif' => 'image/avif', 'webp' => 'image/webp' ] as $format => $mime ) {
+			if ( $siblings[ $format ] ) {
+				$sources .= sprintf(
+					'<source type="%s" srcset="%s">',
+					$mime,
+					esc_attr( $siblings[ $format ] )
+				);
+			}
+		}
+
+		return '<picture>' . $sources . $img_tag . '</picture>';
+	}
+
+	/**
+	 * Given an image URL, return WebP + AVIF sibling URLs if they exist on disk.
+	 *
+	 * Results (including "nothing found") are memoised per request, keyed by
+	 * the URL exactly as passed in.
+	 *
+	 * @return array{webp:?string, avif:?string}
+	 */
+	private static function resolve_siblings( string $url ): array {
+		if ( isset( self::$sibling_cache[ $url ] ) ) {
+			return self::$sibling_cache[ $url ];
+		}
+		$result = [ 'webp' => null, 'avif' => null ];
+
+		// Only act on JPG / JPEG / PNG
+		if ( ! preg_match( '#\.(jpe?g|png)(\?.*)?$#i', $url ) ) {
+			return self::$sibling_cache[ $url ] = $result;
+		}
+
+		// Strip query string for filesystem lookup. explode() rather than
+		// strtok(): strtok() keeps hidden global state and would clobber a
+		// tokenisation a caller has in progress.
+		$url_clean = explode( '?', $url, 2 )[0];
+
+		// Convert URL → absolute path on disk
+		$path = self::url_to_path( $url_clean );
+		if ( $path === null ) {
+			return self::$sibling_cache[ $url ] = $result;
+		}
+
+		// Candidate 1: alongside the source (what we generate)
+		$webp_alongside = $path . '.webp';
+		$avif_alongside = $path . '.avif';
+
+		// Candidate 2: Smush's smush-webp/ tree
+		$content_dir     = trailingslashit( WP_CONTENT_DIR );
+		$content_url     = trailingslashit( WP_CONTENT_URL );
+		$smush_webp_path = null;
+		$smush_webp_url  = null;
+		if ( strpos( $path, $content_dir . 'uploads/' ) === 0 ) {
+			$rel             = substr( $path, strlen( $content_dir . 'uploads/' ) );
+			$smush_webp_path = $content_dir . 'smush-webp/' . $rel . '.webp';
+			$smush_webp_url  = $content_url . 'smush-webp/' . $rel . '.webp';
+		}
+
+		if ( is_readable( $webp_alongside ) ) {
+			$result['webp'] = $url_clean . '.webp';
+		} elseif ( $smush_webp_path && is_readable( $smush_webp_path ) ) {
+			$result['webp'] = $smush_webp_url;
+		}
+
+		if ( is_readable( $avif_alongside ) ) {
+			$result['avif'] = $url_clean . '.avif';
+		}
+
+		return self::$sibling_cache[ $url ] = $result;
+	}
+
+	/**
+	 * Convert a URL to its absolute filesystem path, or null if the URL is
+	 * external / can't be resolved to a local file.
+	 *
+	 * Absolute and protocol-relative URLs are compared with the scheme removed,
+	 * so an http:// image on an https:// site (or the reverse) still resolves.
+	 */
+	private static function url_to_path( string $url ): ?string {
+		// Never map a URL containing a ".." segment: it could point the
+		// is_readable() probes outside the content directory.
+		if ( preg_match( '#(^|/)\.\.(/|$)#', $url ) ) {
+			return null;
+		}
+
+		$uploads = wp_get_upload_dir();
+
+		// [ base URL, base directory ] pairs, both with a trailing slash so
+		// that ".../uploads-old/x.jpg" cannot match the ".../uploads" prefix.
+		// wp-content first (covers themes + plugins + uploads), then the
+		// uploads location in case it lives outside wp-content.
+		$bases = [
+			[ trailingslashit( WP_CONTENT_URL ), trailingslashit( WP_CONTENT_DIR ) ],
+			[ trailingslashit( $uploads['baseurl'] ), trailingslashit( $uploads['basedir'] ) ],
+		];
+
+		// Protocol-relative → give it a scheme so it is handled as absolute
+		if ( strpos( $url, '//' ) === 0 ) {
+			$url = 'https:' . $url;
+		}
+
+		$schemeless = self::strip_scheme( $url );
+		if ( $schemeless !== $url ) {
+			// Absolute URL: match host + path, ignoring http vs https.
+			foreach ( $bases as [ $base_url, $base_dir ] ) {
+				$base = self::strip_scheme( $base_url );
+				if ( strpos( $schemeless, $base ) === 0 ) {
+					return $base_dir . substr( $schemeless, strlen( $base ) );
+				}
+			}
+			return null;
+		}
+
+		// Site-relative, e.g. /wp-content/uploads/2026/02/foo.jpg. Compare
+		// against the path part of each base URL so installs living in a
+		// sub-directory resolve as well.
+		if ( $url !== '' && $url[0] === '/' ) {
+			foreach ( $bases as [ $base_url, $base_dir ] ) {
+				$base_path = (string) wp_parse_url( $base_url, PHP_URL_PATH );
+				if ( $base_path !== '' && strpos( $url, $base_path ) === 0 ) {
+					return $base_dir . substr( $url, strlen( $base_path ) );
+				}
+			}
+		}
+
+		return null;
+	}
+
+	/**
+	 * Drop a leading "http://" or "https://"; any other string is returned as is.
+	 */
+	private static function strip_scheme( string $url ): string {
+		return (string) preg_replace( '#^https?://#i', '', $url );
+	}
+}
diff --git a/includes/class-plugin.php b/includes/class-plugin.php
index 5283bac..93d95b0 100644
--- a/includes/class-plugin.php
+++ b/includes/class-plugin.php
@@ -22,6 +22,8 @@ require_once H4B_IMG_OPTIM_DIR . 'includes/class-rescue-detector.php';
 require_once H4B_IMG_OPTIM_DIR . 'includes/class-cli.php';
 require_once H4B_IMG_OPTIM_DIR . 'includes/class-cli-bulk.php';
 require_once H4B_IMG_OPTIM_DIR . 'includes/class-cli-rescue.php';
+require_once H4B_IMG_OPTIM_DIR . 'includes/class-cli-migrate.php';
+require_once H4B_IMG_OPTIM_DIR . 'includes/class-picture-tag.php';
 
 final class Plugin {
 
@@ -45,6 +47,9 @@ final class Plugin {
 		// Upload pipeline
 		Uploader_Hook::register();
 
+		// Front-end rewriting
+		Picture_Tag::register();
+
 		// Background AVIF queue (WP-Cron)
 		Format_Generator::register_cron();