Unblocks production use on sites previously running Smush.
migrate-from-smush:
- Reads wp-smpro-smush-data postmeta, writes _h4b_img_optim marker
- --dry-run / --force-rescan / --remove-smush-meta / --limit flags
- Verified: 100 attachments migrated cleanly on dev.rds.ink,
bulk count drops from 734 → 634
Picture_Tag rewriter:
- Hooks the_content + post_thumbnail_html + widget_text + Elementor
frontend + wp_get_attachment_image at priority 99
- Wraps <img> in <picture><source avif><source webp><img></picture>
when sibling files exist
- Double-wrap protection via byte-range tracking of existing <picture> blocks
- Per-image opt-out via data-no-h4b attribute
- Cached sibling lookups per request
- 8 edge-case tests pass
LOC: 2480 (was 1997). Adds class-cli-migrate.php (193 LOC) and
class-picture-tag.php (284 LOC).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
285 lines
8.9 KiB
PHP
285 lines
8.9 KiB
PHP
<?php
|
|
/**
 * Rewrites <img src="…jpg|png"> tags to <picture> with WebP + AVIF sources.
 *
 * Design notes
 * ============
 * 1. We hook `the_content` (post body), `post_thumbnail_html` (featured image),
 *    `widget_text` and `wp_get_attachment_image` (most theme/plugin image
 *    calls). For Elementor we register on `elementor/frontend/the_content` as
 *    well — Elementor pipes the_content through its own filter chain in some
 *    templates.
 *
 * 2. Sibling files MUST exist on disk for us to emit a <source>. If only WebP
 *    exists, we emit just the WebP source; same for AVIF. We never emit a
 *    <source> pointing at a non-existent file (would 404 in browsers).
 *
 * 3. Sibling naming convention is `<original>.webp` / `<original>.avif`, where
 *    `<original>` keeps its own extension (e.g. `photo.jpg.webp`).
 *    That matches what Format_Generator produces AND what Smush already
 *    produces in `wp-content/smush-webp/<rel>.webp` (we also check that
 *    mirrored tree, for WebP only).
 *
 * 4. We DO NOT rewrite:
 *      - <img> already inside a <picture> (avoid double-wrapping)
 *      - <img> with data-no-h4b attribute
 *      - <img> with no usable src
 *      - URLs inside the <img>'s own srcset attribute (it is passed through
 *        unchanged; the <source> list is the more semantic place for format
 *        alternatives, and the browser picks)
 *
 * 5. Keep ALL original <img> attributes intact (class, alt, srcset, sizes,
 *    width, height, loading, decoding, fetchpriority, …). The <img> remains
 *    the visible fallback for browsers that don't understand <picture>.
 *
 * 6. Performance: build the regex once, iterate once over each filter call.
 *    Cache the "do the siblings exist" check per request (static array).
 *
 * @package H4B\ImageOptim
 */
|
|
|
|
namespace H4B\ImageOptim;

// Refuse to run when the file is requested directly instead of being loaded
// by the host application (which is expected to define ABSPATH).
defined( 'ABSPATH' ) || exit;
|
|
|
|
/**
 * Wraps eligible <img> tags in <picture> elements that offer AVIF / WebP
 * alternatives whenever matching sibling files exist on disk.
 */
final class Picture_Tag {

    /**
     * Per-request memo of sibling lookups, keyed by the image URL exactly as it
     * appeared in the HTML. A null entry means "no such sibling on disk".
     *
     * @var array<string,array{webp:?string,avif:?string}>
     */
    private static array $sibling_cache = [];

    /**
     * Attach the rewriter to the HTML filters it handles.
     *
     * Returns without registering anything when the `rewrite_content_images`
     * setting evaluates to false.
     */
    public static function register(): void {
        if ( ! Settings::get( 'rewrite_content_images', true ) ) {
            return;
        }

        // Priority 99: run LATE so other filters get to manipulate the raw <img> first.
        add_filter( 'the_content', [ self::class, 'rewrite_html' ], 99 );
        add_filter( 'post_thumbnail_html', [ self::class, 'rewrite_html' ], 99 );
        add_filter( 'widget_text', [ self::class, 'rewrite_html' ], 99 );

        // Elementor's frontend content filter.
        add_filter( 'elementor/frontend/the_content', [ self::class, 'rewrite_html' ], 99 );

        // Single attachment image (commonly used by themes + WooCommerce).
        // Five args are accepted; rewrite_html() ignores everything after the HTML.
        add_filter( 'wp_get_attachment_image', [ self::class, 'rewrite_html' ], 99, 5 );
    }

    /**
     * Rewrite all <img> tags in $html to <picture> wrappers where siblings exist.
     *
     * <img> tags that already sit inside a <picture>…</picture> block of the
     * input are copied through untouched, so running the filter twice over the
     * same markup does not double-wrap.
     *
     * @param mixed $html      Filtered markup; anything that is not a string yields ''.
     * @param mixed ...$_extra Additional filter arguments (ignored).
     * @return string Markup with qualifying <img> tags wrapped.
     */
    public static function rewrite_html( $html, ...$_extra ): string {
        // Coerce to string; some filters can pass non-string in edge cases.
        if ( ! is_string( $html ) ) {
            return '';
        }

        // Quick reject: empty input or no <img tag at all.
        if ( $html === '' || stripos( $html, '<img' ) === false ) {
            return $html;
        }

        // 1. Byte ranges of existing <picture>…</picture> blocks, so we never
        //    touch <img> tags inside them.
        $picture_ranges = self::find_picture_ranges( $html );

        // 2. Locate every <img …> tag together with its byte offset.
        //    preg_match_all() yields 0 (no match) or false (PCRE error); in
        //    both cases the input is returned unchanged.
        if ( ! preg_match_all( '#<img\\b[^>]*>#i', $html, $matches, PREG_OFFSET_CAPTURE ) ) {
            return $html;
        }

        // 3. Single-pass rebuild: bytes between tags are copied verbatim, tags
        //    outside any picture range go through maybe_wrap().
        $out    = '';
        $cursor = 0;
        foreach ( $matches[0] as [ $img_tag, $offset ] ) {
            $out .= substr( $html, $cursor, $offset - $cursor );
            $out .= self::is_inside_range( $offset, $picture_ranges )
                ? $img_tag
                : self::maybe_wrap( $img_tag );
            $cursor = $offset + strlen( $img_tag );
        }

        // Trailing bytes after the last <img>.
        return $out . substr( $html, $cursor );
    }

    /**
     * Build a list of [start, end_exclusive] byte ranges covering every
     * <picture>…</picture> block in $html.
     *
     * Scanning stops at the first opening tag that has no closing tag after it.
     *
     * @return array<int, array{0:int,1:int}>
     */
    private static function find_picture_ranges( string $html ): array {
        $closing_tag = '</picture>';
        $ranges      = [];
        $offset      = 0;

        while ( true ) {
            $open = stripos( $html, '<picture', $offset );
            if ( $open === false ) {
                break;
            }
            $close = stripos( $html, $closing_tag, $open );
            if ( $close === false ) {
                break;
            }
            $ranges[] = [ $open, $close + strlen( $closing_tag ) ];
            $offset   = $close + 1;
        }

        return $ranges;
    }

    /**
     * Tell whether a byte offset falls inside any of the given ranges.
     *
     * @param array<int, array{0:int,1:int}> $ranges [start, end_exclusive] pairs.
     */
    private static function is_inside_range( int $offset, array $ranges ): bool {
        foreach ( $ranges as [ $start, $end ] ) {
            if ( $offset >= $start && $offset < $end ) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decide whether to wrap a single <img> tag.
     *
     * The tag is returned unchanged when it carries the `data-no-h4b` opt-out,
     * has no quoted `src` attribute, or has no WebP/AVIF sibling on disk.
     *
     * @param string $img_tag One complete <img …> tag.
     * @return string The original tag, or the tag wrapped in <picture>.
     */
    private static function maybe_wrap( string $img_tag ): string {
        // Skip if explicitly opted out.
        if ( strpos( $img_tag, 'data-no-h4b' ) !== false ) {
            return $img_tag;
        }

        // Extract the real `src` attribute. The lookbehind rejects any match
        // that is the tail of a longer attribute name (`data-src`,
        // `data-lazy-src`, …); a plain `\b` would accept those because `-` is
        // not a word character.
        if ( ! preg_match( '#(?<![\\w:.\\-])src\\s*=\\s*([\'"])(.+?)\\1#i', $img_tag, $sm ) ) {
            return $img_tag;
        }

        $siblings = self::resolve_siblings( $sm[2] );
        if ( ! $siblings['webp'] && ! $siblings['avif'] ) {
            return $img_tag;
        }

        // v0.1 emits a single plain URL per format (srcset handling for sizes
        // is a v0.2 task). AVIF is listed first so browsers that support both
        // formats pick it.
        $formats = [
            'avif' => 'image/avif',
            'webp' => 'image/webp',
        ];
        $sources = '';
        foreach ( $formats as $key => $mime ) {
            if ( $siblings[ $key ] ) {
                $sources .= sprintf(
                    '<source type="%s" srcset="%s">',
                    $mime,
                    esc_attr( $siblings[ $key ] )
                );
            }
        }

        return '<picture>' . $sources . $img_tag . '</picture>';
    }

    /**
     * Given an image URL, return WebP + AVIF sibling URLs if they exist on disk.
     *
     * Only URLs ending in .jpg/.jpeg/.png (optionally followed by a query
     * string) are considered. Results, including negative ones, are memoised
     * in self::$sibling_cache.
     *
     * @return array{webp:?string, avif:?string}
     */
    private static function resolve_siblings( string $url ): array {
        if ( isset( self::$sibling_cache[ $url ] ) ) {
            return self::$sibling_cache[ $url ];
        }
        $result = [ 'webp' => null, 'avif' => null ];

        // Only act on JPG / JPEG / PNG.
        if ( ! preg_match( '#\\.(jpe?g|png)(\\?.*)?$#i', $url ) ) {
            return self::$sibling_cache[ $url ] = $result;
        }

        // Strip the query string for the filesystem lookup. explode() is used
        // rather than strtok() because strtok() keeps process-global state and
        // would reset a tokenisation a caller may have in progress.
        $url_clean = explode( '?', $url, 2 )[0];

        // Convert URL → absolute path on disk.
        $path = self::url_to_path( $url_clean );
        if ( $path === null ) {
            return self::$sibling_cache[ $url ] = $result;
        }

        // Candidate 1: alongside the source (what we generate).
        if ( is_readable( $path . '.webp' ) ) {
            $result['webp'] = $url_clean . '.webp';
        } else {
            // Candidate 2: Smush's smush-webp/ tree, which mirrors uploads/.
            $content_dir  = trailingslashit( WP_CONTENT_DIR );
            $uploads_root = $content_dir . 'uploads/';
            if ( strpos( $path, $uploads_root ) === 0 ) {
                $rel = substr( $path, strlen( $uploads_root ) );
                if ( is_readable( $content_dir . 'smush-webp/' . $rel . '.webp' ) ) {
                    $result['webp'] = trailingslashit( WP_CONTENT_URL ) . 'smush-webp/' . $rel . '.webp';
                }
            }
        }

        // AVIF is only looked up alongside the source file.
        if ( is_readable( $path . '.avif' ) ) {
            $result['avif'] = $url_clean . '.avif';
        }

        return self::$sibling_cache[ $url ] = $result;
    }

    /**
     * Drop the scheme from an http(s) or protocol-relative URL.
     *
     * @return ?string "host/path…" without the leading "//", or null when $url
     *                 is not an http(s) / protocol-relative URL.
     */
    private static function strip_scheme( string $url ): ?string {
        if ( preg_match( '#^(?:https?:)?//(.+)$#is', $url, $m ) ) {
            return $m[1];
        }
        return null;
    }

    /**
     * Convert a URL to its absolute filesystem path, or null if the URL is
     * external / can't be resolved to a local file.
     *
     * Absolute and protocol-relative URLs are compared against the content URL
     * and the uploads base URL with the scheme ignored, so an http:// image on
     * an https:// site (or the reverse) still resolves. Site-relative URLs are
     * matched against the path component of the content URL.
     *
     * @param string $url Image URL without query string.
     */
    private static function url_to_path( string $url ): ?string {
        if ( $url === '' ) {
            return null;
        }

        $content_dir = trailingslashit( WP_CONTENT_DIR );
        $content_url = trailingslashit( WP_CONTENT_URL );

        $bare_url = self::strip_scheme( $url );
        if ( $bare_url !== null ) {
            $uploads = wp_get_upload_dir();

            // wp-content/ first (covers themes + plugins + uploads), then the
            // uploads base on its own in case only that part is served from a
            // different (e.g. CDN-prefixed) URL. Both URL roots end in '/' so
            // ".../uploads" cannot match ".../uploads-other/".
            $roots = [
                [ $content_url, $content_dir ],
                [
                    trailingslashit( (string) ( $uploads['baseurl'] ?? '' ) ),
                    trailingslashit( (string) ( $uploads['basedir'] ?? '' ) ),
                ],
            ];
            foreach ( $roots as [ $root_url, $root_dir ] ) {
                $bare_root = self::strip_scheme( $root_url );
                // A missing/relative root must be skipped: an empty needle
                // would match every URL.
                if ( $bare_root === null || $bare_root === '' ) {
                    continue;
                }
                if ( strpos( $bare_url, $bare_root ) === 0 ) {
                    return $root_dir . substr( $bare_url, strlen( $bare_root ) );
                }
            }

            return null;
        }

        // Anything left that does not start with '/' (relative paths, data:
        // URIs, other schemes) cannot be mapped to a local file.
        if ( $url[0] !== '/' ) {
            return null;
        }

        // Site-relative, e.g. /wp-content/uploads/2026/02/foo.jpg. Prefer the
        // path component of the content URL (works for installs living in a
        // subdirectory); keep "/<content dir name>/" as a fallback.
        $prefixes = [];
        $url_path = parse_url( $content_url, PHP_URL_PATH );
        if ( is_string( $url_path ) && $url_path !== '' && $url_path !== '/' ) {
            $prefixes[] = trailingslashit( $url_path );
        }
        $prefixes[] = '/' . wp_basename( $content_dir ) . '/';

        foreach ( array_unique( $prefixes ) as $prefix ) {
            if ( strpos( $url, $prefix ) === 0 ) {
                return $content_dir . substr( $url, strlen( $prefix ) );
            }
        }

        return null;
    }
}
|