Replaces Smush Pro's optimisation pipeline without the grey-wash bug.

CLI commands working:
  wp h4b-img status
  wp h4b-img optimise --id=<n>
  wp h4b-img bulk
  wp h4b-img rescue

Verified on dev.rds.ink:
- ICC profile preservation works (the Smush-bug fix)
- Bulk: 20 attachments → 487 KB saved (10.4%), 0 errors
- Rescue: end-to-end mechanism verified on WorkingAsOne_horse fixture
- WebP synchronous, AVIF queued via WP-Cron
- Originals backed up to wp-content/h4b-img-originals/

See CHANGELOG.md for details and ../DESIGN-h4b-image-optim.md for architecture.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
162 lines
4.7 KiB
PHP
162 lines
4.7 KiB
PHP
<?php
|
|
/**
|
|
* PHP port of the Python grey-wash detector used to find Smush-mangled JPGs.
|
|
*
|
|
* The detection strategy:
|
|
* 1. Find pixels in the .webp twin where all three RGB channels are
|
|
* >= WEBP_WHITE_RGB_MIN (i.e. should-be-white regions).
|
|
* 2. Sample those same pixel coordinates in the JPG.
|
|
* 3. If the JPG average there is significantly darker AND the stddev is
|
|
* high, classify as `broken` (the Smush grey-wash bug fingerprint).
|
|
*
|
|
* @package H4B\ImageOptim
|
|
*/
|
|
|
|
namespace H4B\ImageOptim;
|
|
|
|
use Imagick;
|
|
use ImagickPixel;
|
|
use ImagickException;
|
|
|
|
// Stop executing this file unless the ABSPATH constant is already defined
// (presumably set by the WordPress bootstrap; a direct request would lack it).
defined( 'ABSPATH' ) || exit;
|
|
|
|
/**
 * Compares a JPG against its .webp twin to spot the grey-wash fingerprint.
 *
 * Only pixels that are near-white in the WebP are sampled; the JPG is then
 * judged on how much darker and how much more uneven it is at those same
 * coordinates.
 */
final class Rescue_Detector {

    // Calibrated thresholds (validated against rds.ink + dayboro + tawnytrails).

    /** Every RGB channel of a WebP pixel must be >= this for the pixel to be sampled. */
    public const WEBP_WHITE_RGB_MIN = 248;

    /** `delta_mean` (webp_mean - jpg_mean) must exceed this for the JPG to count as darker. */
    public const JPG_DARKER_THAN_WEBP = 10;

    /** `jpg_stddev` must exceed this for the JPG to count as mottled. */
    public const JPG_WHITE_STDDEV_BROKEN = 4.0;

    /** Fewer sampled pixels than this yields the `no_white_area` classification. */
    public const MIN_WHITE_PIXELS = 100;

    /**
     * Upper bound on the number of pixels exported into PHP arrays at once.
     *
     * exportImagePixels() returns one PHP integer per channel value, so
     * exporting a whole photo in a single call costs far more memory than the
     * image itself. Working in row strips keeps the peak bounded.
     */
    private const MAX_PIXELS_PER_STRIP = 65536;

    /**
     * Analyse a JPG against its WebP twin and classify the JPG.
     *
     * Classifications:
     *  - `error`            : JPG unreadable, or Imagick failed (see `error`).
     *  - `no_webp`          : the WebP file is not readable.
     *  - `no_white_area`    : fewer than MIN_WHITE_PIXELS near-white WebP pixels.
     *  - `broken`           : JPG is both darker and mottled in the white areas.
     *  - `uniformly_darker` : darker but not mottled.
     *  - `mottled_but_correct_brightness` : mottled but not darker.
     *  - `clean`            : neither.
     *
     * On Imagick failure `error` is prefixed with the failing stage:
     * `imagick_open: `, `imagick_resize: ` or `pixel_export: `.
     *
     * @param string $jpg_path  Path of the JPG under test.
     * @param string $webp_path Path of the WebP used as the reference.
     *
     * @return array{
     *   classification:string,
     *   severity:float,
     *   delta_mean:float,
     *   jpg_stddev:float,
     *   webp_mean:float,
     *   jpg_mean:float,
     *   webp_white_pixels:int,
     *   error:?string
     * }
     */
    public static function analyse( string $jpg_path, string $webp_path ): array {
        $result = [
            'classification'    => 'unknown',
            'severity'          => 0.0,
            'delta_mean'        => 0.0,
            'jpg_stddev'        => 0.0,
            'webp_mean'         => 0.0,
            'jpg_mean'          => 0.0,
            'webp_white_pixels' => 0,
            'error'             => null,
        ];

        if ( ! is_readable( $jpg_path ) ) {
            $result['classification'] = 'error';
            $result['error']          = 'jpg_not_readable';
            return $result;
        }
        if ( ! is_readable( $webp_path ) ) {
            $result['classification'] = 'no_webp';
            return $result;
        }

        $threshold = self::WEBP_WHITE_RGB_MIN;

        $webp_white_count = 0;
        $jpg_sum          = 0.0;
        $webp_sum         = 0.0;
        $jpg_sum_sq       = 0.0;

        $jpg   = null;
        $webp  = null;
        $stage = 'imagick_open'; // Prefix used for the error message if Imagick throws.

        try {
            $jpg  = new Imagick( $jpg_path );
            $webp = new Imagick( $webp_path );

            // Resize webp to match jpg if needed (handles minor dimension drift).
            $stage = 'imagick_resize';
            $jw    = $jpg->getImageWidth();
            $jh    = $jpg->getImageHeight();
            if ( $webp->getImageWidth() !== $jw || $webp->getImageHeight() !== $jh ) {
                $webp->resizeImage( $jw, $jh, Imagick::FILTER_LANCZOS, 1.0 );
            }

            // Pull raw RGB values strip by strip. Strips are visited top to
            // bottom and pixels left to right, i.e. the same order a single
            // whole-image export would produce, so the sums are unchanged.
            $stage          = 'pixel_export';
            $rows_per_strip = max( 1, intdiv( self::MAX_PIXELS_PER_STRIP, max( 1, $jw ) ) );

            for ( $y = 0; $y < $jh; $y += $rows_per_strip ) {
                $rows = min( $rows_per_strip, $jh - $y );

                $jpg_bytes  = $jpg->exportImagePixels( 0, $y, $jw, $rows, 'RGB', Imagick::PIXEL_CHAR );
                $webp_bytes = $webp->exportImagePixels( 0, $y, $jw, $rows, 'RGB', Imagick::PIXEL_CHAR );

                // Three consecutive entries (R, G, B) per pixel.
                $n_values = $jw * $rows * 3;
                for ( $base = 0; $base < $n_values; $base += 3 ) {
                    $wr = $webp_bytes[ $base ];
                    $wg = $webp_bytes[ $base + 1 ];
                    $wb = $webp_bytes[ $base + 2 ];
                    if ( $wr < $threshold || $wg < $threshold || $wb < $threshold ) {
                        continue; // Not a should-be-white pixel in the reference.
                    }

                    $jpg_mean_pixel  = ( $jpg_bytes[ $base ] + $jpg_bytes[ $base + 1 ] + $jpg_bytes[ $base + 2 ] ) / 3.0;
                    $webp_mean_pixel = ( $wr + $wg + $wb ) / 3.0;

                    $webp_white_count++;
                    $jpg_sum    += $jpg_mean_pixel;
                    $webp_sum   += $webp_mean_pixel;
                    $jpg_sum_sq += $jpg_mean_pixel * $jpg_mean_pixel;
                }
            }
        } catch ( ImagickException $e ) {
            $result['classification'] = 'error';
            $result['error']          = $stage . ': ' . $e->getMessage();
            return $result;
        } finally {
            // Release both handles on every path, including a failure while
            // opening the second image.
            if ( $jpg instanceof Imagick ) {
                $jpg->clear();
            }
            if ( $webp instanceof Imagick ) {
                $webp->clear();
            }
        }

        $result['webp_white_pixels'] = $webp_white_count;

        if ( $webp_white_count < self::MIN_WHITE_PIXELS ) {
            $result['classification'] = 'no_white_area';
            return $result;
        }

        $jpg_mean  = $jpg_sum / $webp_white_count;
        $webp_mean = $webp_sum / $webp_white_count;
        // E[x^2] - E[x]^2; rounding can push this slightly below zero, hence the clamp.
        $variance   = ( $jpg_sum_sq / $webp_white_count ) - ( $jpg_mean * $jpg_mean );
        $jpg_stddev = $variance > 0 ? sqrt( $variance ) : 0.0;

        $result['jpg_mean']   = $jpg_mean;
        $result['webp_mean']  = $webp_mean;
        $result['delta_mean'] = $webp_mean - $jpg_mean;
        $result['jpg_stddev'] = $jpg_stddev;

        $is_darker  = $result['delta_mean'] > self::JPG_DARKER_THAN_WEBP;
        $is_mottled = $jpg_stddev > self::JPG_WHITE_STDDEV_BROKEN;

        if ( $is_darker && $is_mottled ) {
            $result['classification'] = 'broken';
            // Brightness drop scaled by how far the spread exceeds its threshold.
            $result['severity'] = $result['delta_mean']
                * ( $jpg_stddev / self::JPG_WHITE_STDDEV_BROKEN );
        } elseif ( $is_darker ) {
            $result['classification'] = 'uniformly_darker';
            $result['severity']       = $result['delta_mean'];
        } elseif ( $is_mottled ) {
            $result['classification'] = 'mottled_but_correct_brightness';
            $result['severity']       = $jpg_stddev;
        } else {
            $result['classification'] = 'clean';
        }

        return $result;
    }
}
|