Files
h4b-image-optim/includes/class-rescue-detector.php
Henk 7e1c86f215 feat: initial v0.1.0 MVP
Replaces Smush Pro's optimisation pipeline without the grey-wash bug.

CLI commands working:
  wp h4b-img status
  wp h4b-img optimise --id=<n>
  wp h4b-img bulk
  wp h4b-img rescue

Verified on dev.rds.ink:
- ICC profile preservation works (the Smush-bug fix)
- Bulk: 20 attachments → 487 KB saved (10.4%), 0 errors
- Rescue: end-to-end mechanism verified on WorkingAsOne_horse fixture
- WebP synchronous, AVIF queued via WP-Cron
- Originals backed up to wp-content/h4b-img-originals/

See CHANGELOG.md for details + ../DESIGN-h4b-image-optim.md for architecture.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-19 13:41:03 +10:00

162 lines
4.7 KiB
PHP

<?php
/**
* PHP port of the Python grey-wash detector used to find Smush-mangled JPGs.
*
* The detection strategy:
* 1. Find pixels in the .webp twin where all three RGB channels are
* >= WEBP_WHITE_RGB_MIN (i.e. should-be-white regions).
* 2. Sample those same pixel coordinates in the JPG.
* 3. If the JPG average there is significantly darker AND the stddev is
* high, classify as `broken` (the Smush grey-wash bug fingerprint).
*
* @package H4B\ImageOptim
*/
namespace H4B\ImageOptim;
use Imagick;
use ImagickPixel;
use ImagickException;
// Stop immediately when the ABSPATH constant is not defined, i.e. when this
// file is requested directly instead of being loaded by the host application
// (presumably the WordPress bootstrap, which is expected to define ABSPATH).
defined( 'ABSPATH' ) || exit;
final class Rescue_Detector {
	// Calibrated thresholds (validated against rds.ink + dayboro + tawnytrails).

	/** Every RGB channel of a WebP pixel must be >= this value for the pixel to count as "white". */
	public const WEBP_WHITE_RGB_MIN = 248;

	/** The JPG mean must be more than this many levels below the WebP mean to count as "darker". */
	public const JPG_DARKER_THAN_WEBP = 10;

	/** JPG standard deviation (over the sampled pixels) above which the region counts as "mottled". */
	public const JPG_WHITE_STDDEV_BROKEN = 4.0;

	/** Minimum number of white WebP pixels required before any verdict is attempted. */
	public const MIN_WHITE_PIXELS = 100;

	/**
	 * Upper bound on the number of pixels pulled out of Imagick per strip.
	 *
	 * exportImagePixels() returns one PHP integer per channel value, so exporting
	 * a whole multi-megapixel image in one call can exhaust the PHP memory limit.
	 * Working strip by strip keeps the PHP-side arrays small.
	 */
	private const MAX_PIXELS_PER_STRIP = 65536;

	/**
	 * Compare a JPG against its WebP twin and classify the JPG.
	 *
	 * Pixels whose three WebP channels are all >= WEBP_WHITE_RGB_MIN are treated
	 * as "should be white". For exactly those coordinates the per-pixel channel
	 * average of the JPG is accumulated, and its mean / standard deviation are
	 * compared with the thresholds defined on this class.
	 *
	 * Possible `classification` values:
	 *  - `error`          JPG not readable, or Imagick failed (see `error`).
	 *  - `no_webp`        the WebP file is not readable.
	 *  - `no_white_area`  fewer than MIN_WHITE_PIXELS white pixels in the WebP.
	 *  - `broken`         JPG is both darker and mottled.
	 *  - `uniformly_darker`
	 *  - `mottled_but_correct_brightness`
	 *  - `clean`
	 *
	 * `severity` is delta_mean * (jpg_stddev / JPG_WHITE_STDDEV_BROKEN) for
	 * `broken`, delta_mean for `uniformly_darker`, jpg_stddev for
	 * `mottled_but_correct_brightness`, and 0.0 otherwise.
	 *
	 * Imagick failures never propagate: every ImagickException is turned into an
	 * `error` result whose `error` string is prefixed with the failing stage
	 * (`imagick_open: `, `resize: ` or `pixel_export: `).
	 *
	 * @param string $jpg_path  Filesystem path of the JPG to inspect.
	 * @param string $webp_path Filesystem path of the WebP twin used as reference.
	 *
	 * @return array{
	 *   classification:string,
	 *   severity:float,
	 *   delta_mean:float,
	 *   jpg_stddev:float,
	 *   webp_mean:float,
	 *   jpg_mean:float,
	 *   webp_white_pixels:int,
	 *   error:?string
	 * }
	 */
	public static function analyse( string $jpg_path, string $webp_path ): array {
		$result = [
			'classification'    => 'unknown',
			'severity'          => 0.0,
			'delta_mean'        => 0.0,
			'jpg_stddev'        => 0.0,
			'webp_mean'         => 0.0,
			'jpg_mean'          => 0.0,
			'webp_white_pixels' => 0,
			'error'             => null,
		];

		if ( ! is_readable( $jpg_path ) ) {
			$result['classification'] = 'error';
			$result['error']          = 'jpg_not_readable';
			return $result;
		}
		if ( ! is_readable( $webp_path ) ) {
			$result['classification'] = 'no_webp';
			return $result;
		}

		$threshold        = self::WEBP_WHITE_RGB_MIN;
		$webp_white_count = 0;
		$jpg_sum          = 0.0;
		$webp_sum         = 0.0;
		$jpg_sum_sq       = 0.0;

		$jpg   = null;
		$webp  = null;
		$stage = 'imagick_open'; // Prefix used for the error message if Imagick throws.

		try {
			$jpg  = new Imagick( $jpg_path );
			$webp = new Imagick( $webp_path );

			// Resize webp to match jpg if needed (handles minor dimension drift).
			$stage = 'resize';
			$jw    = $jpg->getImageWidth();
			$jh    = $jpg->getImageHeight();
			if ( $webp->getImageWidth() !== $jw || $webp->getImageHeight() !== $jh ) {
				$webp->resizeImage( $jw, $jh, Imagick::FILTER_LANCZOS, 1.0 );
			}

			// Pull raw RGB bytes in horizontal strips, top to bottom. Pixels are
			// visited in the same order as a single whole-image export would give,
			// so the accumulated sums are unchanged.
			$stage          = 'pixel_export';
			$rows_per_strip = max( 1, intdiv( self::MAX_PIXELS_PER_STRIP, max( 1, $jw ) ) );

			for ( $y = 0; $y < $jh; $y += $rows_per_strip ) {
				$rows       = min( $rows_per_strip, $jh - $y );
				$jpg_bytes  = $jpg->exportImagePixels( 0, $y, $jw, $rows, 'RGB', Imagick::PIXEL_CHAR );
				$webp_bytes = $webp->exportImagePixels( 0, $y, $jw, $rows, 'RGB', Imagick::PIXEL_CHAR );

				// Arrays from exportImagePixels are 0-indexed, three values per pixel.
				$n_values = $jw * $rows * 3;
				for ( $base = 0; $base < $n_values; $base += 3 ) {
					$wr = $webp_bytes[ $base ];
					$wg = $webp_bytes[ $base + 1 ];
					$wb = $webp_bytes[ $base + 2 ];

					// Only pixels that are white in the WebP are sampled.
					if ( $wr < $threshold || $wg < $threshold || $wb < $threshold ) {
						continue;
					}

					$jpg_mean_pixel  = ( $jpg_bytes[ $base ] + $jpg_bytes[ $base + 1 ] + $jpg_bytes[ $base + 2 ] ) / 3.0;
					$webp_mean_pixel = ( $wr + $wg + $wb ) / 3.0;

					$webp_white_count++;
					$jpg_sum    += $jpg_mean_pixel;
					$webp_sum   += $webp_mean_pixel;
					$jpg_sum_sq += $jpg_mean_pixel * $jpg_mean_pixel;
				}
			}
		} catch ( ImagickException $e ) {
			$result['classification'] = 'error';
			$result['error']          = $stage . ': ' . $e->getMessage();
			return $result;
		} finally {
			// Release whichever handles were opened, on success and on every failure path.
			if ( null !== $jpg ) {
				$jpg->clear();
			}
			if ( null !== $webp ) {
				$webp->clear();
			}
		}

		$result['webp_white_pixels'] = $webp_white_count;
		if ( $webp_white_count < self::MIN_WHITE_PIXELS ) {
			$result['classification'] = 'no_white_area';
			return $result;
		}

		$jpg_mean  = $jpg_sum / $webp_white_count;
		$webp_mean = $webp_sum / $webp_white_count;

		// Var(X) = E[X^2] - E[X]^2; rounding can push it slightly below zero, hence the clamp.
		$variance   = ( $jpg_sum_sq / $webp_white_count ) - ( $jpg_mean * $jpg_mean );
		$jpg_stddev = $variance > 0 ? sqrt( $variance ) : 0.0;

		$result['jpg_mean']   = $jpg_mean;
		$result['webp_mean']  = $webp_mean;
		$result['delta_mean'] = $webp_mean - $jpg_mean;
		$result['jpg_stddev'] = $jpg_stddev;

		$is_darker  = $result['delta_mean'] > self::JPG_DARKER_THAN_WEBP;
		$is_mottled = $jpg_stddev > self::JPG_WHITE_STDDEV_BROKEN;

		if ( $is_darker && $is_mottled ) {
			$result['classification'] = 'broken';
			$result['severity']       = $result['delta_mean']
				* ( $jpg_stddev / self::JPG_WHITE_STDDEV_BROKEN );
		} elseif ( $is_darker ) {
			$result['classification'] = 'uniformly_darker';
			$result['severity']       = $result['delta_mean'];
		} elseif ( $is_mottled ) {
			$result['classification'] = 'mottled_but_correct_brightness';
			$result['severity']       = $jpg_stddev;
		} else {
			$result['classification'] = 'clean';
		}

		return $result;
	}
}