GuidedFilterUpsample from Sky Optimization

Guided filter upsampling, adapted from Google's Sky Optimization repository: https://github.com/google/sky-optimization
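
In outline, the filter fits a local affine model between the full-resolution reference image and the low-resolution source mask. At every pixel p the output is

    output(p) = a(p) · reference(p) + b(p)

where a(p) is a 3-vector of per-pixel affine coefficients and b(p) a scalar residual. On the coarse grid the coefficients are the closed-form solution of a confidence-weighted least-squares fit:

    a = (covar(reference) + diag(eps^2))^-1 · covar(reference, source)
    b = mean(source) - a · mean(reference)

They are computed per pixel by the 3x3 LDL solver (solve_image_ldl3) and then smoothly upsampled back to full resolution.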

import numpy as np
import cv2

class GuidedFilter:
    """
    Guided Filter from SkyOptimization https://github.com/google/sky-optimization
    """
    def __init__(self,
                 compute_confidence=True):
        self.compute_confidence = compute_confidence

    def inference(self,
                  reference,
                  source,
                  kernel_size,
                  confidence=None,
                  eps_luma=1e-2,
                  eps_chroma=1e-2,
                  clip_output=True):
        assert reference.shape[2] == 3

        # Bring the source mask up to the reference resolution if needed.
        if np.any(np.array(source.shape) < np.array(reference.shape[:2])):
            source = self.resize(source, reference.shape[:2])
        if confidence is None:
            confidence = self.probability_to_confidence(source)
        assert confidence.shape == source.shape

        # kernel_size acts as the downsampling factor for the coarse grid.
        reference_small = self.weighted_downsample(reference, confidence, kernel_size)
        small_shape = reference_small.shape[:2]
        source_small = self.weighted_downsample(source, confidence, target_size=small_shape)

        # Per-cell covariance of the reference: upper triangle of a symmetric
        # 3x3 matrix, flattened to 6 channels by outer_product_images.
        outer_reference = self.outer_product_images(reference, reference)
        outer_reference = self.weighted_downsample(outer_reference, confidence, target_size=small_shape)
        covar = outer_reference - self.outer_product_images(reference_small, reference_small)
        # Raw moment E[reference * source], then the cross-covariance.
        cross = self.weighted_downsample(
            reference * source[..., np.newaxis], confidence, target_size=small_shape
        )
        residual_small = cross - reference_small * source_small[..., np.newaxis]
        # Regularize the diagonal entries (indices 0, 3, 5) of the covariance.
        covar[..., 0] += eps_luma ** 2
        covar[..., [3, 5]] += eps_chroma ** 2

        # Solve the per-cell 3x3 systems for the affine coefficients, then
        # upsample the coefficients (not the mask) back to full resolution.
        affine = self.solve_image_ldl3(covar, residual_small)
        residual = source_small - (affine * reference_small).sum(axis=2)
        affine = self.smooth_upsample(affine, reference.shape[:2])
        residual = self.smooth_upsample(residual, reference.shape[:2])
        output = (affine * reference).sum(axis=2) + residual
        if clip_output:
            output = output.clip(0, 1)
        return output

    def bias(self, x, b=0.8):
        # Schlick's bias curve: maps [0, 1] onto [0, 1]; b controls the bend.
        denom = ((1 / b) - 2) * (1 - x) + 1
        return x / denom

    def probability_to_confidence(self, probability, low_thresh=0.3, high_thresh=0.5):
        # Map a soft probability to a confidence weight: values far outside the
        # ambiguous [low_thresh, high_thresh] band are trusted, the rest are not.
        eps = 0.01
        low = probability < low_thresh
        high = probability > high_thresh
        confidence_low = self.bias((low_thresh - probability[low]) / low_thresh)
        confidence_high = self.bias((probability[high] - high_thresh) / (1 - high_thresh))
        confidence = np.zeros_like(probability)
        confidence[low] = confidence_low
        confidence[high] = confidence_high
        confidence = np.maximum(eps, confidence)
        return confidence

    def downsample2_antialiased(self, X):
        # 2x decimation after smoothing with a [1, 3, 3, 1]/8 binomial kernel.
        kernel = np.array([1, 3, 3, 1]) / 8
        dst = cv2.sepFilter2D(X, -1, kernel, kernel, anchor=(1, 1), borderType=cv2.BORDER_REPLICATE)
        return dst[::2, ::2]

    def resize_helper(self, X, shape):
        X = X.squeeze()
        while np.all(np.array(X.shape[:2]) >= np.array(shape) * 2):
            X = self.downsample2_antialiased(X)
        return cv2.resize(X, dsize=tuple(shape[1::-1]), interpolation=cv2.INTER_LINEAR)

    def resize(self, X, shape):
        if X.ndim == 2 or X.shape[2] <= 4:
            return self.resize_helper(X, shape)
        # OpenCV's resize handles at most 4 channels, so split and recombine.
        X1 = self.resize_helper(X[..., :3], shape)
        X2 = self.resize_helper(X[..., 3:], shape)
        return np.concatenate([X1, X2], axis=2)

    def outer_product_images(self, X, Y):
        # Per-pixel outer product, keeping the upper triangle of the 3x3
        # result flattened to 6 channels: (00, 01, 02, 11, 12, 22).
        assert X.shape[-1] == 3 and Y.shape[-1] == 3
        X_flat = X[..., :, np.newaxis]
        Y_flat = Y[..., np.newaxis, :]

        outer = np.matmul(X_flat, Y_flat)
        ind = np.triu_indices(3)
        outer = outer[..., ind[0], ind[1]]
        return outer.reshape(X.shape[:-1] + (6,))

    def smooth_upsample(self, X, size, num_steps=None):
        # Upsample gradually, roughly one resize per 4x size ratio, to reduce
        # interpolation artifacts. np.int is deprecated; use the builtin int.
        if num_steps is None:
            log4ratio = np.max(0.5 * np.log2(np.array(size) / X.shape[:2]))
            num_steps = np.maximum(1, log4ratio.round().astype(int))
        ratio = np.array(size) / X.shape[:2]
        ratio_per_step = np.array(X.shape[:2]) * ratio / num_steps
        for step in np.arange(1, num_steps + 1):
            target_shape_for_step = np.round(step * ratio_per_step).astype(int)
            X = self.resize(X, target_shape_for_step)
        return X

    def solve_image_ldl3(self, A, b):
        # Solve the symmetric 3x3 system A x = b per pixel via an LDL^T
        # decomposition; A holds the upper triangle (A11, A12, A13, A22, A23, A33).
        A11, A12, A13, A22, A23, A33 = np.split(A, A.shape[-1], axis=-1)
        b1, b2, b3 = np.split(b, b.shape[-1], axis=-1)
        d1 = A11
        L_12 = A12 / d1
        d2 = A22 - L_12 * A12
        L_13 = A13 / d1
        L_23 = (A23 - L_13 * A12) / d2
        d3 = A33 - L_13 * A13 - L_23 * L_23 * d2
        y1 = b1
        y2 = b2 - L_12 * y1
        y3 = b3 - L_13 * y1 - L_23 * y2
        x3 = y3 / d3
        x2 = y2 / d2 - L_23 * x3
        x1 = y1 / d1 - L_12 * x2 - L_13 * x3
        return np.stack([x1, x2, x3], axis=-1).squeeze()

    def weighted_downsample(self, X, confidence, scale=None, target_size=None):
        # Confidence-weighted average pooling: resize(X * w) / resize(w).
        if target_size is None:
            target_size = (np.array(X.shape[:2]) / scale).round().astype(int)
        if X.shape[1] > confidence.shape[1]:
            X = self.resize(X, confidence.shape)
        if X.ndim == 3:
            confidence = confidence[..., np.newaxis]
        numerator = self.resize(X * confidence, target_size)
        denom = self.resize(confidence, target_size)
        if X.ndim == 3:
            denom = denom[..., np.newaxis]
        return numerator / denom
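
A minimal usage sketch (my own, not from the repository). It assumes reference is a float H x W x 3 image scaled to [0, 1], sky_prob is a 2D soft sky-probability map in [0, 1] (the file names are hypothetical), and kernel_size acts as the downsampling factor of the coarse affine fit:

import cv2
import numpy as np

# Hypothetical inputs: a photo and a soft sky-probability map in [0, 1].
reference = cv2.imread("photo.jpg").astype(np.float32) / 255.0
sky_prob = np.load("sky_probability.npy").astype(np.float32)

gf = GuidedFilter()
refined = gf.inference(reference, sky_prob, kernel_size=64)

cv2.imwrite("refined_mask.png", (refined * 255).astype(np.uint8))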

Simplified Version

import numpy as np
import cv2

class SimplifiedGuidedFilter:
    """
    Guided Filter from SkyOptimization https://github.com/google/sky-optimization
    Simplify guided filter:
    1. change smooth upsample to bilinear upsample
    2. not add eps to covariance
    """
    def __init__(self,
                 compute_confidence=True):
        self.compute_confidence = compute_confidence

    def inference(self,
                  reference,
                  source,
                  kernel_size,
                  confidence=None,
                  eps_luma=1e-2,
                  eps_chroma=1e-2,
                  clip_output=True):
        assert reference.shape[2] == 3

        if np.any(np.array(source.shape) < np.array(reference.shape[:2])):
            source = self.resize(source, reference.shape[:2])
        if confidence is None:
            confidence = self.probability_to_confidence(source)
        assert confidence.shape == source.shape

        # 1. weighted downsample reference and source
        reference_small = self.weighted_downsample(reference, confidence, kernel_size) # RGB image
        small_shape = reference_small.shape[:2]
        source_small = self.weighted_downsample(source, confidence, target_size=small_shape) # mask

        # 2. compute the reference covariance and the reference-source
        #    cross-covariance on the coarse grid
        outer_reference = self.outer_product_images(reference, reference)
        outer_reference = self.weighted_downsample(outer_reference, confidence, target_size=small_shape)
        covar = outer_reference - self.outer_product_images(reference_small, reference_small)

        cross = self.weighted_downsample(
            reference * source[..., np.newaxis], confidence, target_size=small_shape
        )
        residual_small = cross - reference_small * source_small[..., np.newaxis]
        covar[..., 0] += eps_luma ** 2
        covar[..., [3, 5]] += eps_chroma ** 2

        # 3. solve the per-cell 3x3 systems via LDL decomposition
        affine = self.solve_image_ldl3(covar, residual_small)

        residual = source_small - (affine * reference_small).sum(axis=2)

        # 4. upsample
        affine = self.bilinear_upsample(affine, (reference.shape[1], reference.shape[0]))
        residual = self.bilinear_upsample(residual, (reference.shape[1], reference.shape[0]))

        output = (affine * reference).sum(axis=2) + residual
        if clip_output:
            output = output.clip(0, 1)
        return output

    def bias(self, x, b=0.8):
        denom = ((1 / b) - 2) * (1 - x) + 1
        return x / denom

    def probability_to_confidence(self, probability, low_thresh=0.3, high_thresh=0.5):
        eps = 0.01
        low = probability < low_thresh
        high = probability > high_thresh
        confidence_low = self.bias((low_thresh - probability[low]) / low_thresh)
        confidence_high = self.bias((probability[high] - high_thresh) / (1 - high_thresh))
        confidence = np.zeros_like(probability)
        confidence[low] = confidence_low
        confidence[high] = confidence_high
        confidence = np.maximum(eps, confidence)
        return confidence

    def downsample2_antialiased(self, X):
        kernel = np.array([1, 3, 3, 1]) / 8
        dst = cv2.sepFilter2D(X, -1, kernel, kernel, anchor=(1, 1), borderType=cv2.BORDER_REPLICATE)
        return dst[::2, ::2]

    def resize_helper(self, X, shape):
        X = X.squeeze()
        while np.all(np.array(X.shape[:2]) >= np.array(shape) * 2):
            X = self.downsample2_antialiased(X)
        return cv2.resize(X, dsize=tuple(shape[1::-1]), interpolation=cv2.INTER_LINEAR)

    def resize(self, X, shape):
        if X.ndim == 2 or X.shape[2] <= 4:
            return self.resize_helper(X, shape)
        # OpenCV's resize handles at most 4 channels, so split and recombine.
        X1 = self.resize_helper(X[..., :3], shape)
        X2 = self.resize_helper(X[..., 3:], shape)
        return np.concatenate([X1, X2], axis=2)

    def outer_product_images(self, X, Y):
        assert X.shape[-1] == 3 and Y.shape[-1] == 3
        X_flat = X[..., :, np.newaxis]
        Y_flat = Y[..., np.newaxis, :]

        outer = np.matmul(X_flat, Y_flat)
        ind = np.triu_indices(3)
        outer = outer[..., ind[0], ind[1]]
        return outer.reshape(X.shape[:-1] + (6,))

    def bilinear_upsample(self, X, size):
        # size is (width, height), as expected by cv2.resize.
        X = cv2.resize(X, size, interpolation=cv2.INTER_LINEAR)
        return X

    def solve_image_ldl3(self, A, b):
        A11, A12, A13, A22, A23, A33 = np.split(A, A.shape[-1], axis=-1)
        b1, b2, b3 = np.split(b, b.shape[-1], axis=-1)
        d1 = A11
        L_12 = A12 / d1
        d2 = A22 - L_12 * A12
        L_13 = A13 / d1
        L_23 = (A23 - L_13 * A12) / d2
        d3 = A33 - L_13 * A13 - L_23 * L_23 * d2
        y1 = b1
        y2 = b2 - L_12 * y1
        y3 = b3 - L_13 * y1 - L_23 * y2
        x3 = y3 / d3
        x2 = y2 / d2 - L_23 * x3
        x1 = y1 / d1 - L_12 * x2 - L_13 * x3
        return np.stack([x1, x2, x3], axis=-1).squeeze()

    def weighted_downsample(self, X, confidence, scale=None, target_size=None):
        if target_size is None:
            target_size = (np.array(X.shape[:2]) / scale).round().astype(int)
        if X.shape[1] > confidence.shape[1]:
            X = self.resize(X, confidence.shape)
        if X.ndim == 3:
            confidence = confidence[..., np.newaxis]
        numerator = self.resize(X * confidence, target_size)
        denom = self.resize(confidence, target_size)
        if X.ndim == 3:
            denom = denom[..., np.newaxis]
        return numerator / denom
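
To compare the two variants (a sketch reusing the hypothetical reference and sky_prob inputs from the earlier example):

gf_full = GuidedFilter()
gf_simple = SimplifiedGuidedFilter()

out_full = gf_full.inference(reference, sky_prob, kernel_size=64)
out_simple = gf_simple.inference(reference, sky_prob, kernel_size=64)

# The single bilinear upsample should track the multi-step version closely;
# differences concentrate near strong edges in the reference image.
print("max abs difference:", np.abs(out_full - out_simple).max())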
