Source code for diffprivlib.mechanisms.snapping

"""
The Snapping mechanism in differential privacy, which eliminates a weakness to floating point errors in the classic
Laplace mechanism with standard Laplace sampling.
"""
import struct

import numpy as np
try:
    from crlibm import log_rn  # pylint: disable=no-name-in-module
except ModuleNotFoundError:
    log_rn = np.log

from diffprivlib.mechanisms import LaplaceTruncated



[docs]
class Snapping(LaplaceTruncated):
    r"""
    The Snapping mechanism for differential privacy.

    First proposed by Ilya Mironov [Mir12]_.

    It eliminates a vulnerability stemming from the representation of reals as floating-point numbers in implementations
    of the classic Laplace mechanism and its variants which use the inverse CDF of the Laplace distribution to sample
    it. It causes a high degree of reduction in the granularity of the output.

    For the most faithful implementation of the mechanism, the ``crlibm`` package should be installed.

    Parameters
    ----------
    epsilon : float
        Privacy parameter :math:`\epsilon` for the mechanism.  Must be in [:math:`2 \eta`, ∞], where :math:`\eta` is the
        machine epsilon of the floating point type.

    sensitivity : float
        The sensitivity of the mechanism.  Must be in [0, ∞).

    lower : float
        The lower bound of the mechanism.

    upper : float
        The upper bound of the mechanism.

    random_state : int or RandomState, optional
        Controls the randomness of the mechanism.  To obtain a deterministic behaviour during randomisation,
        ``random_state`` has to be fixed to an integer.

    References
    ----------
    .. [Mir12] Mironov, Ilya. "On significance of the least significant bits for differential privacy." Proceedings of
     the 2012 ACM conference on Computer and communications security (2012).

    """
    def __init__(self, *, epsilon, sensitivity, lower, upper, random_state=None):
        super().__init__(epsilon=epsilon, sensitivity=sensitivity, delta=0.0, lower=lower, upper=upper,
                         random_state=random_state)
        self._bound = self._scale_bound()

    @classmethod
    def _check_epsilon_delta(cls, epsilon, delta):
        epsilon, delta = super()._check_epsilon_delta(epsilon, delta)

        machine_epsilon = np.finfo(float).epsneg
        if epsilon <= 2 * machine_epsilon:
            raise ValueError("Epsilon must be at least as large as twice the machine epsilon for the floating point "
                             "type, as the effective epsilon must be non-negative")

        return epsilon, delta

    def _scale_bound(self):
        """
        Scales the lower and upper bounds to be proportionate to sensitivity 1, and symmetrical about 0.
        For sensitivity 0, only centres the bound, as scaling up and down is not defined.

        Returns
        -------
        float
            A symmetric bound around 0 scaled to sensitivity 1

        """
        if self.sensitivity == 0:
            return (self.upper - self.lower) / 2.0
        return (self.upper - self.lower) / 2.0 / self.sensitivity

    def _truncate(self, value):
        if value > self._bound:
            return self._bound
        if value < -self._bound:
            return -self._bound

        return value

    def bias(self, value):
        raise NotImplementedError

    def variance(self, value):
        raise NotImplementedError


[docs]
    def effective_epsilon(self):
        r"""
        Returns the effective value used in the Snapping mechanism to give the required :math:`\epsilon`-DP, based on
        the bounds and the machine epsilon.
        Based on section 5.2 of [Mir12]_.

        Returns
        -------
        float
            The effective value of :math:`\epsilon`

        """
        machine_epsilon = np.finfo(float).epsneg
        return (self.epsilon - 2 * machine_epsilon) / (1 + 12 * self._bound * machine_epsilon)


    def _scale_and_offset_value(self, value):
        """
        Centre value around 0 with symmetric bound and scale to sensitivity 1

        Parameters
        ----------
        value : float
            value to be scaled
        Returns
        -------
        float
            value offset to be centered on 0 and scaled to sensitivity 1

        """
        value_scaled = value / self.sensitivity
        return value_scaled - self._bound - (self.lower / self.sensitivity)

    def _reverse_scale_and_offset_value(self, value):
        return (value + self._bound) * self.sensitivity + self.lower

    @staticmethod
    def _get_nearest_power_of_2(x):
        def float_to_bits(d):
            s = struct.pack('>d', d)
            return struct.unpack('>q', s)[0]

        def bits_to_float(b):
            s = struct.pack('>q', b)
            return struct.unpack('>d', s)[0]

        bits = float_to_bits(x)
        mantissa_size = np.finfo(float).nmant
        if bits % (1 << mantissa_size) == 0:
            return x
        return bits_to_float(((bits >> mantissa_size) + 1) << mantissa_size)

    def _round_to_nearest_power_of_2(self, value, lambda_):
        """ Performs the rounding step from [Mir12]_ with ties resolved towards +∞

        Parameters
        ----------
        value : float
            Value to be rounded

        Returns
        -------
        float
            Rounded value

        """
        if self.epsilon == float('inf'):  # infinitely small rounding
            return value
        remainder = value % lambda_
        if remainder > lambda_ / 2:
            return value - remainder + lambda_
        if remainder == lambda_ / 2:
            return value + remainder
        return value - remainder

    def _uniform_sampler(self):
        """
        Uniformly sample the full domain of floating-point numbers between (0, 1), rather than only multiples of 2^-53.
        A uniform distribution over D ∩ (0, 1) can be generated by independently sampling an exponent
        from the geometric distribution with parameter .5 and a significand by drawing a uniform string from
        {0, 1}^52 [Mir12]_

        Based on code recipe in Python standard library documentation [Py21]_.

        Returns
        -------
        float
            A value sampled from float in (0, 1) with probability proportional to the size of the infinite-precision
            real interval each float represents

        References
        ----------
        .. [Py21]  The Python Standard Library. "random — Generate pseudo-random numbers", 2021
        https://docs.python.org/3/library/random.html#recipes

        """
        mantissa_size = np.finfo(float).nmant
        mantissa = 1 << mantissa_size | self._getrandbits(mantissa_size)
        exponent = -(mantissa_size + 1)
        x = 0
        while not x:
            x = self._getrandbits(32)
            exponent += x.bit_length() - 32
        return np.ldexp(mantissa, exponent)

    def _getrandbits(self, bits):
        try:
            return self._rng.getrandbits(bits)
        except AttributeError:
            return self._rng.randint(0, 2 ** bits)

    @staticmethod
    def _laplace_sampler(unif_bit, unif):
        r"""
        Laplace inverse CDF random sampling implementation which uses full domain uniform sampling and exact log
        implementation from crlibm (if installed), as mentioned in [Mir12]_.
        Outputs a random value scaled according to privacy budget and sensitivity 1, as bounds and input are scaled to
        sensitivity 1 before Laplacian noise is added.

        Returns
        -------
        float
            Random value from Laplace distribution scaled according to :math:`\epsilon`

        """
        laplace = (-1) ** unif_bit * log_rn(unif)
        return laplace


[docs]
    def randomise(self, value):
        """Randomise `value` with the mechanism.

        Parameters
        ----------
        value : float
            The value to be randomised.

        Returns
        -------
        float
            The randomised value.

        """
        self._check_all(value)
        if self.sensitivity == 0:
            return self._truncate(value)

        value_scaled_offset = self._scale_and_offset_value(value)
        value_clamped = self._truncate(value_scaled_offset)

        scale = 1.0 / self.effective_epsilon()  # everything is already scaled to sensitivity 1
        lambda_ = self._get_nearest_power_of_2(scale)
        laplace = scale * self._laplace_sampler(self._getrandbits(1), self._uniform_sampler())
        value_rounded = self._round_to_nearest_power_of_2(value_clamped + laplace, lambda_)
        return self._reverse_scale_and_offset_value(self._truncate(value_rounded))