Skip to content

Commit

Permalink
Privacy-on-Beam: support for quantiles, C++ accounting: fast delta
Browse files Browse the repository at this point in the history
C++ DP Lib:
  * Migrated to absl::Status

Privacy-on-Beam:
  * Implement QuantilesPerKey along with pbeamtest support
  * Updated dependencies

C++ accounting library:
  * Fast computation of delta without convolution

GitOrigin-RevId: 9faba9cd2873465e0f52bc2f9304dd9fa27e6998
Change-Id: I908cb6281e0316be82ae7264a6a79dc3ac126f03
  • Loading branch information
Differential Privacy Team authored and dibakch committed Apr 21, 2021
1 parent 2d07a0c commit 68bdbb2
Show file tree
Hide file tree
Showing 26 changed files with 2,195 additions and 173 deletions.
57 changes: 55 additions & 2 deletions cc/accounting/privacy_loss_distribution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "absl/strings/str_format.h"
#include "accounting/common/common.h"
#include "accounting/convolution.h"
#include "base/status_macros.h"

namespace differential_privacy {
namespace accounting {
Expand Down Expand Up @@ -206,8 +207,8 @@ PrivacyLossDistribution::CreateForPrivacyParameters(
discretization_interval, /*infinity_mass=*/delta, rounded_pmf));
}

absl::Status PrivacyLossDistribution::Compose(
const PrivacyLossDistribution& other_pld, double tail_mass_truncation) {
absl::Status PrivacyLossDistribution::ValidateComposition(
const PrivacyLossDistribution& other_pld) const {
if (other_pld.DiscretizationInterval() != discretization_interval_) {
return absl::InvalidArgumentError(absl::StrFormat(
"Cannot compose, discretization intervals are different "
Expand All @@ -220,6 +221,13 @@ absl::Status PrivacyLossDistribution::Compose(
"Cannot compose, estimate types are different");
}

return absl::OkStatus();
}

absl::Status PrivacyLossDistribution::Compose(
const PrivacyLossDistribution& other_pld, double tail_mass_truncation) {
RETURN_IF_ERROR(ValidateComposition(other_pld));

double new_infinity_mass = infinity_mass_ + other_pld.InfinityMass() -
infinity_mass_ * other_pld.InfinityMass();

Expand All @@ -237,6 +245,51 @@ absl::Status PrivacyLossDistribution::Compose(
return absl::OkStatus();
}

// Computes delta for the given epsilon for the composition of this PLD with
// other_pld, without materializing the composed distribution.
//
// Returns the error from ValidateComposition if the two PLDs cannot be
// composed; otherwise returns the hockey stick divergence of the composed
// PLD at epsilon plus the composed infinity mass. Neither PLD is modified.
base::StatusOr<double>
PrivacyLossDistribution::GetDeltaForEpsilonForComposedPLD(
    const PrivacyLossDistribution& other_pld, double epsilon) const {
  RETURN_IF_ERROR(ValidateComposition(other_pld));

  UnpackedProbabilityMassFunction lhs =
      UnpackProbabilityMassFunction(probability_mass_function_);
  UnpackedProbabilityMassFunction rhs =
      UnpackProbabilityMassFunction(other_pld.probability_mass_function_);

  // The hockey stick divergence is evaluated via equation (2) in the
  // supplementary material. rhs_upper_mass accumulates the summation in
  // equation (3) and rhs_lower_mass accumulates the summation in
  // equation (4).
  double rhs_upper_mass = 0;
  double rhs_lower_mass = 0;
  double delta = 0;

  // rhs_idx walks other_pld's entries from the largest privacy loss downwards
  // and is never reset: as the privacy loss of this PLD grows with lhs_idx,
  // the set of other_pld entries whose combined loss exceeds epsilon can only
  // grow, so both running sums are carried over between iterations.
  int rhs_idx = rhs.items.size() - 1;

  for (int lhs_idx = 0; lhs_idx < lhs.items.size(); ++lhs_idx) {
    const double lhs_loss = discretization_interval_ * (lhs_idx + lhs.min_key);
    const double lhs_mass = lhs.items[lhs_idx];

    // Absorb every remaining other_pld entry whose combined privacy loss with
    // the current entry is strictly greater than epsilon.
    for (; rhs_idx >= 0; --rhs_idx) {
      const double rhs_loss =
          other_pld.discretization_interval_ * (rhs_idx + rhs.min_key);
      if (rhs_loss + lhs_loss <= epsilon) break;
      rhs_upper_mass += rhs.items[rhs_idx];
      rhs_lower_mass += rhs.items[rhs_idx] / std::exp(rhs_loss);
    }

    delta += lhs_mass * (rhs_upper_mass -
                         std::exp(epsilon - lhs_loss) * rhs_lower_mass);
  }

  // Probability that the composed privacy loss is infinite: at least one of
  // the two PLDs yields an infinite loss.
  double infinite_loss_mass = infinity_mass_ + other_pld.InfinityMass() -
                              infinity_mass_ * other_pld.InfinityMass();

  return delta + infinite_loss_mass;
}

void PrivacyLossDistribution::Compose(int num_times) {
double new_infinity_mass = 1 - pow((1 - infinity_mass_), num_times);

Expand Down
18 changes: 16 additions & 2 deletions cc/accounting/privacy_loss_distribution.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,27 @@ class PrivacyLossDistribution {
// Observation 1 in the supplementary material.)
double GetEpsilonForDelta(double delta) const;

// Composes other PLD into itself. The discretization intervals should be
// the same otherwise failure status is returned. Additional parameter:
// Validates that a given PLD can be composed with this PLD. The
// discretization intervals and the estimate types should be the same;
// otherwise failure status is returned.
absl::Status ValidateComposition(
const PrivacyLossDistribution& other_pld) const;

// Composes other PLD into itself. Additional parameter:
// tail_mass_truncation: an upper bound on the tails of the probability
// mass of the PLD that might be truncated.
absl::Status Compose(const PrivacyLossDistribution& other_pld,
double tail_mass_truncation = 1e-15);

// Computes delta for given epsilon for the result of composing this PLD and a
// given PLD. Note that this function does not modify the current PLD.
//
// The output of this function should be the same as first composing this PLD
// and other_pld, and then computing delta for the given epsilon on the
// resulting PLD. The main advantage is that this function is faster.
base::StatusOr<double> GetDeltaForEpsilonForComposedPLD(
const PrivacyLossDistribution& other_pld, double epsilon) const;

// Composes PLD into itself num_times.
void Compose(int num_times);

Expand Down
46 changes: 39 additions & 7 deletions cc/accounting/privacy_loss_distribution_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,25 @@ TEST(PrivacyLossDistributionTest, Compose) {
EXPECT_FALSE(pld->Pmf().empty());
}

// Checks the delta returned by GetDeltaForEpsilonForComposedPLD for two small
// hand-written PLDs that share the same discretization interval.
TEST(PrivacyLossDistributionTest, GetDeltaForEpsilonForComposedPLD) {
  // Both distributions must use the same interval to be composable.
  const double interval = 0.4;

  ProbabilityMassFunction first_pmf = {{0, 0.1}, {1, 0.7}, {2, 0.1}};
  ProbabilityMassFunction second_pmf = {{1, 0.1}, {2, 0.6}, {3, 0.25}};

  std::unique_ptr<PrivacyLossDistribution> first_pld =
      PrivacyLossDistributionTestPeer::Create(
          first_pmf,
          /*infinity_mass=*/0.1,
          /*discretization_interval=*/interval);
  std::unique_ptr<PrivacyLossDistribution> second_pld =
      PrivacyLossDistributionTestPeer::Create(
          second_pmf,
          /*infinity_mass=*/0.05,
          /*discretization_interval=*/interval);

  base::StatusOr<double> composed_delta =
      first_pld->GetDeltaForEpsilonForComposedPLD(*second_pld,
                                                  /*epsilon=*/1.1);

  // Stop here on failure; dereferencing a non-OK StatusOr below is invalid.
  ASSERT_OK(composed_delta);
  EXPECT_THAT(*composed_delta, DoubleNear(0.2956, kMaxError));
}

TEST(PrivacyLossDistributionTest, ComposeTruncation) {
ProbabilityMassFunction pmf = {{0, 0.1}, {1, 0.7}, {2, 0.1}};
std::unique_ptr<PrivacyLossDistribution> pld =
Expand Down Expand Up @@ -209,10 +228,16 @@ TEST(PrivacyLossDistributionTest,
std::unique_ptr<PrivacyLossDistribution> pld_other =
PrivacyLossDistributionTestPeer::Create(pmf, 0.3, 2e-4);

EXPECT_THAT(pld->Compose(*pld_other),
StatusIs(absl::InvalidArgumentError("").code(),
HasSubstr("Cannot compose, discretization intervals "
"are different - 0.000200 vs 0.000100")));
std::string error_msg = "discretization interval";
EXPECT_THAT(
pld->ValidateComposition(*pld_other),
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
EXPECT_THAT(
pld->Compose(*pld_other),
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
EXPECT_THAT(
pld->GetDeltaForEpsilonForComposedPLD(*pld_other, /*epsilon=*/1),
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
}

TEST(PrivacyLossDistributionTest, ComposeErrorDifferentEstimateTypes) {
Expand All @@ -227,9 +252,16 @@ TEST(PrivacyLossDistributionTest, ComposeErrorDifferentEstimateTypes) {
pmf, /*infinity_mass=*/0.3, /*discretization_interval=*/1e-4,
/*estimate_type=*/EstimateType::kOptimistic);

EXPECT_THAT(pld->Compose(*pld_other),
StatusIs(absl::StatusCode::kInvalidArgument,
Eq("Cannot compose, estimate types are different")));
std::string error_msg = "estimate type";
EXPECT_THAT(
pld->ValidateComposition(*pld_other),
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
EXPECT_THAT(
pld->Compose(*pld_other),
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
EXPECT_THAT(
pld->GetDeltaForEpsilonForComposedPLD(*pld_other, /*epsilon=*/1),
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
}

struct GetEpsilonFromDeltaParam {
Expand Down
1 change: 1 addition & 0 deletions cc/algorithms/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,7 @@ cc_library(
"//base:logging",
"@boringssl//:crypto",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/numeric:bits",
"@com_google_absl//absl/synchronization",
],
)
Expand Down
15 changes: 3 additions & 12 deletions cc/algorithms/rand.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,12 @@
#include <limits>

#include "base/logging.h"
#include "absl/numeric/bits.h"
#include "absl/synchronization/mutex.h"
#include "openssl/rand.h"

namespace differential_privacy {
namespace {
// Portable count of leading zero bits in a 64-bit value, adapted from
// absl/base/internal/bits.h. Returns 64 when n is 0.
int CountLeadingZeros64Slow(uint64_t n) {
  // Number of leading zeros within a 4-bit value, indexed by that value.
  static constexpr int kNibbleLeadingZeros[16] = {4, 3, 2, 2, 1, 1, 1, 1,
                                                  0, 0, 0, 0, 0, 0, 0, 0};

  // Start from the count for a value that fits in the lowest nibble, then
  // halve the search window (32, 16, 8, 4 bits): whenever the upper part is
  // non-zero, drop the lower part and reduce the count accordingly.
  int leading_zeros = 60;
  for (int shift = 32; shift >= 4; shift /= 2) {
    if ((n >> shift) != 0) {
      leading_zeros -= shift;
      n >>= shift;
    }
  }

  // n is now at most 15, so it indexes the nibble table directly.
  return kNibbleLeadingZeros[n] + leading_zeros;
}

// We usually expect DBL_MANT_DIG to be 53.
static_assert(DBL_MANT_DIG < 64,
"Double mantissa must have less than 64 bits.");
Expand All @@ -59,7 +50,7 @@ double UniformDouble() {
uint64_t j = uint_64_number >> kMantDigits;

// exponent is the number of leading zeros in the first 11 bits plus one.
uint64_t exponent = CountLeadingZeros64Slow(j) - kMantDigits + 1;
uint64_t exponent = absl::countl_zero(j) - kMantDigits + 1;

// Extra geometric sampling is needed only when the leading 11 bits are all 0.
if (j == 0) {
Expand All @@ -84,7 +75,7 @@ uint64_t Geometric() {
uint64_t r = 0;
while (r == 0 && result < 1023) {
r = SecureURBG::GetSingleton()();
result += CountLeadingZeros64Slow(r);
result += absl::countl_zero(r);
}
return result;
}
Expand Down
6 changes: 3 additions & 3 deletions cc/cc_differential_privacy_deps.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ def cc_differential_privacy_deps():
# Abseil
http_archive(
name = "com_google_absl",
url = "https://github.com/abseil/abseil-cpp/archive/20200923.3.tar.gz",
sha256 = "ebe2ad1480d27383e4bf4211e2ca2ef312d5e6a09eba869fd2e8a5c5d553ded2",
strip_prefix = "abseil-cpp-20200923.3",
url = "https://github.com/abseil/abseil-cpp/archive/20210324.0.tar.gz",
sha256 = "dd7db6815204c2a62a2160e32c55e97113b0a0178b2f090d6bab5ce36111db4b",
strip_prefix = "abseil-cpp-20210324.0",
)

# Common bazel rules
Expand Down
2 changes: 1 addition & 1 deletion cc/docs/algorithms/algorithm.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ an error.

```
Summary Serialize();
util::Status Merge(const Summary& summary);
absl::Status Merge(const Summary& summary);
```

Serialization and merging can allow these algorithms to be used in a distributed
Expand Down
4 changes: 2 additions & 2 deletions cc/postgres/postgres.BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

licenses(["notice"]) # Apache v2.0

load("@rules_foreign_cc//tools/build_defs:configure.bzl", "configure_make")
load("@rules_foreign_cc//foreign_cc:configure.bzl", "configure_make")

package(
default_visibility = ["//visibility:public"],
Expand Down Expand Up @@ -50,7 +50,7 @@ configure_make(
"CFLAGS": "-fPIC",
},
}),
headers_only = True,
out_headers_only = True,
lib_source = "@postgres//:all",
)

Expand Down
12 changes: 6 additions & 6 deletions cc/testing/stochastic_tester_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ class NonDpSum : public Algorithm<T> {
void ResetState() override { result_ = 0; }

Summary Serialize() const override { return Summary(); }
base::Status Merge(const Summary& summary) override {
return base::OkStatus();
absl::Status Merge(const Summary& summary) override {
return absl::OkStatus();
}
int64_t MemoryUsed() override { return sizeof(NonDpSum<T>); };

Expand All @@ -74,8 +74,8 @@ class NonDpCount : public Algorithm<T> {
void ResetState() override { result_ = 0; }

Summary Serialize() const override { return Summary(); }
base::Status Merge(const Summary& summary) override {
return base::OkStatus();
absl::Status Merge(const Summary& summary) override {
return absl::OkStatus();
}
int64_t MemoryUsed() override { return sizeof(NonDpCount<T>); };

Expand Down Expand Up @@ -166,8 +166,8 @@ class AlwaysError : public Algorithm<T> {
void ResetState() override {}

Summary Serialize() const override { return Summary(); }
base::Status Merge(const Summary& summary) override {
return base::OkStatus();
absl::Status Merge(const Summary& summary) override {
return absl::OkStatus();
}
int64_t MemoryUsed() override { return sizeof(AlwaysError<T>); };

Expand Down
20 changes: 11 additions & 9 deletions go/dpagg/quantiles.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ import (
"github.com/google/differential-privacy/go/noise"
)

// Constants used for QuantileTrees.
const (
numericalTolerance = 1e-6
defaultTreeHeight = 4
defaultBranchingFactor = 16
DefaultTreeHeight = 4
DefaultBranchingFactor = 16
rootIndex = 0
// Fraction a node needs to contribute to the total count of itself and its siblings to be
// considered during the search for a particular quantile. The idea of alpha is to filter out
Expand Down Expand Up @@ -61,7 +62,7 @@ type BoundedQuantiles struct {
branchingFactor int
l0Sensitivity int64
lInfSensitivity float64
noise noise.Noise
Noise noise.Noise
noiseKind noise.Kind // necessary for serializing noise.Noise information

// State variables
Expand Down Expand Up @@ -128,15 +129,15 @@ func NewBoundedQuantiles(opt *BoundedQuantilesOptions) *BoundedQuantiles {
// Check tree height and branching factor, set defaults if not specified, and use them to compute numLeaves and leftmostLeafIndex.
treeHeight := opt.TreeHeight
if treeHeight == 0 {
treeHeight = defaultTreeHeight
treeHeight = DefaultTreeHeight
}
if err := checks.CheckTreeHeight("NewBoundedQuantiles", treeHeight); err != nil {
// TODO: do not exit the program from within library code
log.Fatalf("CheckTreeHeight failed with %v", err)
}
branchingFactor := opt.BranchingFactor
if branchingFactor == 0 {
branchingFactor = defaultBranchingFactor
branchingFactor = DefaultBranchingFactor
}
if err := checks.CheckBranchingFactor("NewBoundedQuantiles", branchingFactor); err != nil {
// TODO: do not exit the program from within library code
Expand Down Expand Up @@ -171,7 +172,7 @@ func NewBoundedQuantiles(opt *BoundedQuantilesOptions) *BoundedQuantiles {
branchingFactor: branchingFactor,
l0Sensitivity: l0Sensitivity,
lInfSensitivity: lInfSensitivity,
noise: n,
Noise: n,
noiseKind: noise.ToKind(n),
tree: make(map[int]int64),
noisedTree: make(map[int]float64),
Expand Down Expand Up @@ -328,7 +329,7 @@ func (bq *BoundedQuantiles) getNoisedCount(index int) float64 {
return noisedCount
}
rawCount := bq.tree[index]
noisedCount := bq.noise.AddNoiseFloat64(float64(rawCount), bq.l0Sensitivity, bq.lInfSensitivity, bq.epsilon, bq.delta)
noisedCount := bq.Noise.AddNoiseFloat64(float64(rawCount), bq.l0Sensitivity, bq.lInfSensitivity, bq.epsilon, bq.delta)
bq.noisedTree[index] = noisedCount
return noisedCount
}
Expand Down Expand Up @@ -359,6 +360,7 @@ func (bq *BoundedQuantiles) Merge(bq2 *BoundedQuantiles) {
for index, count := range bq2.tree {
bq.tree[index] += count
}
bq2.state = merged
}

func checkMergeBoundedQuantiles(bq1, bq2 *BoundedQuantiles) error {
Expand Down Expand Up @@ -421,7 +423,7 @@ func (bq *BoundedQuantiles) GobEncode() ([]byte, error) {
Upper: bq.upper,
NumLeaves: bq.numLeaves,
LeftmostLeafIndex: bq.leftmostLeafIndex,
NoiseKind: noise.ToKind(bq.noise),
NoiseKind: noise.ToKind(bq.Noise),
QuantileTree: bq.tree,
}
bq.state = serialized
Expand All @@ -446,7 +448,7 @@ func (bq *BoundedQuantiles) GobDecode(data []byte) error {
lower: enc.Lower,
upper: enc.Upper,
noiseKind: enc.NoiseKind,
noise: noise.ToNoise(enc.NoiseKind),
Noise: noise.ToNoise(enc.NoiseKind),
numLeaves: enc.NumLeaves,
leftmostLeafIndex: enc.LeftmostLeafIndex,
tree: enc.QuantileTree,
Expand Down
Loading

0 comments on commit 68bdbb2

Please sign in to comment.