diff --git a/cc/accounting/privacy_loss_distribution.cc b/cc/accounting/privacy_loss_distribution.cc index 67a12bab..b5b20415 100644 --- a/cc/accounting/privacy_loss_distribution.cc +++ b/cc/accounting/privacy_loss_distribution.cc @@ -21,6 +21,7 @@ #include "absl/strings/str_format.h" #include "accounting/common/common.h" #include "accounting/convolution.h" +#include "base/status_macros.h" namespace differential_privacy { namespace accounting { @@ -206,8 +207,8 @@ PrivacyLossDistribution::CreateForPrivacyParameters( discretization_interval, /*infinity_mass=*/delta, rounded_pmf)); } -absl::Status PrivacyLossDistribution::Compose( - const PrivacyLossDistribution& other_pld, double tail_mass_truncation) { +absl::Status PrivacyLossDistribution::ValidateComposition( + const PrivacyLossDistribution& other_pld) const { if (other_pld.DiscretizationInterval() != discretization_interval_) { return absl::InvalidArgumentError(absl::StrFormat( "Cannot compose, discretization intervals are different " @@ -220,6 +221,13 @@ absl::Status PrivacyLossDistribution::Compose( "Cannot compose, estimate types are different"); } + return absl::OkStatus(); +} + +absl::Status PrivacyLossDistribution::Compose( + const PrivacyLossDistribution& other_pld, double tail_mass_truncation) { + RETURN_IF_ERROR(ValidateComposition(other_pld)); + double new_infinity_mass = infinity_mass_ + other_pld.InfinityMass() - infinity_mass_ * other_pld.InfinityMass(); @@ -237,6 +245,51 @@ absl::Status PrivacyLossDistribution::Compose( return absl::OkStatus(); } +base::StatusOr +PrivacyLossDistribution::GetDeltaForEpsilonForComposedPLD( + const PrivacyLossDistribution& other_pld, double epsilon) const { + RETURN_IF_ERROR(ValidateComposition(other_pld)); + + UnpackedProbabilityMassFunction this_pmf = + UnpackProbabilityMassFunction(probability_mass_function_); + UnpackedProbabilityMassFunction other_pmf = + UnpackProbabilityMassFunction(other_pld.probability_mass_function_); + + // Compute the hockey stick 
divergence using equation (2) in the + // supplementary material. other_cumulative_upper_mass below represents the + // summation in equation (3) and other_cumulative_lower_mass represents the + // summation in equation (4). + + double other_cumulative_upper_mass = 0; + double other_cumulative_lower_mass = 0; + int current_idx = other_pmf.items.size() - 1; + double delta = 0; + + for (int this_idx = 0; this_idx < this_pmf.items.size(); ++this_idx) { + double this_privacy_loss = + discretization_interval_ * (this_idx + this_pmf.min_key); + double this_probability_mass = this_pmf.items[this_idx]; + while (current_idx >= 0) { + double other_privacy_loss = other_pld.discretization_interval_ * + (current_idx + other_pmf.min_key); + if (other_privacy_loss + this_privacy_loss <= epsilon) break; + other_cumulative_upper_mass += other_pmf.items[current_idx]; + other_cumulative_lower_mass += + other_pmf.items[current_idx] / std::exp(other_privacy_loss); + --current_idx; + } + delta += this_probability_mass * (other_cumulative_upper_mass - + std::exp(epsilon - this_privacy_loss) * + other_cumulative_lower_mass); + } + + // The probability that the composed privacy loss is infinite. + double composed_infinity_mass = infinity_mass_ + other_pld.InfinityMass() - + infinity_mass_ * other_pld.InfinityMass(); + + return delta + composed_infinity_mass; +} + void PrivacyLossDistribution::Compose(int num_times) { double new_infinity_mass = 1 - pow((1 - infinity_mass_), num_times); diff --git a/cc/accounting/privacy_loss_distribution.h b/cc/accounting/privacy_loss_distribution.h index 0ac375b0..a601feaf 100644 --- a/cc/accounting/privacy_loss_distribution.h +++ b/cc/accounting/privacy_loss_distribution.h @@ -146,13 +146,27 @@ class PrivacyLossDistribution { // Observation 1 in the supplementary material.) double GetEpsilonForDelta(double delta) const; - // Composes other PLD into itself. The discretization intervals should be - // the same otherwise failure status is returned. 
Additional parameter: + // tail_mass_truncation: an upper bound on the tails of the probability // mass of the PLD that might be truncated. absl::Status Compose(const PrivacyLossDistribution& other_pld, double tail_mass_truncation = 1e-15); + // Computes delta for given epsilon for the result of composing this PLD and a + // given PLD. Note that this function does not modify the current PLD. + // + // The output of this function should be the same as first composing this PLD + // and other_pld, and then calling GetDeltaForEpsilon on the resulting + // PLD. The main advantage is that this function is faster. + base::StatusOr<double> GetDeltaForEpsilonForComposedPLD( + const PrivacyLossDistribution& other_pld, double epsilon) const; + // Composes PLD into itself num_times. 
void Compose(int num_times); diff --git a/cc/accounting/privacy_loss_distribution_test.cc b/cc/accounting/privacy_loss_distribution_test.cc index 937b90c0..a6a257f4 100644 --- a/cc/accounting/privacy_loss_distribution_test.cc +++ b/cc/accounting/privacy_loss_distribution_test.cc @@ -160,6 +160,25 @@ TEST(PrivacyLossDistributionTest, Compose) { EXPECT_FALSE(pld->Pmf().empty()); } +TEST(PrivacyLossDistributionTest, GetDeltaForEpsilonForComposedPLD) { + ProbabilityMassFunction pmf = {{0, 0.1}, {1, 0.7}, {2, 0.1}}; + std::unique_ptr pld = + PrivacyLossDistributionTestPeer::Create(pmf, + /*infinity_mass=*/0.1, + /*discretization_interval=*/0.4); + + ProbabilityMassFunction pmf_other = {{1, 0.1}, {2, 0.6}, {3, 0.25}}; + std::unique_ptr pld_other = + PrivacyLossDistributionTestPeer::Create(pmf_other, + /*infinity_mass=*/0.05, + /*discretization_interval=*/0.4); + + base::StatusOr delta = + pld->GetDeltaForEpsilonForComposedPLD(*pld_other, /*epsilon=*/1.1); + ASSERT_OK(delta); + EXPECT_THAT(*delta, DoubleNear(0.2956, kMaxError)); +} + TEST(PrivacyLossDistributionTest, ComposeTruncation) { ProbabilityMassFunction pmf = {{0, 0.1}, {1, 0.7}, {2, 0.1}}; std::unique_ptr pld = @@ -209,10 +228,16 @@ TEST(PrivacyLossDistributionTest, std::unique_ptr pld_other = PrivacyLossDistributionTestPeer::Create(pmf, 0.3, 2e-4); - EXPECT_THAT(pld->Compose(*pld_other), - StatusIs(absl::InvalidArgumentError("").code(), - HasSubstr("Cannot compose, discretization intervals " - "are different - 0.000200 vs 0.000100"))); + std::string error_msg = "discretization interval"; + EXPECT_THAT( + pld->ValidateComposition(*pld_other), + StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg))); + EXPECT_THAT( + pld->Compose(*pld_other), + StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg))); + EXPECT_THAT( + pld->GetDeltaForEpsilonForComposedPLD(*pld_other, /*epsilon=*/1), + StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg))); } TEST(PrivacyLossDistributionTest, 
ComposeErrorDifferentEstimateTypes) { @@ -227,9 +252,16 @@ TEST(PrivacyLossDistributionTest, ComposeErrorDifferentEstimateTypes) { pmf, /*infinity_mass=*/0.3, /*discretization_interval=*/1e-4, /*estimate_type=*/EstimateType::kOptimistic); - EXPECT_THAT(pld->Compose(*pld_other), - StatusIs(absl::StatusCode::kInvalidArgument, - Eq("Cannot compose, estimate types are different"))); + std::string error_msg = "estimate type"; + EXPECT_THAT( + pld->ValidateComposition(*pld_other), + StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg))); + EXPECT_THAT( + pld->Compose(*pld_other), + StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg))); + EXPECT_THAT( + pld->GetDeltaForEpsilonForComposedPLD(*pld_other, /*epsilon=*/1), + StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg))); } struct GetEpsilonFromDeltaParam { diff --git a/cc/algorithms/BUILD b/cc/algorithms/BUILD index bbad4890..14499d08 100644 --- a/cc/algorithms/BUILD +++ b/cc/algorithms/BUILD @@ -486,6 +486,7 @@ cc_library( "//base:logging", "@boringssl//:crypto", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/numeric:bits", "@com_google_absl//absl/synchronization", ], ) diff --git a/cc/algorithms/rand.cc b/cc/algorithms/rand.cc index 66919b51..aeecd534 100644 --- a/cc/algorithms/rand.cc +++ b/cc/algorithms/rand.cc @@ -23,21 +23,12 @@ #include #include "base/logging.h" +#include "absl/numeric/bits.h" #include "absl/synchronization/mutex.h" #include "openssl/rand.h" namespace differential_privacy { namespace { -// From absl/base/internal/bits.h. -int CountLeadingZeros64Slow(uint64_t n) { - int zeroes = 60; - if (n >> 32) zeroes -= 32, n >>= 32; - if (n >> 16) zeroes -= 16, n >>= 16; - if (n >> 8) zeroes -= 8, n >>= 8; - if (n >> 4) zeroes -= 4, n >>= 4; - return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes; -} - // We usually expect DBL_MANT_DIG to be 53. 
static_assert(DBL_MANT_DIG < 64, "Double mantissa must have less than 64 bits."); @@ -59,7 +50,7 @@ double UniformDouble() { uint64_t j = uint_64_number >> kMantDigits; // exponent is the number of leading zeros in the first 11 bits plus one. - uint64_t exponent = CountLeadingZeros64Slow(j) - kMantDigits + 1; + uint64_t exponent = absl::countl_zero(j) - kMantDigits + 1; // Extra geometric sampling is needed only when the leading 11 bits are all 0. if (j == 0) { @@ -84,7 +75,7 @@ uint64_t Geometric() { uint64_t r = 0; while (r == 0 && result < 1023) { r = SecureURBG::GetSingleton()(); - result += CountLeadingZeros64Slow(r); + result += absl::countl_zero(r); } return result; } diff --git a/cc/cc_differential_privacy_deps.bzl b/cc/cc_differential_privacy_deps.bzl index d03e7827..5ecf3398 100644 --- a/cc/cc_differential_privacy_deps.bzl +++ b/cc/cc_differential_privacy_deps.bzl @@ -27,9 +27,9 @@ def cc_differential_privacy_deps(): # Abseil http_archive( name = "com_google_absl", - url = "https://github.com/abseil/abseil-cpp/archive/20200923.3.tar.gz", - sha256 = "ebe2ad1480d27383e4bf4211e2ca2ef312d5e6a09eba869fd2e8a5c5d553ded2", - strip_prefix = "abseil-cpp-20200923.3", + url = "https://github.com/abseil/abseil-cpp/archive/20210324.0.tar.gz", + sha256 = "dd7db6815204c2a62a2160e32c55e97113b0a0178b2f090d6bab5ce36111db4b", + strip_prefix = "abseil-cpp-20210324.0", ) # Common bazel rules diff --git a/cc/docs/algorithms/algorithm.md b/cc/docs/algorithms/algorithm.md index 7200118a..7e4ce954 100644 --- a/cc/docs/algorithms/algorithm.md +++ b/cc/docs/algorithms/algorithm.md @@ -110,7 +110,7 @@ an error. 
``` Summary Serialize(); -util::Status Merge(const Summary& summary); +absl::Status Merge(const Summary& summary); ``` Serialization and merging can allow these algorithms to be used in a distributed diff --git a/cc/postgres/postgres.BUILD b/cc/postgres/postgres.BUILD index a2a8b4e6..18c90069 100755 --- a/cc/postgres/postgres.BUILD +++ b/cc/postgres/postgres.BUILD @@ -15,7 +15,7 @@ licenses(["notice"]) # Apache v2.0 -load("@rules_foreign_cc//tools/build_defs:configure.bzl", "configure_make") +load("@rules_foreign_cc//foreign_cc:configure.bzl", "configure_make") package( default_visibility = ["//visibility:public"], @@ -50,7 +50,7 @@ configure_make( "CFLAGS": "-fPIC", }, }), - headers_only = True, + out_headers_only = True, lib_source = "@postgres//:all", ) diff --git a/cc/testing/stochastic_tester_test.cc b/cc/testing/stochastic_tester_test.cc index ef6df7ce..d90493a7 100644 --- a/cc/testing/stochastic_tester_test.cc +++ b/cc/testing/stochastic_tester_test.cc @@ -51,8 +51,8 @@ class NonDpSum : public Algorithm { void ResetState() override { result_ = 0; } Summary Serialize() const override { return Summary(); } - base::Status Merge(const Summary& summary) override { - return base::OkStatus(); + absl::Status Merge(const Summary& summary) override { + return absl::OkStatus(); } int64_t MemoryUsed() override { return sizeof(NonDpSum); }; @@ -74,8 +74,8 @@ class NonDpCount : public Algorithm { void ResetState() override { result_ = 0; } Summary Serialize() const override { return Summary(); } - base::Status Merge(const Summary& summary) override { - return base::OkStatus(); + absl::Status Merge(const Summary& summary) override { + return absl::OkStatus(); } int64_t MemoryUsed() override { return sizeof(NonDpCount); }; @@ -166,8 +166,8 @@ class AlwaysError : public Algorithm { void ResetState() override {} Summary Serialize() const override { return Summary(); } - base::Status Merge(const Summary& summary) override { - return base::OkStatus(); + absl::Status Merge(const 
Summary& summary) override { + return absl::OkStatus(); } int64_t MemoryUsed() override { return sizeof(AlwaysError); }; diff --git a/go/dpagg/quantiles.go b/go/dpagg/quantiles.go index d941d416..8d8dd989 100644 --- a/go/dpagg/quantiles.go +++ b/go/dpagg/quantiles.go @@ -25,10 +25,11 @@ import ( "github.com/google/differential-privacy/go/noise" ) +// Constants used for QuantileTrees. const ( numericalTolerance = 1e-6 - defaultTreeHeight = 4 - defaultBranchingFactor = 16 + DefaultTreeHeight = 4 + DefaultBranchingFactor = 16 rootIndex = 0 // Fraction a node needs to contribute to the total count of itself and its siblings to be // considered during the search for a particular quantile. The idea of alpha is to filter out @@ -61,7 +62,7 @@ type BoundedQuantiles struct { branchingFactor int l0Sensitivity int64 lInfSensitivity float64 - noise noise.Noise + Noise noise.Noise noiseKind noise.Kind // necessary for serializing noise.Noise information // State variables @@ -128,7 +129,7 @@ func NewBoundedQuantiles(opt *BoundedQuantilesOptions) *BoundedQuantiles { // Check tree height and branching factor, set defaults if not specified, and use them to compute numLeaves and leftmostLeafIndex. 
treeHeight := opt.TreeHeight if treeHeight == 0 { - treeHeight = defaultTreeHeight + treeHeight = DefaultTreeHeight } if err := checks.CheckTreeHeight("NewBoundedQuantiles", treeHeight); err != nil { // TODO: do not exit the program from within library code @@ -136,7 +137,7 @@ func NewBoundedQuantiles(opt *BoundedQuantilesOptions) *BoundedQuantiles { } branchingFactor := opt.BranchingFactor if branchingFactor == 0 { - branchingFactor = defaultBranchingFactor + branchingFactor = DefaultBranchingFactor } if err := checks.CheckBranchingFactor("NewBoundedQuantiles", branchingFactor); err != nil { // TODO: do not exit the program from within library code @@ -171,7 +172,7 @@ func NewBoundedQuantiles(opt *BoundedQuantilesOptions) *BoundedQuantiles { branchingFactor: branchingFactor, l0Sensitivity: l0Sensitivity, lInfSensitivity: lInfSensitivity, - noise: n, + Noise: n, noiseKind: noise.ToKind(n), tree: make(map[int]int64), noisedTree: make(map[int]float64), @@ -328,7 +329,7 @@ func (bq *BoundedQuantiles) getNoisedCount(index int) float64 { return noisedCount } rawCount := bq.tree[index] - noisedCount := bq.noise.AddNoiseFloat64(float64(rawCount), bq.l0Sensitivity, bq.lInfSensitivity, bq.epsilon, bq.delta) + noisedCount := bq.Noise.AddNoiseFloat64(float64(rawCount), bq.l0Sensitivity, bq.lInfSensitivity, bq.epsilon, bq.delta) bq.noisedTree[index] = noisedCount return noisedCount } @@ -359,6 +360,7 @@ func (bq *BoundedQuantiles) Merge(bq2 *BoundedQuantiles) { for index, count := range bq2.tree { bq.tree[index] += count } + bq2.state = merged } func checkMergeBoundedQuantiles(bq1, bq2 *BoundedQuantiles) error { @@ -421,7 +423,7 @@ func (bq *BoundedQuantiles) GobEncode() ([]byte, error) { Upper: bq.upper, NumLeaves: bq.numLeaves, LeftmostLeafIndex: bq.leftmostLeafIndex, - NoiseKind: noise.ToKind(bq.noise), + NoiseKind: noise.ToKind(bq.Noise), QuantileTree: bq.tree, } bq.state = serialized @@ -446,7 +448,7 @@ func (bq *BoundedQuantiles) GobDecode(data []byte) error { lower: 
enc.Lower, upper: enc.Upper, noiseKind: enc.NoiseKind, - noise: noise.ToNoise(enc.NoiseKind), + Noise: noise.ToNoise(enc.NoiseKind), numLeaves: enc.NumLeaves, leftmostLeafIndex: enc.LeftmostLeafIndex, tree: enc.QuantileTree, diff --git a/go/dpagg/quantiles_test.go b/go/dpagg/quantiles_test.go index 053b70e8..68ae371f 100644 --- a/go/dpagg/quantiles_test.go +++ b/go/dpagg/quantiles_test.go @@ -52,7 +52,7 @@ func TestNewBoundedQuantiles(t *testing.T) { upper: 5, l0Sensitivity: 4, // Uses default MaxPartitionsContributed of 1. lInfSensitivity: 2, - noise: noNoise{}, + Noise: noNoise{}, noiseKind: noise.Unrecognised, treeHeight: 4, branchingFactor: 16, @@ -81,7 +81,7 @@ func TestNewBoundedQuantiles(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 16, numLeaves: 65536, @@ -107,7 +107,7 @@ func TestNewBoundedQuantiles(t *testing.T) { upper: 5, l0Sensitivity: 4, lInfSensitivity: 2, - noise: noNoise{}, + Noise: noNoise{}, noiseKind: noise.Unrecognised, treeHeight: 4, // Uses default treeHeight of 4. branchingFactor: 16, // Uses default branchingFactor of 16. @@ -134,7 +134,7 @@ func TestBQNoiseIsCorrectlyCalled(t *testing.T) { func TestBQNoInput(t *testing.T) { bq := getNoiselessBQ() got := bq.Result(0.5) - want := 0.0 // When there are no inputs, we linearly extrapolate. + want := 0.0 // When there are no inputs, we linearly interpolate. if !ApproxEqual(got, want) { t.Errorf("Result: when there is no input data got=%f, want=%f", got, want) } @@ -272,7 +272,7 @@ func TestBQInvariantToPreClamping(t *testing.T) { // Tests that Result(rank) increases monotonically with rank even with noise. func TestBQIncreasesMonotonically(t *testing.T) { bq := getNoiselessBQ() - bq.noise = noise.Gaussian() // This property should hold even if noise is added. + bq.Noise = noise.Gaussian() // This property should hold even if noise is added. 
for _, i := range createEntries() { bq.Add(i) @@ -669,7 +669,7 @@ func TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 16, numLeaves: 65536, @@ -685,7 +685,7 @@ func TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 16, numLeaves: 65536, @@ -705,7 +705,7 @@ func TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 16, numLeaves: 65536, @@ -721,7 +721,7 @@ func TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 16, numLeaves: 65536, @@ -741,7 +741,7 @@ func TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 16, numLeaves: 65536, @@ -757,7 +757,7 @@ func TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 16, numLeaves: 65536, @@ -777,7 +777,7 @@ func TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 6, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 3, branchingFactor: 16, numLeaves: 4096, @@ -793,7 +793,7 @@ func TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 16, numLeaves: 65536, @@ -813,7 +813,7 @@ func 
TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 8, numLeaves: 4096, @@ -829,7 +829,7 @@ func TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 16, numLeaves: 65536, @@ -849,7 +849,7 @@ func TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 16, numLeaves: 65536, @@ -865,7 +865,7 @@ func TestBQEquallyInitialized(t *testing.T) { l0Sensitivity: 8, lInfSensitivity: 2, noiseKind: noise.LaplaceNoise, - noise: noise.Laplace(), + Noise: noise.Laplace(), treeHeight: 4, branchingFactor: 16, numLeaves: 65536, @@ -960,7 +960,7 @@ func compareBoundedQuantiles(bq1, bq2 *BoundedQuantiles) bool { bq1.branchingFactor == bq2.branchingFactor && bq1.numLeaves == bq2.numLeaves && bq1.leftmostLeafIndex == bq2.leftmostLeafIndex && - bq1.noise == bq2.noise && + bq1.Noise == bq2.Noise && bq1.noiseKind == bq2.noiseKind && reflect.DeepEqual(bq1.tree, bq2.tree) && reflect.DeepEqual(bq1.noisedTree, bq2.noisedTree) && diff --git a/privacy-on-beam/go.mod b/privacy-on-beam/go.mod index ba17b091..6903ca61 100644 --- a/privacy-on-beam/go.mod +++ b/privacy-on-beam/go.mod @@ -3,9 +3,9 @@ module github.com/google/differential-privacy/privacy-on-beam go 1.16 require ( - github.com/apache/beam v2.22.0+incompatible + github.com/apache/beam v2.25.0+incompatible github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b - github.com/google/go-cmp v0.4.2-0.20200609072101-23a2b5646fe0 - gonum.org/v1/plot v0.7.0 - google.golang.org/protobuf v1.24.1-0.20200612063355-beaa55256c57 -) \ No newline at end of file + github.com/google/go-cmp v0.5.5 + gonum.org/v1/plot v0.8.1 + 
google.golang.org/protobuf v1.26.0 +) diff --git a/privacy-on-beam/pbeam/BUILD.bazel b/privacy-on-beam/pbeam/BUILD.bazel index d7ca1c5c..c43637fd 100644 --- a/privacy-on-beam/pbeam/BUILD.bazel +++ b/privacy-on-beam/pbeam/BUILD.bazel @@ -32,6 +32,7 @@ go_library( "no_noise.go", "pardo.go", "pbeam.go", + "quantiles.go", "select_partitions.go", "sum.go", ], @@ -70,6 +71,7 @@ go_test( "pardo_test.go", "pbeam_main_test.go", "pbeam_test.go", + "quantiles_test.go", "select_partitions_test.go", "sum_test.go", ], diff --git a/privacy-on-beam/pbeam/aggregations.go b/privacy-on-beam/pbeam/aggregations.go index f4e2a526..43d5e175 100644 --- a/privacy-on-beam/pbeam/aggregations.go +++ b/privacy-on-beam/pbeam/aggregations.go @@ -58,6 +58,7 @@ func init() { beam.RegisterFunction(addDummyValuesToPublicPartitionsFloat64SliceFn) beam.RegisterFunction(dropThresholdedPartitionsInt64Fn) beam.RegisterFunction(dropThresholdedPartitionsFloat64Fn) + beam.RegisterFunction(dropThresholdedPartitionsFloat64SliceFn) // TODO: add tests to make sure we don't forget anything here } @@ -511,6 +512,14 @@ func dropThresholdedPartitionsFloat64Fn(v beam.V, r *float64, emit func(beam.V, } } +// dropThresholdedPartitionsFloat64SliceFn drops thresholded []float64 partitions, i.e. +// those that have nil r, by emitting only non-thresholded partitions. 
+func dropThresholdedPartitionsFloat64SliceFn(v beam.V, r []float64, emit func(beam.V, []float64)) { + if r != nil { + emit(v, r) + } +} + func findClampNegativePartitionsFn(kind reflect.Kind) interface{} { switch kind { case reflect.Int64: diff --git a/privacy-on-beam/pbeam/coders.go b/privacy-on-beam/pbeam/coders.go index 6c523e9d..bc6a268a 100644 --- a/privacy-on-beam/pbeam/coders.go +++ b/privacy-on-beam/pbeam/coders.go @@ -31,6 +31,7 @@ func init() { beam.RegisterCoder(reflect.TypeOf(boundedSumAccumInt64{}), encodeBoundedSumAccumInt64, decodeBoundedSumAccumInt64) beam.RegisterCoder(reflect.TypeOf(boundedSumAccumFloat64{}), encodeBoundedSumAccumFloat64, decodeBoundedSumAccumFloat64) beam.RegisterCoder(reflect.TypeOf(boundedMeanAccumFloat64{}), encodeBoundedMeanAccumFloat64, decodeBoundedMeanAccumFloat64) + beam.RegisterCoder(reflect.TypeOf(boundedQuantilesAccum{}), encodeBoundedQuantilesAccum, decodeBoundedQuantilesAccum) beam.RegisterCoder(reflect.TypeOf(expandValuesAccum{}), encodeExpandValuesAccum, decodeExpandValuesAccum) beam.RegisterCoder(reflect.TypeOf(partitionSelectionAccum{}), encodePartitionSelectionAccum, decodePartitionSelectionAccum) } @@ -75,6 +76,16 @@ func decodeBoundedMeanAccumFloat64(data []byte) (boundedMeanAccumFloat64, error) return ret, err } +func encodeBoundedQuantilesAccum(v boundedQuantilesAccum) ([]byte, error) { + return encode(v) +} + +func decodeBoundedQuantilesAccum(data []byte) (boundedQuantilesAccum, error) { + var ret boundedQuantilesAccum + err := decode(&ret, data) + return ret, err +} + func encodeExpandValuesAccum(v expandValuesAccum) ([]byte, error) { return encode(v) } diff --git a/privacy-on-beam/pbeam/count_test.go b/privacy-on-beam/pbeam/count_test.go index 178a55d9..7d86701b 100644 --- a/privacy-on-beam/pbeam/count_test.go +++ b/privacy-on-beam/pbeam/count_test.go @@ -98,8 +98,8 @@ func TestCountWithPartitionsNoNoise(t *testing.T) { } } -// Checks that Count is performing a random partition selection. 
-func TestCountPartitionSelectionNonDeterministic(t *testing.T) { +// Checks that Count applies partition selection. +func TestCountPartitionSelection(t *testing.T) { for _, tc := range []struct { name string noiseKind NoiseKind @@ -167,7 +167,7 @@ func TestCountPartitionSelectionNonDeterministic(t *testing.T) { got := Count(s, pcol, CountParams{MaxValue: 1, MaxPartitionsContributed: 1, NoiseKind: tc.noiseKind}) got = beam.ParDo(s, testutils.KVToInt64Metric, got) - // Validate that partitions are selected randomly (i.e., some emitted and some dropped). + // Validate that partition selection is applied (i.e., some emitted and some dropped). testutils.CheckSomePartitionsAreDropped(s, got, tc.numPartitions) if err := ptest.Run(p); err != nil { t.Errorf("%v", err) diff --git a/privacy-on-beam/pbeam/distinct_per_key_test.go b/privacy-on-beam/pbeam/distinct_per_key_test.go index 81df2265..73f915ad 100644 --- a/privacy-on-beam/pbeam/distinct_per_key_test.go +++ b/privacy-on-beam/pbeam/distinct_per_key_test.go @@ -234,7 +234,7 @@ func TestDistinctPrivacyKeyPerPartitionContributionBounding(t *testing.T) { } } -var distinctPerKeyPartitionSelectionNonDeterministicTestCases = []struct { +var distinctPerKeyPartitionSelectionTestCases = []struct { name string noiseKind NoiseKind epsilon float64 @@ -273,9 +273,9 @@ var distinctPerKeyPartitionSelectionNonDeterministicTestCases = []struct { }, } -// Checks that DistinctPerKey is performing a random partition selection. -func TestDistinctPerKeyPartitionSelectionNonDeterministicInt(t *testing.T) { - for _, tc := range distinctPerKeyPartitionSelectionNonDeterministicTestCases { +// Checks that DistinctPerKey applies partition selection. +func TestDistinctPerKeyPartitionSelection(t *testing.T) { + for _, tc := range distinctPerKeyPartitionSelectionTestCases { t.Run(tc.name, func(t *testing.T) { // Sanity check that the entriesPerPartition is sensical. 
if tc.entriesPerPartition <= 0 { @@ -307,7 +307,7 @@ func TestDistinctPerKeyPartitionSelectionNonDeterministicInt(t *testing.T) { got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: int64(tc.numPartitions), NoiseKind: tc.noiseKind, MaxContributionsPerPartition: 1}) got = beam.ParDo(s, testutils.KVToInt64Metric, got) - // Validate that partitions are selected randomly (i.e., some emitted and some dropped). + // Validate that partition selection is applied (i.e., some emitted and some dropped). testutils.CheckSomePartitionsAreDropped(s, got, tc.numPartitions) if err := ptest.Run(p); err != nil { t.Errorf("%v", err) diff --git a/privacy-on-beam/pbeam/mean_test.go b/privacy-on-beam/pbeam/mean_test.go index 28d90478..2838aa90 100644 --- a/privacy-on-beam/pbeam/mean_test.go +++ b/privacy-on-beam/pbeam/mean_test.go @@ -600,7 +600,7 @@ func TestMeanPerKeyWithPartitionsNoNoiseFloatValues(t *testing.T) { } } -var meanPartitionSelectionNonDeterministicTestCases = []struct { +var meanPartitionSelectionTestCases = []struct { name string noiseKind NoiseKind epsilon float64 @@ -639,9 +639,9 @@ var meanPartitionSelectionNonDeterministicTestCases = []struct { }, } -// Checks that MeanPerKey is performing a random partition selection. -func TestMeanPartitionSelectionNonDeterministic(t *testing.T) { - for _, tc := range meanPartitionSelectionNonDeterministicTestCases { +// Checks that MeanPerKey applies partition selection. +func TestMeanPartitionSelection(t *testing.T) { + for _, tc := range meanPartitionSelectionTestCases { t.Run(tc.name, func(t *testing.T) { // Sanity check that the entriesPerPartition is sensical. if tc.entriesPerPartition <= 0 { @@ -679,7 +679,7 @@ func TestMeanPartitionSelectionNonDeterministic(t *testing.T) { }) got = beam.ParDo(s, testutils.KVToFloat64Metric, got) - // Validate that partitions are selected randomly (i.e., some emitted and some dropped). 
+ // Validate that partition selection is applied (i.e., some emitted and some dropped). testutils.CheckSomePartitionsAreDropped(s, got, tc.numPartitions) if err := ptest.Run(p); err != nil { t.Errorf("%v", err) diff --git a/privacy-on-beam/pbeam/pbeamtest/BUILD.bazel b/privacy-on-beam/pbeam/pbeamtest/BUILD.bazel index 8a60a9b7..6dbd9ae6 100644 --- a/privacy-on-beam/pbeam/pbeamtest/BUILD.bazel +++ b/privacy-on-beam/pbeam/pbeamtest/BUILD.bazel @@ -29,6 +29,7 @@ go_library( deps = [ "//internal/testoption:go_default_library", "//pbeam:go_default_library", + "@com_google_go_differential_privacy//dpagg:go_default_library", ], ) @@ -42,5 +43,7 @@ go_test( "@com_github_apache_beam//sdks/go/pkg/beam:go_default_library", "@com_github_apache_beam//sdks/go/pkg/beam/testing/ptest:go_default_library", "@com_github_apache_beam//sdks/go/pkg/beam/transforms/stats:go_default_library", + "@com_github_google_go_cmp//cmp:go_default_library", + "@com_github_google_go_cmp//cmp/cmpopts:go_default_library", ], ) diff --git a/privacy-on-beam/pbeam/pbeamtest/pbeamtest.go b/privacy-on-beam/pbeam/pbeamtest/pbeamtest.go index 7eaeec8e..724710c8 100644 --- a/privacy-on-beam/pbeam/pbeamtest/pbeamtest.go +++ b/privacy-on-beam/pbeam/pbeamtest/pbeamtest.go @@ -19,6 +19,9 @@ package pbeamtest import ( + "math" + + "github.com/google/differential-privacy/go/dpagg" "github.com/google/differential-privacy/privacy-on-beam/internal/testoption" "github.com/google/differential-privacy/privacy-on-beam/pbeam" ) @@ -54,3 +57,13 @@ func NewPrivacySpecNoNoiseWithContributionBounding(epsilon, delta float64) *pbea func NewPrivacySpecNoNoiseWithoutContributionBounding(epsilon, delta float64) *pbeam.PrivacySpec { return pbeam.NewPrivacySpec(epsilon, delta, testoption.EnableNoNoiseWithoutContributionBounding{}) } + +// QuantilesTolerance returns a tolerance t such that the output of QuantilesPerKey is +// within t of the exact result for given MinValue and MaxValue parameters of +// QuantilesParams when pbeamtest 
is used. +// +// Due to the implementation details of Quantiles, it has an inherent (non-DP) noise. So, +// even when we disable DP noise, the results will be still slightly noisy. +func QuantilesTolerance(MinValue, MaxValue float64) float64 { + return (MaxValue - MinValue) / math.Pow(float64(dpagg.DefaultBranchingFactor), float64(dpagg.DefaultTreeHeight)) +} diff --git a/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go b/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go index f9692dad..8dc0f62d 100644 --- a/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go +++ b/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go @@ -24,6 +24,8 @@ import ( "github.com/apache/beam/sdks/go/pkg/beam" "github.com/apache/beam/sdks/go/pkg/beam/testing/ptest" "github.com/apache/beam/sdks/go/pkg/beam/transforms/stats" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" ) func TestMain(m *testing.M) { @@ -788,8 +790,9 @@ func TestMeanPerKeyTestModeCrossPartitionContributionBounding(t *testing.T) { privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), maxPartitionsContributed: 3, // The same privacy ID contributes "1.0" to 10 partitions, which implies that mean of each - // partition is 1.0. Contribution bounding is disabled. The sum of all means must then be 10.0. - // This also ensures that no partitions (each with a single privacy id) gets thresholded. + // partition is 1.0. Cross-partition contribution bounding is disabled. The sum of all means + // must then be 10.0. This also ensures that no partitions (each with a single privacy id) + // gets thresholded. want: 10.0, }, } { @@ -844,7 +847,7 @@ func TestMeanPerKeyTestModePerPartitionContributionBounding(t *testing.T) { maxValue: 50.0, // MaxContributionsPerPartition = 1, but id = 0 contributes 3 times to partition 0. // There will be a per-partition contribution bounding stage. - // In this stage the algorithm will randomly chose one of these 3 contributions. 
+ // In this stage the algorithm will arbitrarily chose one of these 3 contributions. // The mean should be equal to 50/50 = 1.0 (not 150/52 ≈ 2.88, if no per-partition contribution bounding is done). want: 1.0, }, @@ -882,7 +885,7 @@ func TestMeanPerKeyTestModePerPartitionContributionBounding(t *testing.T) { want = beam.ParDo(s, testutils.Float64MetricToKV, want) tolerance := 1e-10 // Using a small tolerance to make up for the rounding errors due to summation & division. if err := testutils.ApproxEqualsKVFloat64(s, got, want, tolerance); err != nil { - t.Fatalf("EqualsKVFloat64: %v", err) + t.Fatalf("ApproxEqualsKVFloat64: %v", err) } if err := ptest.Run(p); err != nil { t.Errorf("MeanPerKey: %s did not do per-partition contribution bounding correctly: %v", tc.desc, err) @@ -913,8 +916,8 @@ func TestMeanPerKeyWithPartitionsTestModeCrossPartitionContributionBounding(t *t privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), maxPartitionsContributed: 3, // The same privacy ID contributes "1.0" to 10 partitions, which implies that mean of each - // partition is 1.0. Contribution bounding is disabled and 5 out of 10 partitions are specified - // as public partitions. The sum of all means must then be 5.0. + // partition is 1.0. Cross-partition contribution bounding is disabled and 5 out of 10 partitions + // are specified as public partitions. The sum of all means must then be 5.0. want: 5.0, }, } { @@ -973,7 +976,7 @@ func TestMeanPerKeyWithPartitionsTestModePerPartitionContributionBoundingAddsEmp maxValue: 50.0, // MaxContributionsPerPartition = 1, but id = 0 contributes 3 times to partition 0. // There will be a per-partition contribution bounding stage. - // In this stage the algorithm will randomly chose one of these 3 contributions. + // In this stage the algorithm will arbitrarily chose one of these 3 contributions. // The mean should be equal to 50/50 = 1.0 (not 150/52 ≈ 2.88, if no per-partition contribution bounding is done). 
want: 1.0, }, @@ -1024,6 +1027,388 @@ func TestMeanPerKeyWithPartitionsTestModePerPartitionContributionBoundingAddsEmp } } +// Tests that QuantilesPerKey bounds cross-partitions contributions correctly, adds no +// noise and keeps all partitions in test mode. +func TestQuantilesPerKeyTestModeCrossPartitionContributionBounding(t *testing.T) { + for _, tc := range []struct { + desc string + privacySpec *pbeam.PrivacySpec + maxContributionsPerPartition int64 + maxPartitionsContributed int64 + minValue float64 + maxValue float64 + want float64 + }{ + { + desc: "test mode with contribution bounding", + privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + maxContributionsPerPartition: 20, + maxPartitionsContributed: 1, + minValue: 0.0, + maxValue: 1.0, + // 10 distinct privacy IDs contribute 0.0 to partition 0 and another 10 distinct + // privacy IDs contribute 0.0 to partition 1. A single privacy ID (different from + // these 20 privacy IDs) then contributes 20 "1.0"s to both partition 0 and partition 1. + // MaxPartitionsContributed is 1, so contributions to only one of these partitions will + // be kept. The median (rank=0.50) of one of these partitions must then be 0.0 and the other + // 1.0. The sum of these medians must then equal 1.0 (as opposed to 2.0 if no contribution + // bounding takes place). This also ensures that no partitions get thresholded. + want: 1.0, + }, + { + desc: "test mode without contribution bounding", + privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + maxContributionsPerPartition: 20, + maxPartitionsContributed: 1, + minValue: 0.0, + maxValue: 1.0, + // 10 distinct privacy IDs contribute 0.0 to partition 0 and another 10 distinct + // privacy IDs contribute 0.0 to partition 1. A single privacy ID (different from + // these 20 privacy IDs) then contributes 20 "1.0"s to both partition 0 and partition 1. 
+ // Cross-partition contribution bounding is disabled, so the median (rank=0.50) of both of + // these partitions must then be 1.0. The sum of these medians must then equal 2.0. + want: 2.0, + }, + } { + triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(10, 0, 0.0), + testutils.MakeTripleWithFloatValueStartingFromKey(10, 10, 1, 0.0)) + for i := 0; i < 20; i++ { + triples = append(triples, testutils.TripleWithFloatValue{ID: 20, Partition: 0, Value: 1.0}) + triples = append(triples, testutils.TripleWithFloatValue{ID: 20, Partition: 1, Value: 1.0}) + } + + wantMetric := []testutils.TestFloat64Metric{ + {0, tc.want}, + } + + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + pcol := pbeam.MakePrivate(s, col, tc.privacySpec) + pcol = pbeam.ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := pbeam.QuantilesPerKey(s, pcol, pbeam.QuantilesParams{ + MaxContributionsPerPartition: tc.maxContributionsPerPartition, + MaxPartitionsContributed: tc.maxPartitionsContributed, + MinValue: tc.minValue, + MaxValue: tc.maxValue, + Ranks: []float64{0.50}, + NoiseKind: pbeam.LaplaceNoise{}}) + got = beam.ParDo(s, testutils.DereferenceFloat64Slice, got) + medians := beam.DropKey(s, got) + sumOverPartitions := stats.Sum(s, medians) + got = beam.AddFixedKey(s, sumOverPartitions) // Adds a fixed key of 0. + want = beam.ParDo(s, testutils.Float64MetricToKV, want) + // Tolerance is multiplied by 2 because we sum over 2 partitions. 
+ tolerance := QuantilesTolerance(tc.minValue, tc.maxValue) * 2 + if err := testutils.ApproxEqualsKVFloat64(s, got, want, tolerance); err != nil { + t.Fatalf("ApproxEqualsKVFloat64: %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey: %s did not bound cross-partition contributions correctly: %v", tc.desc, err) + } + } +} + +// Tests that QuantilesPerKey with partitions bounds cross-partitions contributions correctly +// and adds no noise in test mode. +func TestQuantilesPerKeyWithPartitionsTestModeCrossPartitionContributionBounding(t *testing.T) { + for _, tc := range []struct { + desc string + privacySpec *pbeam.PrivacySpec + maxContributionsPerPartition int64 + maxPartitionsContributed int64 + minValue float64 + maxValue float64 + want float64 + }{ + { + desc: "test mode with contribution bounding", + privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + maxContributionsPerPartition: 20, + maxPartitionsContributed: 1, + minValue: 0.0, + maxValue: 1.0, + // 10 distinct privacy IDs contribute 0.0 to partition 0 and another 10 distinct + // privacy IDs contribute 0.0 to partition 1. A single privacy ID (different from + // these 20 privacy IDs) then contributes 20 "1.0"s to both partition 0 and partition 1. + // MaxPartitionsContributed is 1, so contributions to only one of these partitions will + // be kept. The median (rank=0.50) of one of these partitions must then be 0.0 and the other + // 1.0. The sum of these medians must then equal 1.0 (as opposed to 2.0 if no contribution + // bounding takes place). + want: 1.0, + }, + { + desc: "test mode without contribution bounding", + privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + maxContributionsPerPartition: 20, + maxPartitionsContributed: 1, + minValue: 0.0, + maxValue: 1.0, + // 10 distinct privacy IDs contribute 0.0 to partition 0 and another 10 distinct + // privacy IDs contribute 0.0 to partition 1. 
A single privacy ID (different from + // these 20 privacy IDs) then contributes 20 "1.0"s to both partition 0 and partition 1. + // Cross-partition contribution bounding is disabled, so the median (rank=0.50) of both of + // these partitions must then be 1.0. The sum of these medians must then equal 2.0. + want: 2.0, + }, + } { + triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(10, 0, 0.0), + testutils.MakeTripleWithFloatValueStartingFromKey(10, 10, 1, 0.0)) + for i := 0; i < 20; i++ { + triples = append(triples, testutils.TripleWithFloatValue{ID: 200, Partition: 0, Value: 1.0}) + triples = append(triples, testutils.TripleWithFloatValue{ID: 200, Partition: 1, Value: 1.0}) + } + + wantMetric := []testutils.TestFloat64Metric{ + {0, tc.want}, + } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + partitions := []int{0, 1} + publicPartitions := beam.CreateList(s, partitions) + + pcol := pbeam.MakePrivate(s, col, tc.privacySpec) + pcol = pbeam.ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := pbeam.QuantilesPerKey(s, pcol, pbeam.QuantilesParams{ + MaxContributionsPerPartition: tc.maxContributionsPerPartition, + MaxPartitionsContributed: tc.maxPartitionsContributed, + MinValue: tc.minValue, + MaxValue: tc.maxValue, + Ranks: []float64{0.50}, + PublicPartitions: publicPartitions, + NoiseKind: pbeam.LaplaceNoise{}}) + got = beam.ParDo(s, testutils.DereferenceFloat64Slice, got) + medians := beam.DropKey(s, got) + sumOverPartitions := stats.Sum(s, medians) + got = beam.AddFixedKey(s, sumOverPartitions) // Adds a fixed key of 0. + want = beam.ParDo(s, testutils.Float64MetricToKV, want) + // Tolerance is multiplied by 2 because we sum over 2 partitions. 
+ tolerance := QuantilesTolerance(tc.minValue, tc.maxValue) * 2 + if err := testutils.ApproxEqualsKVFloat64(s, got, want, tolerance); err != nil { + t.Fatalf("ApproxEqualsKVFloat64: %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey: %s with partitions did not bound cross-partition contributions correctly: %v", tc.desc, err) + } + } +} + +// Tests that QuantilesPerKey bounds per-partition contributions correctly, adds no noise +// and keeps all partitions in test mode. +func TestQuantilesPerKeyTestModePerPartitionContributionBounding(t *testing.T) { + for _, tc := range []struct { + desc string + privacySpec *pbeam.PrivacySpec + maxContributionPerPartition int64 + minValue float64 + maxValue float64 + want float64 + }{ + { + desc: "test mode with contribution bounding", + privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + maxContributionPerPartition: 1, + minValue: 0.0, + maxValue: 1.0, + // First 50 privacy IDs contribute "0.0" 3 times to partition 0 and the next 50 privacy IDs + // contribute "1.0" to the same partition. + // There will be a per-partition contribution bounding stage. MaxContributionsPerPartition=1, so + // the algorithm will arbitrarily keep one of these 3 contributions for the first 50 privacy IDs. + // There will be equal number of "0.0"s and "1.0", so rank=0.6 should be equal to 1.0 (not 0.0, + // if no per-partition contribution bounding is done) + want: 1.0, + }, + { + desc: "test mode without contribution bounding", + privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + maxContributionPerPartition: 1, + minValue: 0.0, + maxValue: 1.0, + // First 50 privacy IDs contribute "0.0" 3 times to partition 0 and the next 50 privacy IDs + // contribute "1.0" to the same partition. + // There will not be a per-partition contribution bounding stage, meaning that there will be 150 + // "0.0"s and 50 "1.0"s. rank=0.6 should be equal to 0.0. 
+ want: 0.0, + }, + } { + var triples []testutils.TripleWithFloatValue + // triples{privacy_id, partition_key, value} contains {0,0,0}, {0,0,0}, {0,0,0}, …, {49,0,0}, {49,0,0}, {49,0,0}, {50,0,1}, {51,0,1}, …, {99, 0, 1}. + triples = append(triples, testutils.MakeTripleWithFloatValue(50, 0, 0.0)...) + triples = append(triples, testutils.MakeTripleWithFloatValue(50, 0, 0.0)...) + triples = append(triples, testutils.MakeTripleWithFloatValue(50, 0, 0.0)...) + triples = append(triples, testutils.MakeTripleWithFloatValueStartingFromKey(50, 50, 0, 1.0)...) + wantMetric := []testutils.TestFloat64Metric{ + {0, tc.want}, + } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + pcol := pbeam.MakePrivate(s, col, tc.privacySpec) + pcol = pbeam.ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := pbeam.QuantilesPerKey(s, pcol, pbeam.QuantilesParams{ + MaxContributionsPerPartition: tc.maxContributionPerPartition, + MaxPartitionsContributed: 1, + MinValue: tc.minValue, + MaxValue: tc.maxValue, + Ranks: []float64{0.6}, + NoiseKind: pbeam.LaplaceNoise{}}) + got = beam.ParDo(s, testutils.DereferenceFloat64Slice, got) + + want = beam.ParDo(s, testutils.Float64MetricToKV, want) + if err := testutils.ApproxEqualsKVFloat64(s, got, want, QuantilesTolerance(tc.minValue, tc.maxValue)); err != nil { + t.Fatalf("ApproxEqualsKVFloat64: %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey: %s did not do per-partition contribution bounding correctly: %v", tc.desc, err) + } + } +} + +// Tests that QuantilesPerKey with partition bounds per-partition contributions correctly, +// adds no noise and keeps all partitions in test mode. 
+func TestQuantilesPerKeyWithPartitionsTestModePerPartitionContributionBounding(t *testing.T) { + for _, tc := range []struct { + desc string + privacySpec *pbeam.PrivacySpec + maxContributionPerPartition int64 + minValue float64 + maxValue float64 + want float64 + }{ + { + desc: "test mode with contribution bounding", + privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + maxContributionPerPartition: 1, + minValue: 0.0, + maxValue: 1.0, + // First 50 privacy IDs contribute "0.0" 3 times to partition 0 and the next 50 privacy IDs + // contribute "1.0" to the same partition. + // There will be a per-partition contribution bounding stage. MaxContributionsPerPartition=1, so + // the algorithm will arbitrarily keep one of these 3 contributions for the first 50 privacy IDs. + // There will be equal number of "0.0"s and "1.0", so rank=0.6 should be equal to 1.0 (not 0.0, + // if no per-partition contribution bounding is done) + want: 1.0, + }, + { + desc: "test mode without contribution bounding", + privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + maxContributionPerPartition: 1, + minValue: 0.0, + maxValue: 1.0, + // First 50 privacy IDs contribute "0.0" 3 times to partition 0 and the next 50 privacy IDs + // contribute "1.0" to the same partition. + // There will not be a per-partition contribution bounding stage, meaning that there will be 150 + // "0.0"s and 50 "1.0"s. rank=0.6 should be equal to 0.0. + want: 0.0, + }, + } { + var triples []testutils.TripleWithFloatValue + // triples{privacy_id, partition_key, value} contains {0,0,0}, {0,0,0}, {0,0,0}, …, {49,0,0}, {49,0,0}, {49,0,0}, {50,0,1}, {51,0,1}, …, {99, 0, 1}. + triples = append(triples, testutils.MakeTripleWithFloatValue(50, 0, 0.0)...) + triples = append(triples, testutils.MakeTripleWithFloatValue(50, 0, 0.0)...) + triples = append(triples, testutils.MakeTripleWithFloatValue(50, 0, 0.0)...) 
+ triples = append(triples, testutils.MakeTripleWithFloatValueStartingFromKey(50, 50, 0, 1.0)...) + wantMetric := []testutils.TestFloat64Metric{ + {0, tc.want}, + } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + publicPartitions := beam.CreateList(s, []int{0}) + + pcol := pbeam.MakePrivate(s, col, tc.privacySpec) + pcol = pbeam.ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := pbeam.QuantilesPerKey(s, pcol, pbeam.QuantilesParams{ + MaxContributionsPerPartition: tc.maxContributionPerPartition, + MaxPartitionsContributed: 1, + MinValue: tc.minValue, + MaxValue: tc.maxValue, + Ranks: []float64{0.6}, + PublicPartitions: publicPartitions, + NoiseKind: pbeam.LaplaceNoise{}}) + got = beam.ParDo(s, testutils.DereferenceFloat64Slice, got) + + want = beam.ParDo(s, testutils.Float64MetricToKV, want) + if err := testutils.ApproxEqualsKVFloat64(s, got, want, QuantilesTolerance(tc.minValue, tc.maxValue)); err != nil { + t.Fatalf("ApproxEqualsKVFloat64: %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey: %s with partitions did not do per-partition contribution bounding correctly: %v", tc.desc, err) + } + } +} + +// Checks that QuantilesPerKey with partitions applies public partitions correctly in test mode. 
+func TestQuantilesPerKeyWithPartitionsAppliesPublicPartitions(t *testing.T) { + for _, tc := range []struct { + desc string + privacySpec *pbeam.PrivacySpec + }{ + { + desc: "test mode with contribution bounding", + privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + }, + { + desc: "test mode without contribution bounding", + privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + }, + } { + triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(100, 0, 1.0), + testutils.MakeTripleWithFloatValue(100, 0, 4.0), + testutils.MakeTripleWithFloatValueStartingFromKey(100, 100, 1, 1.0)) + + wantMetric := []testutils.TestFloat64SliceMetric{ + {0, []float64{1.0, 1.0, 4.0, 4.0}}, + // Partition 1 is not in the list of public partitions, so it will be dropped. + {2, []float64{0.5, 1.25, 3.75, 4.5}}, // Empty partition is linearly interpolated. + } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + lower := 0.0 + upper := 5.0 + ranks := []float64{0.10, 0.25, 0.75, 0.90} + publicPartitions := beam.CreateList(s, []int{0, 2}) + + pcol := pbeam.MakePrivate(s, col, tc.privacySpec) + pcol = pbeam.ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := pbeam.QuantilesPerKey(s, pcol, pbeam.QuantilesParams{ + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 2, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + PublicPartitions: publicPartitions, + }) + + want = beam.ParDo(s, testutils.Float64SliceMetricToKV, want) + if err := testutils.ApproxEqualsKVFloat64Slice(s, got, want, QuantilesTolerance(lower, upper)); err != nil { + t.Fatalf("ApproxEqualsKVFloat64Slice: got error %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey did apply public partitions correctly: %v", err) + } + } +} + +func TestQuantilesTolerance(t *testing.T) { + for _, tc := range 
[]struct { + minValue float64 + maxValue float64 + wantTolerance float64 + }{ + {-5.0, 5.0, 0.00015258789}, + {0.0, 1000.0, 0.01525878906}, + } { + got := QuantilesTolerance(tc.minValue, tc.maxValue) + if !cmp.Equal(got, tc.wantTolerance, cmpopts.EquateApprox(0, 1e-9)) { // Allow for floating point arithmetic errors. + t.Errorf("QuantilesTolerance: with minValue=%f maxValue=%f got tolerance=%f, want=%f", tc.minValue, tc.maxValue, got, tc.wantTolerance) + } + } +} + // Tests that SelectPartitions bounds cross-partition contributions correctly and keeps // all partitions in test mode with PrivatePCollection inputs. func TestSelectPartitionsTestModeCrossPartitionContributionBoundingV(t *testing.T) { @@ -1044,7 +1429,7 @@ func TestSelectPartitionsTestModeCrossPartitionContributionBoundingV(t *testing. desc: "test mode without contribution bounding", privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), maxPartitionsContributed: 1, - // Contribution bounding is disabled, so all 10 partitions should be outputted. + // Cross-partition contribution bounding is disabled, so all 10 partitions should be outputted. want: 10, }, } { @@ -1086,7 +1471,7 @@ func TestSelectPartitionsTestModeCrossPartitionContributionBoundingKV(t *testing desc: "test mode without contribution bounding", privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), maxPartitionsContributed: 1, - // Contribution bounding is disabled, so all 10 partitions should be outputted. + // Cross-partition contribution bounding is disabled, so all 10 partitions should be outputted. want: 10, }, } { @@ -1133,8 +1518,9 @@ func TestDistinctPerKeyTestModeCrossPartitionContributionBounding(t *testing.T) privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), maxPartitionsContributed: 3, // The same privacy ID contributes once to 10 partitions, which implies that count of each - // partition is 3. 
Contribution bounding is disabled. The sum of all counts must then be 10. - // This also ensures that no partitions (each with a single privacy id) gets thresholded. + // partition is 3. Cross-partition contribution bounding is disabled. The sum of all counts + // must then be 10. This also ensures that no partitions (each with a single privacy id) + // gets thresholded. want: 10, }, } { diff --git a/privacy-on-beam/pbeam/quantiles.go b/privacy-on-beam/pbeam/quantiles.go new file mode 100644 index 00000000..e6ce9ed3 --- /dev/null +++ b/privacy-on-beam/pbeam/quantiles.go @@ -0,0 +1,398 @@ +// +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package pbeam + +import ( + "fmt" + "reflect" + + log "github.com/golang/glog" + "github.com/google/differential-privacy/go/checks" + "github.com/google/differential-privacy/go/dpagg" + "github.com/google/differential-privacy/go/noise" + "github.com/google/differential-privacy/privacy-on-beam/internal/kv" + "github.com/apache/beam/sdks/go/pkg/beam" +) + +func init() { + beam.RegisterType(reflect.TypeOf((*boundedQuantilesFn)(nil))) +} + +// QuantilesParams specifies the parameters associated with a Quantiles aggregation. +type QuantilesParams struct { + // Noise type (which is either LaplaceNoise{} or GaussianNoise{}). + // + // Defaults to LaplaceNoise{}. + NoiseKind NoiseKind + // Differential privacy budget consumed by this aggregation. 
If there is + // only one aggregation, both Epsilon and Delta can be left 0; in that + // case, the entire budget of the PrivacySpec is consumed. + Epsilon, Delta float64 + // The maximum number of distinct values that a given privacy identifier + // can influence. There is an inherent trade-off when choosing this + // parameter: a larger MaxPartitionsContributed leads to less data loss due + // to contribution bounding, but since the noise added in aggregations is + // scaled according to maxPartitionsContributed, it also means that more + // noise is added to each quantile. + // + // Required. + MaxPartitionsContributed int64 + // The maximum number of contributions from a given privacy identifier + // for each key. There is an inherent trade-off when choosing this + // parameter: a larger MaxContributionsPerPartition leads to less data loss due + // to contribution bounding, but since the noise added in aggregations is + // scaled according to maxContributionsPerPartition, it also means that more + // noise is added to each quantile. + // + // Required. + MaxContributionsPerPartition int64 + // The total contribution of a given privacy identifier to partition can be + // at at least MinValue, and at most MaxValue; otherwise it will be clamped + // to these bounds. For example, if a privacy identifier is associated with + // the key-value pairs [("a", -5), ("a", 2), ("b", 7), ("c", 3)] and the + // (MinValue, MaxValue) bounds are (0, 5), the contribution for "a" will be + // clamped up to 0, the contribution for "b" will be clamped down to 5, and + // the contribution for "c" will be untouched. There is an inherent + // trade-off when choosing MinValue and MaxValue: a small MinValue and a + // large MaxValue means that less records will be clamped, but that more + // noise will be added. + // + // Required. + MinValue, MaxValue float64 + // Percentile ranks that the quantiles should be computed for. Each rank must + // be between zero and one. 
The DP quantile operation returns a list of + // quantile values corresponding to the respective ranks. E.g., a percentile + // rank of 0.2 yields a quantile value that is greater than 20% and less than + // 80% of the values in the data set. + // + // Note that computing multiple quantiles does not consume extra privacy budget, + // i.e. computing multiple quantiles does not make each quantile less accurate + // for a fixed privacy budget. + Ranks []float64 + // You can input the list of partitions present in the output if you know + // them in advance. When you specify partitions, partition selection / + // thresholding will be disabled and partitions will appear in the output + // if and only if they appear in the set of public partitions. + // + // You should not derive the list of partitions non-privately from private + // data. You should only use this in either of the following cases: + // 1. The list of partitions is data-independent. For example, if you are + // aggregating a metric by hour, you could provide a list of all possible + // hourly period. + // 2. You use a differentially private operation to come up with the list of + // partitions. For example, you could use the keys of a DistinctPrivacyID + // operation as the list of public partitions. + // + // Note that current implementation limitations only allow up to millions of + // public partitions. + // + // Optional. + PublicPartitions beam.PCollection +} + +// QuantilesPerKey computes one or multiple quantiles of the values associated with each +// key in a PrivatePCollection, adding differentially private noise to the quantiles +// and doing pre-aggregation thresholding to remove partitions with a low number of +// distinct privacy identifiers. Client can also specify a PCollection of partitions. +// +// QuantilesPerKey transforms a PrivatePCollection into a PCollection. 
+// +// Note that due to the implementation details of the internal Quantiles algorithm, using pbeamtest +// with QuantilesPerKey has two caveats: +// +// 1. Even without DP noise, the output will be slightly noisy. You can use +// pbeamtest.QuantilesTolerance() to account for that noise. +// 2. It is not possible to not clamp input values when using +// pbeamtest.NewPrivacySpecNoNoiseWithoutContributionBounding(), so clamping to Min/MaxValue will +// still be applied. However, MaxContributionsPerPartition and MaxPartitionsContributed contribution +// bounding will be disabled. +func QuantilesPerKey(s beam.Scope, pcol PrivatePCollection, params QuantilesParams) beam.PCollection { + s = s.Scope("pbeam.QuantilesPerKey") + // Obtain & validate type information from the underlying PCollection. + idT, kvT := beam.ValidateKVType(pcol.col) + if kvT.Type() != reflect.TypeOf(kv.Pair{}) { + log.Exitf("QuantilesPerKey must be used on a PrivatePCollection of type , got type %v instead", kvT) + } + if pcol.codec == nil { + log.Exitf("QuantilesPerKey: no codec found for the input PrivatePCollection.") + } + + // Get privacy parameters. + spec := pcol.privacySpec + epsilon, delta, err := spec.consumeBudget(params.Epsilon, params.Delta) + if err != nil { + log.Exitf("Couldn't consume budget for Quantiles: %v", err) + } + var noiseKind noise.Kind + if params.NoiseKind == nil { + noiseKind = noise.LaplaceNoise + log.Infof("No NoiseKind specified, using Laplace Noise by default.") + } else { + noiseKind = params.NoiseKind.toNoiseKind() + } + err = checkQuantilesPerKeyParams(params, epsilon, delta, noiseKind) + if err != nil { + log.Exit(err) + } + + // Drop non-public partitions, if public partitions are specified. + if (params.PublicPartitions).IsValid() { + if pcol.codec.KType.T != (params.PublicPartitions).Type().Type() { + log.Exitf("Public partitions must be of type %v. 
Got type %v instead.", + pcol.codec.KType.T, (params.PublicPartitions).Type().Type()) + } + pcol.col = dropNonPublicPartitionsKVFn(s, params.PublicPartitions, pcol, pcol.codec.KType) + } + + // First, group together the privacy ID and the partition ID and do per-partition contribution bounding. + // Result is PCollection + encodeIDKFn := newEncodeIDKFn(idT, pcol.codec) + decoded := beam.ParDo(s, + encodeIDKFn, + pcol.col, + beam.TypeDefinition{Var: beam.VType, T: pcol.codec.VType.T}) + + maxContributionsPerPartition := getMaxContributionsPerPartition(params.MaxContributionsPerPartition) + // Don't do per-partition contribution bounding if in test mode without contribution bounding. + if spec.testMode != noNoiseWithoutContributionBounding { + decoded = boundContributions(s, decoded, maxContributionsPerPartition) + } + + // Convert value to float64. + // Result is PCollection. + _, valueT := beam.ValidateKVType(decoded) + convertFn, err := findConvertToFloat64Fn(valueT) + if err != nil { + log.Exit(err) + } + converted := beam.ParDo(s, convertFn, decoded) + + // Combine all values for into a slice. + // Result is PCollection. + combined := beam.CombinePerKey(s, + &expandValuesCombineFn{}, + converted) + + // Result is PCollection. + maxPartitionsContributed := getMaxPartitionsContributed(spec, params.MaxPartitionsContributed) + rekeyed := beam.ParDo(s, rekeyArrayFloat64Fn, combined) + // Second, do cross-partition contribution bounding if not in test mode without contribution bounding. + if spec.testMode != noNoiseWithoutContributionBounding { + rekeyed = boundContributions(s, rekeyed, maxPartitionsContributed) + } + + // Now that the cross-partition contribution bounding is done, remove the privacy keys and decode the values. + // Result is PCollection. 
+ partialPairs := beam.DropKey(s, rekeyed) + partitionT := pcol.codec.KType.T + partialKV := beam.ParDo(s, + newDecodePairArrayFloat64Fn(partitionT), + partialPairs, + beam.TypeDefinition{Var: beam.XType, T: partitionT}) + // Add public partitions and return the aggregation output, if public partitions are specified. + if (params.PublicPartitions).IsValid() { + return addPublicPartitionsForQuantiles(s, epsilon, delta, maxPartitionsContributed, + params, noiseKind, partialKV, spec.testMode) + } + // Compute the quantiles for each partition. Result is PCollection. + quantiles := beam.CombinePerKey(s, + newBoundedQuantilesFn(epsilon, delta, maxPartitionsContributed, params.MaxContributionsPerPartition, params.MinValue, params.MaxValue, noiseKind, params.Ranks, false, spec.testMode, false), + partialKV) + // Finally, drop thresholded partitions. + return beam.ParDo(s, dropThresholdedPartitionsFloat64SliceFn, quantiles) +} + +func addPublicPartitionsForQuantiles(s beam.Scope, epsilon, delta float64, maxPartitionsContributed int64, params QuantilesParams, noiseKind noise.Kind, partialKV beam.PCollection, testMode testMode) beam.PCollection { + // Compute the quantiles for each partition with non-public partitions dropped. Result is PCollection. + quantiles := beam.CombinePerKey(s, + newBoundedQuantilesFn(epsilon, delta, maxPartitionsContributed, params.MaxContributionsPerPartition, params.MinValue, params.MaxValue, noiseKind, params.Ranks, true, testMode, false), + partialKV) + partitionT, _ := beam.ValidateKVType(quantiles) + quantilesPartitions := beam.DropValue(s, quantiles) + // Create map with partitions in the data as keys. + partitionMap := beam.Combine(s, newPartitionsMapFn(beam.EncodedType{partitionT.Type()}), quantilesPartitions) + partitionsCol := params.PublicPartitions + // Add value of empty array to each partition key in PublicPartitions. 
+ publicPartitionsWithValues := beam.ParDo(s, addDummyValuesToPublicPartitionsFloat64SliceFn, partitionsCol) + // emptyPublicPartitions are the partitions that are public but not found in the data. + emptyPublicPartitions := beam.ParDo(s, newEmitPartitionsNotInTheDataFn(partitionT), publicPartitionsWithValues, beam.SideInput{Input: partitionMap}) + // Compute DP quantiles for empty public partitions. + emptyQuantiles := beam.CombinePerKey(s, + newBoundedQuantilesFn(epsilon, delta, maxPartitionsContributed, params.MaxContributionsPerPartition, params.MinValue, params.MaxValue, noiseKind, params.Ranks, true, testMode, true), + emptyPublicPartitions) + // Merge quantiles from data with quantiles from the empty public partitions. + allQuantiles := beam.Flatten(s, quantiles, emptyQuantiles) + return allQuantiles +} + +func checkQuantilesPerKeyParams(params QuantilesParams, epsilon, delta float64, noiseKind noise.Kind) error { + err := checks.CheckEpsilon("pbeam.QuantilesPerKey", epsilon) + if err != nil { + return err + } + if (params.PublicPartitions).IsValid() && noiseKind == noise.LaplaceNoise { + err = checks.CheckNoDelta("pbeam.QuantilesPerKey", delta) + } else { + err = checks.CheckDeltaStrict("pbeam.QuantilesPerKey", delta) + } + if err != nil { + return err + } + err = checks.CheckBoundsFloat64("pbeam.QuantilesPerKey", params.MinValue, params.MaxValue) + if err != nil { + return err + } + err = checks.CheckBoundsNotEqual("pbeam.QuantilesPerKey", params.MinValue, params.MaxValue) + if err != nil { + return err + } + if len(params.Ranks) == 0 { + return fmt.Errorf("QuantilesPerKey requires at least one rank to compute") + } + for _, rank := range params.Ranks { + if rank < 0.0 || rank > 1.0 { + return fmt.Errorf("rank %f must be >= 0 and <= 1", rank) + } + } + return checks.CheckMaxPartitionsContributed("pbeam.QuantilesPerKey", params.MaxPartitionsContributed) +} + +type boundedQuantilesAccum struct { + BQ *dpagg.BoundedQuantiles + SP *dpagg.PreAggSelectPartition 
+ PublicPartitions bool +} + +// boundedQuantilesFn is a differentially private combineFn for computing quantiles of values. Do not +// initialize it yourself, use newBoundedQuantilesFn to create a boundedQuantilesFn instance. +type boundedQuantilesFn struct { + // Privacy spec parameters (set during initial construction). + NoiseEpsilon float64 + PartitionSelectionEpsilon float64 + NoiseDelta float64 + PartitionSelectionDelta float64 + MaxPartitionsContributed int64 + MaxContributionsPerPartition int64 + Lower float64 + Upper float64 + NoiseKind noise.Kind + noise noise.Noise // Set during Setup phase according to NoiseKind. + Ranks []float64 + PublicPartitions bool + TestMode testMode + EmptyPartitions bool // Set to true if this combineFn is for adding noise to empty public partitions. +} + +// newBoundedQuantilesFn returns a boundedQuantilesFn with the given budget and parameters. +func newBoundedQuantilesFn(epsilon, delta float64, maxPartitionsContributed, maxContributionsPerPartition int64, lower, upper float64, noiseKind noise.Kind, ranks []float64, publicPartitions bool, testMode testMode, emptyPartitions bool) *boundedQuantilesFn { + fn := &boundedQuantilesFn{ + MaxPartitionsContributed: maxPartitionsContributed, + MaxContributionsPerPartition: maxContributionsPerPartition, + Lower: lower, + Upper: upper, + NoiseKind: noiseKind, + Ranks: ranks, + PublicPartitions: publicPartitions, + TestMode: testMode, + EmptyPartitions: emptyPartitions, + } + if fn.PublicPartitions { + fn.NoiseEpsilon = epsilon + fn.NoiseDelta = delta + return fn + } + fn.NoiseEpsilon = epsilon / 2 + fn.PartitionSelectionEpsilon = epsilon - fn.NoiseEpsilon + switch noiseKind { + case noise.GaussianNoise: + fn.NoiseDelta = delta / 2 + case noise.LaplaceNoise: + fn.NoiseDelta = 0 + default: + // TODO: return error instead + log.Exitf("newBoundedQuantilesFn: unknown noise.Kind (%v) is specified. 
Please specify a valid noise.", noiseKind) + } + fn.PartitionSelectionDelta = delta - fn.NoiseDelta + return fn +} + +func (fn *boundedQuantilesFn) Setup() { + fn.noise = noise.ToNoise(fn.NoiseKind) + if fn.TestMode.isEnabled() { + fn.noise = noNoise{} + } +} + +func (fn *boundedQuantilesFn) CreateAccumulator() boundedQuantilesAccum { + accum := boundedQuantilesAccum{ + BQ: dpagg.NewBoundedQuantiles(&dpagg.BoundedQuantilesOptions{ + Epsilon: fn.NoiseEpsilon, + Delta: fn.NoiseDelta, + MaxPartitionsContributed: fn.MaxPartitionsContributed, + MaxContributionsPerPartition: fn.MaxContributionsPerPartition, + Lower: fn.Lower, + Upper: fn.Upper, + Noise: fn.noise, + }), PublicPartitions: fn.PublicPartitions} + if !fn.PublicPartitions { + accum.SP = dpagg.NewPreAggSelectPartition(&dpagg.PreAggSelectPartitionOptions{ + Epsilon: fn.PartitionSelectionEpsilon, + Delta: fn.PartitionSelectionDelta, + MaxPartitionsContributed: fn.MaxPartitionsContributed, + }) + } + return accum +} + +func (fn *boundedQuantilesFn) AddInput(a boundedQuantilesAccum, values []float64) boundedQuantilesAccum { + // We can have multiple values for each (privacy_key, partition_key) pair. + // We need to add each value to BoundedQuantiles as input but we need to add a single input + // for each privacy_key to SelectPartition. 
+ for _, v := range values { + a.BQ.Add(v) + } + if !fn.PublicPartitions { + a.SP.Increment() + } + return a +} + +func (fn *boundedQuantilesFn) MergeAccumulators(a, b boundedQuantilesAccum) boundedQuantilesAccum { + a.BQ.Merge(b.BQ) + if !fn.PublicPartitions { + a.SP.Merge(b.SP) + } + return a +} + +func (fn *boundedQuantilesFn) ExtractOutput(a boundedQuantilesAccum) []float64 { + if fn.TestMode.isEnabled() { + a.BQ.Noise = noNoise{} + } + if fn.TestMode.isEnabled() || a.PublicPartitions || a.SP.ShouldKeepPartition() { + result := make([]float64, len(fn.Ranks)) + for i, rank := range fn.Ranks { + result[i] = a.BQ.Result(rank) + } + return result + } + return nil +} + +func (fn *boundedQuantilesFn) String() string { + return fmt.Sprintf("%#v", fn) +} diff --git a/privacy-on-beam/pbeam/quantiles_test.go b/privacy-on-beam/pbeam/quantiles_test.go new file mode 100644 index 00000000..4561d289 --- /dev/null +++ b/privacy-on-beam/pbeam/quantiles_test.go @@ -0,0 +1,865 @@ +// +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package pbeam + +import ( + "testing" + + "github.com/google/differential-privacy/go/noise" + "github.com/google/differential-privacy/privacy-on-beam/pbeam/testutils" + "github.com/apache/beam/sdks/go/pkg/beam" + "github.com/apache/beam/sdks/go/pkg/beam/testing/ptest" + "github.com/apache/beam/sdks/go/pkg/beam/transforms/stats" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" +) + +func TestNewBoundedQuantilesFn(t *testing.T) { + opts := []cmp.Option{ + cmpopts.EquateApprox(0, 1e-10), + cmpopts.IgnoreUnexported(boundedQuantilesFn{}), + } + for _, tc := range []struct { + desc string + noiseKind noise.Kind + want interface{} + }{ + {"Laplace noise kind", noise.LaplaceNoise, + &boundedQuantilesFn{ + NoiseEpsilon: 0.5, + PartitionSelectionEpsilon: 0.5, + NoiseDelta: 0, + PartitionSelectionDelta: 1e-5, + MaxPartitionsContributed: 17, + MaxContributionsPerPartition: 5, + Lower: 0, + Upper: 10, + Ranks: []float64{0.1, 0.5, 0.9}, + NoiseKind: noise.LaplaceNoise, + }}, + {"Gaussian noise kind", noise.GaussianNoise, + &boundedQuantilesFn{ + NoiseEpsilon: 0.5, + PartitionSelectionEpsilon: 0.5, + NoiseDelta: 5e-6, + PartitionSelectionDelta: 5e-6, + MaxPartitionsContributed: 17, + MaxContributionsPerPartition: 5, + Lower: 0, + Upper: 10, + Ranks: []float64{0.1, 0.5, 0.9}, + NoiseKind: noise.GaussianNoise, + }}, + } { + got := newBoundedQuantilesFn(1, 1e-5, 17, 5, 0, 10, tc.noiseKind, []float64{0.1, 0.5, 0.9}, false, disabled, false) + if diff := cmp.Diff(tc.want, got, opts...); diff != "" { + t.Errorf("newBoundedQuantilesFn: for %q (-want +got):\n%s", tc.desc, diff) + } + } +} + +func TestBoundedQuantilesFnSetup(t *testing.T) { + for _, tc := range []struct { + desc string + noiseKind noise.Kind + wantNoise interface{} + }{ + {"Laplace noise kind", noise.LaplaceNoise, noise.Laplace()}, + {"Gaussian noise kind", noise.GaussianNoise, noise.Gaussian()}} { + got := newBoundedQuantilesFn(1, 1e-5, 17, 5, 0, 10, tc.noiseKind, []float64{0.1, 0.5, 0.9}, 
false, disabled, false)
+ got.Setup()
+ if !cmp.Equal(tc.wantNoise, got.noise) {
+ t.Errorf("Setup: for %s got %v, want %v", tc.desc, got.noise, tc.wantNoise)
+ }
+ }
+}
+
+func TestBoundedQuantilesFnAddInput(t *testing.T) {
+ // δ=10⁻²³, ε=1e100 and l0Sensitivity=1 gives a threshold of =2.
+ // Since ε=1e100, the noise is added with probability in the order of exp(-1e100).
+ maxContributionsPerPartition := int64(1)
+ maxPartitionsContributed := int64(1)
+ epsilon := 1e100
+ delta := 1e-23
+ lower := 0.0
+ upper := 5.0
+ ranks := []float64{0.25, 0.75}
+ // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε.
+ fn := newBoundedQuantilesFn(2*epsilon, delta, maxPartitionsContributed, maxContributionsPerPartition, lower, upper, noise.LaplaceNoise, ranks, false, disabled, false)
+ fn.Setup()
+
+ accum := fn.CreateAccumulator()
+ for i := 0; i < 100; i++ {
+ fn.AddInput(accum, []float64{1.0})
+ fn.AddInput(accum, []float64{4.0})
+ }
+
+ got := fn.ExtractOutput(accum)
+ tolerance := testutils.QuantilesTolerance(lower, upper)
+ want := []float64{1.0, 4.0} // Corresponding to ranks 0.25 and 0.75, respectively.
+ for i, rank := range ranks {
+ if !cmp.Equal(want[i], got[i], cmpopts.EquateApprox(0, tolerance)) {
+ t.Errorf("AddInput: for rank: %f values got: %f, want %f", rank, got[i], want[i])
+ }
+ }
+}
+
+func TestBoundedQuantilesFnMergeAccumulators(t *testing.T) {
+ // δ=10⁻²³, ε=1e100 and l0Sensitivity=1 gives a threshold of =2.
+ // Since ε=1e100, the noise is added with probability in the order of exp(-1e100).
+ maxContributionsPerPartition := int64(1)
+ maxPartitionsContributed := int64(1)
+ epsilon := 1e100
+ delta := 1e-23
+ lower := 0.0
+ upper := 5.0
+ ranks := []float64{0.25, 0.75}
+ // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε.
+ fn := newBoundedQuantilesFn(2*epsilon, delta, maxPartitionsContributed, maxContributionsPerPartition, lower, upper, noise.LaplaceNoise, ranks, false, disabled, false)
+ fn.Setup()
+
+ accum1 := fn.CreateAccumulator()
+ accum2 := fn.CreateAccumulator()
+ for i := 0; i < 100; i++ {
+ fn.AddInput(accum1, []float64{1.0})
+ fn.AddInput(accum2, []float64{4.0})
+ }
+ fn.MergeAccumulators(accum1, accum2)
+
+ got := fn.ExtractOutput(accum1)
+ tolerance := testutils.QuantilesTolerance(lower, upper)
+ want := []float64{1.0, 4.0} // Corresponding to ranks 0.25 and 0.75, respectively.
+ for i, rank := range ranks {
+ if !cmp.Equal(want[i], got[i], cmpopts.EquateApprox(0, tolerance)) {
+ t.Errorf("MergeAccumulators: for rank: %f values got: %f, want %f", rank, got[i], want[i])
+ }
+ }
+}
+
+func TestBoundedQuantilesFnExtractOutputReturnsNilForSmallPartitions(t *testing.T) {
+ for _, tc := range []struct {
+ desc string
+ inputSize int
+ datapointsPerPrivacyUnit int
+ }{
+ // It's a special case for partition selection in which the algorithm should always eliminate the partition.
+ {"Empty input", 0, 0},
+ {"Input with 1 privacy unit with 1 contribution", 1, 1},
+ } {
+ // The choice of ε=1e100, δ=10⁻²³, and l0Sensitivity=1 gives a threshold of =2.
+ // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε.
+ fn := newBoundedQuantilesFn(2*1e100, 1e-23, 1, 1, 0, 10, noise.LaplaceNoise, []float64{0.5}, false, disabled, false)
+ fn.Setup()
+ accum := fn.CreateAccumulator()
+ for i := 0; i < tc.inputSize; i++ {
+ values := make([]float64, tc.datapointsPerPrivacyUnit)
+ for i := 0; i < tc.datapointsPerPrivacyUnit; i++ {
+ values[i] = 1.0
+ }
+ fn.AddInput(accum, values)
+ }
+
+ got := fn.ExtractOutput(accum)
+
+ // Should return nil output for small partitions.
+ if got != nil { + t.Errorf("ExtractOutput: for %s got: %f, want nil", tc.desc, got) + } + } +} + +func TestBoundedQuantilesFnWithPartitionsExtractOutputDoesNotReturnNilForSmallPartitions(t *testing.T) { + for _, tc := range []struct { + desc string + inputSize int + datapointsPerPrivacyUnit int + }{ + {"Empty input", 0, 0}, + {"Input with 1 privacy unit with 1 contribution", 1, 1}, + } { + fn := newBoundedQuantilesFn(1e100, 0, 1, 1, 0, 10, noise.LaplaceNoise, []float64{0.5}, true, disabled, false) + fn.Setup() + accum := fn.CreateAccumulator() + for i := 0; i < tc.inputSize; i++ { + values := make([]float64, tc.datapointsPerPrivacyUnit) + for i := 0; i < tc.datapointsPerPrivacyUnit; i++ { + values[i] = 1.0 + } + fn.AddInput(accum, values) + } + + got := fn.ExtractOutput(accum) + + // Should not return nil output for small partitions in the case of public partitions. + if got == nil { + t.Errorf("ExtractOutput for %s thresholded with public partitions when it shouldn't", tc.desc) + } + } +} + +// Checks that QuantilesPerKey adds noise to its output. +func TestQuantilesPerKeyAddsNoise(t *testing.T) { + for _, tc := range []struct { + name string + noiseKind NoiseKind + // Differential privacy params used + epsilon float64 + delta float64 + }{ + { + name: "Gaussian", + noiseKind: GaussianNoise{}, + epsilon: 0.1, // It is split in two: 0.05 for the noise and 0.05 for the partition selection. + delta: 2e-3, // It is split in two: 1e-3 for the noise and 1e-3 for the partition selection. + }, + { + name: "Laplace", + noiseKind: LaplaceNoise{}, + epsilon: 0.1, // It is split in two: 0.05 for the noise and 0.05 for the partition selection. + delta: 1e-3, + }, + } { + ranks := []float64{0.50} + // triples contains {1,0,0.5}, {2,0,1}, …, {200,0,100}. 
+ var triples []testutils.TripleWithFloatValue + for i := 0; i < 200; i++ { + triples = append(triples, testutils.TripleWithFloatValue{ID: i, Partition: 0, Value: float32(i) / 2}) + } + // ε=0.1, δ=10⁻³ and l0Sensitivity=1 gives a threshold of 132. + // We have 200 privacy IDs, so we will keep the partition. + p, s, col := ptest.CreateList(triples) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + // Use twice epsilon & delta because we compute Quantiles twice. + pcol := MakePrivate(s, col, NewPrivacySpec(2*tc.epsilon, 2*tc.delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got1 := QuantilesPerKey(s, pcol, QuantilesParams{ + Epsilon: tc.epsilon, + Delta: tc.delta, + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 1, + MinValue: 0.0, + MaxValue: 2.0, + NoiseKind: tc.noiseKind, + Ranks: ranks, + }) + got2 := QuantilesPerKey(s, pcol, QuantilesParams{ + Epsilon: tc.epsilon, + Delta: tc.delta, + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 1, + MinValue: 0.0, + MaxValue: 2.0, + NoiseKind: tc.noiseKind, + Ranks: ranks, + }) + got1 = beam.ParDo(s, testutils.DereferenceFloat64Slice, got1) + got2 = beam.ParDo(s, testutils.DereferenceFloat64Slice, got2) + + if err := testutils.NotEqualsFloat64(s, got1, got2); err != nil { + t.Fatalf("NotEqualsFloat64: got error %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey didn't add any noise with float inputs and %s Noise: %v", tc.name, err) + } + } +} + +// Checks that QuantilesPerKey with partitions adds noise to its output. +func TestQuantilesWithPartitionsPerKeyAddsNoise(t *testing.T) { + for _, tc := range []struct { + name string + noiseKind NoiseKind + // Differential privacy params used + epsilon float64 + delta float64 + }{ + { + name: "Gaussian", + noiseKind: GaussianNoise{}, + epsilon: 0.05, // It is split in two: 0.05 for the noise and 0.05 for the partition selection. 
+ delta: 1e-5, + }, + { + name: "Laplace", + noiseKind: LaplaceNoise{}, + epsilon: 0.05, + delta: 0.0, // It is 0 because partitions are public and we are using Laplace noise. + }, + } { + ranks := []float64{0.50} + // triples contains {1,0,1}, {2,0,2}, …, {100,0,100}. + var triples []testutils.TripleWithFloatValue + for i := 0; i < 100; i++ { + triples = append(triples, testutils.TripleWithFloatValue{ID: i, Partition: 0, Value: float32(i)}) + } + p, s, col := ptest.CreateList(triples) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + publicPartitions := beam.CreateList(s, []int{0}) + + // Use twice epsilon & delta because we compute Quantiles twice. + pcol := MakePrivate(s, col, NewPrivacySpec(2*tc.epsilon, 2*tc.delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got1 := QuantilesPerKey(s, pcol, QuantilesParams{ + Epsilon: tc.epsilon, + Delta: tc.delta, + MaxPartitionsContributed: 100, + MaxContributionsPerPartition: 100, + MinValue: 0.0, + MaxValue: 100.0, + NoiseKind: tc.noiseKind, + Ranks: ranks, + PublicPartitions: publicPartitions, + }) + got2 := QuantilesPerKey(s, pcol, QuantilesParams{ + Epsilon: tc.epsilon, + Delta: tc.delta, + MaxPartitionsContributed: 100, + MaxContributionsPerPartition: 100, + MinValue: 0.0, + MaxValue: 100.0, + NoiseKind: tc.noiseKind, + Ranks: ranks, + PublicPartitions: publicPartitions, + }) + got1 = beam.ParDo(s, testutils.DereferenceFloat64Slice, got1) + got2 = beam.ParDo(s, testutils.DereferenceFloat64Slice, got2) + + if err := testutils.NotEqualsFloat64(s, got1, got2); err != nil { + t.Fatalf("NotEqualsFloat64: got error %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey with partitions didn't add any noise with float inputs and %s Noise: %v", tc.name, err) + } + } +} + +// Checks that QuantilesPerKey returns a correct answer. 
+func TestQuantilesPerKeyNoNoise(t *testing.T) { + triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(100, 0, 1.0), + testutils.MakeTripleWithFloatValue(100, 0, 4.0)) + + wantMetric := []testutils.TestFloat64SliceMetric{ + {0, []float64{1.0, 1.0, 4.0, 4.0}}, + } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + // ε=900, δ=10⁻²⁰⁰ and l0Sensitivity=1 gives a threshold of =2. + epsilon := 900.0 + delta := 1e-200 + lower := 0.0 + upper := 5.0 + ranks := []float64{0.00, 0.25, 0.75, 1.00} + + // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. + pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := QuantilesPerKey(s, pcol, QuantilesParams{ + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 2, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + }) + + want = beam.ParDo(s, testutils.Float64SliceMetricToKV, want) + if err := testutils.ApproxEqualsKVFloat64Slice(s, got, want, testutils.QuantilesTolerance(lower, upper)); err != nil { + t.Fatalf("ApproxEqualsKVFloat64Slice: got error %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey did not return approximate quantile: %v", err) + } +} + +// Checks that QuantilesPerKey with partitions returns a correct answer. 
+func TestQuantilesPerKeyWithPartitionsNoNoise(t *testing.T) { + triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(100, 0, 1.0), + testutils.MakeTripleWithFloatValue(100, 0, 4.0)) + + wantMetric := []testutils.TestFloat64SliceMetric{ + {0, []float64{1.0, 1.0, 4.0, 4.0}}, + } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + epsilon := 900.0 + delta := 0.0 + lower := 0.0 + upper := 5.0 + ranks := []float64{0.00, 0.25, 0.75, 1.00} + publicPartitions := beam.CreateList(s, []int{0}) + + pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := QuantilesPerKey(s, pcol, QuantilesParams{ + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 2, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + PublicPartitions: publicPartitions, + }) + + want = beam.ParDo(s, testutils.Float64SliceMetricToKV, want) + if err := testutils.ApproxEqualsKVFloat64Slice(s, got, want, testutils.QuantilesTolerance(lower, upper)); err != nil { + t.Fatalf("ApproxEqualsKVFloat64Slice: got error %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey did not return approximate quantile: %v", err) + } +} + +// Checks that QuantilesPerKey with partitions adds public partitions not found in +// the data and drops non-public partitions. 
+func TestQuantilesPerKeyWithPartitionsAppliesPublicPartitions(t *testing.T) { + triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(1, 0, 1.0), + testutils.MakeTripleWithFloatValueStartingFromKey(1, 1, 1, 1.0), + testutils.MakeTripleWithFloatValueStartingFromKey(2, 1, 2, 1.0)) + + p, s, col := ptest.CreateList(triples) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + epsilon := 900.0 + delta := 0.0 + lower := 0.0 + upper := 5.0 + ranks := []float64{0.00, 0.25, 0.75, 1.00} + publicPartitions := beam.CreateList(s, []int{2, 3, 4, 5}) + + pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := QuantilesPerKey(s, pcol, QuantilesParams{ + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 2, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + PublicPartitions: publicPartitions, + }) + got = beam.DropKey(s, got) // We are only interested in the number of partitions kept + + // There are 4 public partitions, so we expect 4 partitions in the output. + // If we didn't drop non-public partitions (partitions "0" and "1"), we would have + // 6 (if we still added empty public partitions) or 3 (if we also didn't add empty + // public partitions) partitions in the output. + // Similarly, if we didn't add empty public partitions (partitions "3", "4", "5"), + // we would have 1 (if we still dropped non-public partitions) or 3 (if we also + // didn't drop non-public partitions) partitions in the output. 
+ wantPartitions := 4 + testutils.CheckNumPartitions(s, got, wantPartitions) + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey with partitions did not apply public partitions: %v", err) + } +} + +var quantilesPartitionSelectionTestCases = []struct { + name string + noiseKind NoiseKind + epsilon float64 + delta float64 + numPartitions int + entriesPerPartition int +}{ + { + name: "Gaussian", + noiseKind: GaussianNoise{}, + // After splitting the (ε, δ) budget between the noise and partition + // selection portions of the privacy algorithm, this results in a ε=1, + // δ=0.3 partition selection budget. + epsilon: 2, + delta: 0.6, + // entriesPerPartition=1 yields a 30% chance of emitting any particular partition + // (since δ_emit=0.3). + entriesPerPartition: 1, + // 143 distinct partitions implies that some (but not all) partitions are + // emitted with high probability (at least 1 - 1e-20). + numPartitions: 143, + }, + { + name: "Laplace", + noiseKind: LaplaceNoise{}, + // After splitting the (ε, δ) budget between the noise and partition + // selection portions of the privacy algorithm, this results in the + // partition selection portion of the budget being ε_selectPartition=1, + // δ_selectPartition=0.3. + epsilon: 2, + delta: 0.3, + // entriesPerPartition=1 yields a 30% chance of emitting any particular partition + // (since δ_emit=0.3). + entriesPerPartition: 1, + numPartitions: 143, + }, +} + +// Checks that QuantilesPerKey applies partition selection. +func TestQuantilesPartitionSelection(t *testing.T) { + for _, tc := range quantilesPartitionSelectionTestCases { + t.Run(tc.name, func(t *testing.T) { + // Sanity check that the entriesPerPartition is sensical. + if tc.entriesPerPartition <= 0 { + t.Fatalf("Invalid test case: entriesPerPartition must be positive. 
Got: %d", tc.entriesPerPartition) + } + + // Build up {ID, Partition, Value} pairs such that for each of the tc.numPartitions partitions, + // tc.entriesPerPartition privacy units contribute a single value: + // {0, 0, 1}, {1, 0, 1}, …, {entriesPerPartition-1, 0, 1} + // {entriesPerPartition, 1, 1}, {entriesPerPartition+1, 1, 1}, …, {entriesPerPartition+entriesPerPartition-1, 1, 1} + // … + // {entriesPerPartition*(numPartitions-1), numPartitions-1, 1}, …, {entriesPerPartition*numPartitions-1, numPartitions-1, 1} + var ( + triples []testutils.TripleWithFloatValue + kOffset = 0 + ) + for i := 0; i < tc.numPartitions; i++ { + for j := 0; j < tc.entriesPerPartition; j++ { + triples = append(triples, testutils.TripleWithFloatValue{ID: kOffset + j, Partition: i, Value: 1.0}) + } + kOffset += tc.entriesPerPartition + } + p, s, col := ptest.CreateList(triples) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + // Run QuantilesPerKey on triples + ranks := []float64{0.00, 0.25, 0.75, 1.00} + pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := QuantilesPerKey(s, pcol, QuantilesParams{ + MinValue: 0.0, + MaxValue: 1.0, + MaxContributionsPerPartition: int64(tc.entriesPerPartition), + MaxPartitionsContributed: 1, + NoiseKind: tc.noiseKind, + Ranks: ranks, + }) + got = beam.ParDo(s, testutils.KVToFloat64SliceMetric, got) + + // Validate that partition selection is applied (i.e., some emitted and some dropped). + testutils.CheckSomePartitionsAreDropped(s, got, tc.numPartitions) + if err := ptest.Run(p); err != nil { + t.Errorf("%v", err) + } + }) + } +} + +// Checks that QuantilePerKey does cross-partition contribution bounding correctly. +func TestQuantilesPerKeyCrossPartitionContributionBounding(t *testing.T) { + // 100 distinct privacy IDs contribute 0.0 to partition 0 and another 100 distinct privacy + // IDs contribute 0.0 to partition 1. 
Then, another 100 privacy IDs (different from + // these 200 privacy IDs) contributes "1.0"s to both partition 0 and partition 1. + // MaxPartitionsContributed is 1, so a total of 100 "1.0" contributions will be kept across + // both partitions. Depending on how contributions are kept, rank=0.60 of both partitions is + // either both 0.0 or one is 1.0 and the other 0.0. Either way, the sum of rank=0.60 of both + // partitions should be less than or equal to 1.0. (as opposed to 2.0 if no contribution bounding + // takes place, since rank=0.60 will be 1.0 for both partitions in that case). + triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(100, 0, 0.0), + testutils.MakeTripleWithFloatValueStartingFromKey(100, 100, 1, 0.0), + testutils.MakeTripleWithFloatValueStartingFromKey(200, 100, 0, 1.0), + testutils.MakeTripleWithFloatValueStartingFromKey(200, 100, 1, 1.0), + ) + lower := 0.0 + upper := 5.0 + wantMetric := []testutils.TestFloat64Metric{ + {0, 1.0 + testutils.QuantilesTolerance(lower, upper)}, // To account for noise. + } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + // ε=900, δ=10⁻²⁰⁰ and l0Sensitivity=1 gives a threshold of =2. + epsilon := 900.0 + delta := 1e-200 + ranks := []float64{0.60} + + // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. + pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := QuantilesPerKey(s, pcol, QuantilesParams{ + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 1, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + }) + got = beam.ParDo(s, testutils.DereferenceFloat64Slice, got) + maxs := beam.DropKey(s, got) + maxOverPartitions := stats.Sum(s, maxs) + got = beam.AddFixedKey(s, maxOverPartitions) // Adds a fixed key of 0. 
+ + want = beam.ParDo(s, testutils.Float64MetricToKV, want) + if err := testutils.LessThanOrEqualToKVFloat64(s, got, want); err != nil { + t.Fatalf("LessThanOrEqualToKVFloat64: got error %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey did not bound cross-partition contributions correctly: %v", err) + } +} + +// Checks that QuantilePerKey with partitions does cross-partition contribution bounding correctly. +func TestQuantilesPerKeyWithPartitionsCrossPartitionContributionBounding(t *testing.T) { + // 100 distinct privacy IDs contribute 0.0 to partition 0 and another 100 distinct privacy + // IDs contribute 0.0 to partition 1. Then, another 100 privacy IDs (different from + // these 200 privacy IDs) contributes "1.0"s to both partition 0 and partition 1. + // MaxPartitionsContributed is 1, so a total of 100 "1.0" contributions will be kept across + // both partitions. Depending on how contributions are kept, rank=0.60 of both partitions is + // either both 0.0 or one is 1.0 and the other 0.0. Either way, the sum of rank=0.60 of both + // partitions should be less than or equal to 1.0. (as opposed to 2.0 if no contribution bounding + // takes place, since rank=0.60 will be 1.0 for both partitions in that case). + triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(100, 0, 0.0), + testutils.MakeTripleWithFloatValueStartingFromKey(100, 100, 1, 0.0), + testutils.MakeTripleWithFloatValueStartingFromKey(200, 100, 0, 1.0), + testutils.MakeTripleWithFloatValueStartingFromKey(200, 100, 1, 1.0), + ) + lower := 0.0 + upper := 5.0 + wantMetric := []testutils.TestFloat64Metric{ + {0, 1.0 + testutils.QuantilesTolerance(lower, upper)}, // To account for noise. 
+ } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + epsilon := 900.0 + delta := 0.0 + + ranks := []float64{0.60} + publicPartitions := beam.CreateList(s, []int{0, 1}) + + pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := QuantilesPerKey(s, pcol, QuantilesParams{ + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 1, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + PublicPartitions: publicPartitions, + }) + got = beam.ParDo(s, testutils.DereferenceFloat64Slice, got) + maxs := beam.DropKey(s, got) + maxOverPartitions := stats.Sum(s, maxs) + got = beam.AddFixedKey(s, maxOverPartitions) // Adds a fixed key of 0. + + want = beam.ParDo(s, testutils.Float64MetricToKV, want) + if err := testutils.LessThanOrEqualToKVFloat64(s, got, want); err != nil { + t.Fatalf("LessThanOrEqualToKVFloat64: got error %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey with partitions did not bound cross-partition contributions correctly: %v", err) + } +} + +// Checks that QuantilePerKey does per-partition contribution bounding correctly. +func TestQuantilesPerKeyPerPartitionContributionBounding(t *testing.T) { + // 100 distinct privacy IDs contribute 0.0 and another 100 distinct privacy IDs + // contribute 1.0 twice. MaxPartitionsContributed is 1, so only half of these + // contributions will be kept and we expect equal number of 0.0's and 1.0s. 
+ triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(100, 0, 0.0), + testutils.MakeTripleWithFloatValueStartingFromKey(100, 100, 0, 1.0), + testutils.MakeTripleWithFloatValueStartingFromKey(100, 100, 0, 1.0)) + + wantMetric := []testutils.TestFloat64SliceMetric{ + {0, []float64{0.0, 1.0}}, + } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + // ε=900, δ=10⁻²⁰⁰ and l0Sensitivity=1 gives a threshold of =2. + epsilon := 900.0 + delta := 1e-200 + lower := 0.0 + upper := 5.0 + ranks := []float64{0.49, 0.51} + + // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. + pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := QuantilesPerKey(s, pcol, QuantilesParams{ + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 1, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + }) + + want = beam.ParDo(s, testutils.Float64SliceMetricToKV, want) + if err := testutils.ApproxEqualsKVFloat64Slice(s, got, want, testutils.QuantilesTolerance(lower, upper)); err != nil { + t.Fatalf("ApproxEqualsKVFloat64Slice: got error %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey did not bound cross-partition contributions correctly: %v", err) + } +} + +// Checks that QuantilePerKey with partitions does per-partition contribution bounding correctly. +func TestQuantilesPerKeyWithPartitionsPerPartitionContributionBounding(t *testing.T) { + // 100 distinct privacy IDs contribute 0.0 and another 100 distinct privacy IDs + // contribute 1.0 twice. MaxPartitionsContributed is 1, so only half of these + // contributions will be kept and we expect equal number of 0.0's and 1.0s. 
+ triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(100, 0, 0.0), + testutils.MakeTripleWithFloatValueStartingFromKey(100, 100, 0, 1.0), + testutils.MakeTripleWithFloatValueStartingFromKey(100, 100, 0, 1.0)) + + wantMetric := []testutils.TestFloat64SliceMetric{ + {0, []float64{0.0, 1.0}}, + } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + epsilon := 900.0 + delta := 0.0 + lower := 0.0 + upper := 5.0 + ranks := []float64{0.49, 0.51} + publicPartitions := beam.CreateList(s, []int{0}) + + // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. + pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := QuantilesPerKey(s, pcol, QuantilesParams{ + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 1, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + PublicPartitions: publicPartitions, + }) + + want = beam.ParDo(s, testutils.Float64SliceMetricToKV, want) + if err := testutils.ApproxEqualsKVFloat64Slice(s, got, want, testutils.QuantilesTolerance(lower, upper)); err != nil { + t.Fatalf("ApproxEqualsKVFloat64Slice: got error %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey did not bound cross-partition contributions correctly: %v", err) + } +} + +// Checks that QuantilesPerKey clamps input values to MinValue and MaxValue. 
+func TestQuantilesPerKeyAppliesClamping(t *testing.T) { + triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(100, 0, -5.0), // Will be clamped to 0.0 + testutils.MakeTripleWithFloatValue(100, 0, 10.0)) // Will be clamped to 5.0 + + wantMetric := []testutils.TestFloat64SliceMetric{ + {0, []float64{0.0, 0.0, 5.0, 5.0}}, + } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + // ε=900, δ=10⁻²⁰⁰ and l0Sensitivity=1 gives a threshold of =2. + epsilon := 900.0 + delta := 1e-200 + lower := 0.0 + upper := 5.0 + ranks := []float64{0.00, 0.25, 0.75, 1.00} + + // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. + pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := QuantilesPerKey(s, pcol, QuantilesParams{ + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 2, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + }) + + want = beam.ParDo(s, testutils.Float64SliceMetricToKV, want) + if err := testutils.ApproxEqualsKVFloat64Slice(s, got, want, testutils.QuantilesTolerance(lower, upper)); err != nil { + t.Fatalf("ApproxEqualsKVFloat64Slice: got error %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey did not clamp input values: %v", err) + } +} + +// Checks that QuantilesPerKey with partitions clamps input values to MinValue and MaxValue. 
+func TestQuantilesPerKeyWithPartitionsAppliesClamping(t *testing.T) { + triples := testutils.ConcatenateTriplesWithFloatValue( + testutils.MakeTripleWithFloatValue(100, 0, -5.0), // Will be clamped to 0.0 + testutils.MakeTripleWithFloatValue(100, 0, 10.0)) // Will be clamped to 5.0 + + wantMetric := []testutils.TestFloat64SliceMetric{ + {0, []float64{0.0, 0.0, 5.0, 5.0}}, + } + p, s, col, want := ptest.CreateList2(triples, wantMetric) + col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) + + epsilon := 900.0 + delta := 0.0 + lower := 0.0 + upper := 5.0 + ranks := []float64{0.00, 0.25, 0.75, 1.00} + publicPartitions := beam.CreateList(s, []int{0}) + + // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. + pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) + got := QuantilesPerKey(s, pcol, QuantilesParams{ + MaxPartitionsContributed: 1, + MaxContributionsPerPartition: 2, + MinValue: lower, + MaxValue: upper, + Ranks: ranks, + PublicPartitions: publicPartitions, + }) + + want = beam.ParDo(s, testutils.Float64SliceMetricToKV, want) + if err := testutils.ApproxEqualsKVFloat64Slice(s, got, want, testutils.QuantilesTolerance(lower, upper)); err != nil { + t.Fatalf("ApproxEqualsKVFloat64Slice: got error %v", err) + } + if err := ptest.Run(p); err != nil { + t.Errorf("QuantilesPerKey did not clamp input values: %v", err) + } +} + +func TestCheckQuantilesPerKeyParams(t *testing.T) { + _, _, publicPartitions := ptest.CreateList([]int{0, 1}) + for _, tc := range []struct { + desc string + epsilon float64 + delta float64 + noiseKind noise.Kind + params QuantilesParams + wantErr bool + }{ + {"valid parameters", 1.0, 1e-5, noise.LaplaceNoise, QuantilesParams{MinValue: -5.0, MaxValue: 5.0, Ranks: []float64{0.5}}, false}, + {"negative epsilon", -1.0, 1e-5, noise.LaplaceNoise, QuantilesParams{MinValue: -5.0, MaxValue: 5.0, Ranks: 
[]float64{0.5}}, true}, + {"zero delta", 1.0, 0.0, noise.LaplaceNoise, QuantilesParams{MinValue: -5.0, MaxValue: 5.0, Ranks: []float64{0.5}}, true}, + {"MaxValue < MinValue", 1.0, 1e-5, noise.LaplaceNoise, QuantilesParams{MinValue: 6.0, MaxValue: 5.0, Ranks: []float64{0.5}}, true}, + {"MaxValue = MinValue", 1.0, 1e-5, noise.LaplaceNoise, QuantilesParams{MinValue: 5.0, MaxValue: 5.0, Ranks: []float64{0.5}}, true}, + {"No ranks", 1.0, 1e-5, noise.LaplaceNoise, QuantilesParams{MinValue: -5.0, MaxValue: 5.0}, true}, + {"Out of bound (<0.0 || >1.0) ranks", 1.0, 1e-5, noise.LaplaceNoise, QuantilesParams{MinValue: -5.0, MaxValue: 5.0, Ranks: []float64{0.3, 1.5}}, true}, + {"public partitions and non-zero delta with Laplace", + 1.0, + 1e-5, + noise.LaplaceNoise, + QuantilesParams{MinValue: -5.0, MaxValue: 5.0, Ranks: []float64{0.5}, PublicPartitions: publicPartitions}, + true}, + } { + got := checkQuantilesPerKeyParams(tc.params, tc.epsilon, tc.delta, tc.noiseKind) + if (got != nil) != tc.wantErr { + t.Errorf("checkQuantilesPerKeyParams: %s got=%v, wantErr=%t", tc.desc, got, tc.wantErr) + } + } +} diff --git a/privacy-on-beam/pbeam/sum_test.go b/privacy-on-beam/pbeam/sum_test.go index c44d463d..7d1fa7fb 100644 --- a/privacy-on-beam/pbeam/sum_test.go +++ b/privacy-on-beam/pbeam/sum_test.go @@ -748,7 +748,7 @@ func TestSumPerKeyPerPartitionContributionBoundingFloat(t *testing.T) { } } -var sumPartitionSelectionNonDeterministicTestCases = []struct { +var sumPartitionSelectionTestCases = []struct { name string noiseKind NoiseKind epsilon float64 @@ -787,9 +787,9 @@ var sumPartitionSelectionNonDeterministicTestCases = []struct { }, } -// Checks that SumPerKey is performing a random partition selection. -func TestSumPartitionSelectionNonDeterministicInt(t *testing.T) { - for _, tc := range sumPartitionSelectionNonDeterministicTestCases { +// Checks that SumPerKey applies partition selection for int input values. 
+func TestSumPartitionSelectionInt(t *testing.T) { + for _, tc := range sumPartitionSelectionTestCases { t.Run(tc.name, func(t *testing.T) { // Sanity check that the entriesPerPartition is sensical. if tc.entriesPerPartition <= 0 { @@ -821,7 +821,7 @@ func TestSumPartitionSelectionNonDeterministicInt(t *testing.T) { got := SumPerKey(s, pcol, SumParams{MinValue: 0, MaxValue: 1, NoiseKind: tc.noiseKind, MaxPartitionsContributed: 1}) got = beam.ParDo(s, testutils.KVToInt64Metric, got) - // Validate that partitions are selected randomly (i.e., some emitted and some dropped). + // Validate that partition selection is applied (i.e., some emitted and some dropped). testutils.CheckSomePartitionsAreDropped(s, got, tc.numPartitions) if err := ptest.Run(p); err != nil { t.Errorf("%v", err) @@ -830,9 +830,9 @@ func TestSumPartitionSelectionNonDeterministicInt(t *testing.T) { } } -// Checks that SumPerKey is performing a random partition selection. -func TestSumPartitionSelectionNonDeterministicFloat(t *testing.T) { - for _, tc := range sumPartitionSelectionNonDeterministicTestCases { +// Checks that SumPerKey applies partition selection for float input values. +func TestSumPartitionSelectionFloat(t *testing.T) { + for _, tc := range sumPartitionSelectionTestCases { t.Run(tc.name, func(t *testing.T) { // Sanity check that the entriesPerPartition is sensical. if tc.entriesPerPartition <= 0 { @@ -864,7 +864,7 @@ func TestSumPartitionSelectionNonDeterministicFloat(t *testing.T) { got := SumPerKey(s, pcol, SumParams{MinValue: 0.0, MaxValue: 1.0, MaxPartitionsContributed: 1, NoiseKind: tc.noiseKind}) got = beam.ParDo(s, testutils.KVToFloat64Metric, got) - // Validate that partitions are selected randomly (i.e., some emitted and some dropped). + // Validate that partition selection is applied (i.e., some emitted and some dropped). 
testutils.CheckSomePartitionsAreDropped(s, got, tc.numPartitions) if err := ptest.Run(p); err != nil { t.Errorf("%v", err) diff --git a/privacy-on-beam/pbeam/testutils/testutils.go b/privacy-on-beam/pbeam/testutils/testutils.go index 1c278ecd..5f898510 100644 --- a/privacy-on-beam/pbeam/testutils/testutils.go +++ b/privacy-on-beam/pbeam/testutils/testutils.go @@ -40,9 +40,11 @@ func init() { beam.RegisterType(reflect.TypeOf(PairICodedKV{})) beam.RegisterType(reflect.TypeOf(TestInt64Metric{})) beam.RegisterType(reflect.TypeOf(TestFloat64Metric{})) + beam.RegisterType(reflect.TypeOf(TestFloat64SliceMetric{})) beam.RegisterType(reflect.TypeOf((*diffInt64Fn)(nil))) beam.RegisterType(reflect.TypeOf((*diffFloat64Fn)(nil))) + beam.RegisterType(reflect.TypeOf((*diffFloat64SliceFn)(nil))) beam.RegisterFunction(CheckNoNegativeValuesInt64Fn) beam.RegisterFunction(CheckNoNegativeValuesFloat64Fn) @@ -297,6 +299,22 @@ func Float64MetricToInt64Metric(tm TestFloat64Metric) TestInt64Metric { return TestInt64Metric{tm.Value, int64(tm.Metric)} } +// TestFloat64SliceMetric holds a Value and an associated []float64 metric (aggregation). +type TestFloat64SliceMetric struct { + Value int + Metric []float64 +} + +// Float64SliceMetricToKV transforms a TestFloat64SliceMetric into an (int, []float64) key-value pair. +func Float64SliceMetricToKV(tm TestFloat64SliceMetric) (int, []float64) { + return tm.Value, tm.Metric +} + +// KVToFloat64SliceMetric transforms an (int, []float64) key-value pair into a TestFloat64SliceMetric. +func KVToFloat64SliceMetric(v int, m []float64) TestFloat64SliceMetric { + return TestFloat64SliceMetric{v, m} +} + // EqualsKVInt checks that two PCollections col1 and col2 of type // are exactly equal. 
func EqualsKVInt(s beam.Scope, col1, col2 beam.PCollection) error { @@ -327,6 +345,24 @@ func EqualsKVFloat64(s beam.Scope, col1, col2 beam.PCollection) error { return ApproxEqualsKVFloat64(s, col1, col2, 0.0) } +// NotEqualsFloat64 checks that two PCollections col1 and col2 of type +// are different. Each key can only hold a single value. +func NotEqualsFloat64(s beam.Scope, col1, col2 beam.PCollection) error { + wantV := reflect.TypeOf(float64(0)) + if err := checkValueType(col1, wantV); err != nil { + return fmt.Errorf("unexpected value type for col1: %v", err) + } + if err := checkValueType(col2, wantV); err != nil { + return fmt.Errorf("unexpected value type for col2: %v", err) + } + + coGroupToValue := beam.CoGroupByKey(s, col1, col2) + diffs := beam.ParDo(s, &diffFloat64Fn{Tolerance: 0.0}, coGroupToValue) + combinedDiff := beam.Combine(s, combineDiffs, diffs) + beam.ParDo0(s, reportEquals, combinedDiff) + return nil +} + // ApproxEqualsKVInt64 checks that two PCollections col1 and col2 of type // are approximately equal, where "approximately equal" means // "the keys are the same in both col1 and col2, and the value associated with @@ -369,6 +405,53 @@ func ApproxEqualsKVFloat64(s beam.Scope, col1, col2 beam.PCollection, tolerance return nil } +// LessThanOrEqualToKVFloat64 checks that for PCollections col1 and col2 of type +// , for each key k, value corresponding to col1 is less than or equal +// to the value corresponding in col2. Each key can only hold a single value. 
+func LessThanOrEqualToKVFloat64(s beam.Scope, col1, col2 beam.PCollection) error { + wantV := reflect.TypeOf(float64(0)) + if err := checkValueType(col1, wantV); err != nil { + return fmt.Errorf("unexpected value type for col1: %v", err) + } + if err := checkValueType(col2, wantV); err != nil { + return fmt.Errorf("unexpected value type for col2: %v", err) + } + + coGroupToValue := beam.CoGroupByKey(s, col1, col2) + diffs := beam.ParDo(s, lessThanOrEqualTo, coGroupToValue) + combinedDiff := beam.Combine(s, combineDiffs, diffs) + beam.ParDo0(s, reportGreaterThan, combinedDiff) + return nil +} + +// ApproxEqualsKVFloat64Slice checks that two PCollections col1 and col2 of type +// are approximately equal, where "approximately equal" means +// "the keys are the same in both col1 and col2, and each value in the slice +// associated with key k in col1 is within the specified tolerance of each value +// in the slice associated with k in col2". Each key can only hold a single slice. +func ApproxEqualsKVFloat64Slice(s beam.Scope, col1, col2 beam.PCollection, tolerance float64) error { + wantV := reflect.TypeOf([]float64{0.0}) + if err := checkValueType(col1, wantV); err != nil { + return fmt.Errorf("unexpected value type for col1: %v", err) + } + if err := checkValueType(col2, wantV); err != nil { + return fmt.Errorf("unexpected value type for col2: %v", err) + } + + coGroupToValue := beam.CoGroupByKey(s, col1, col2) + diffs := beam.ParDo(s, &diffFloat64SliceFn{Tolerance: tolerance}, coGroupToValue) + combinedDiff := beam.Combine(s, combineDiffs, diffs) + beam.ParDo0(s, reportDiffs, combinedDiff) + return nil +} + +func reportEquals(diffs string) error { + if diffs != "" { + return nil + } + return fmt.Errorf("collections are equal") +} + func reportDiffs(diffs string) error { if diffs != "" { return fmt.Errorf("collections are not approximately equal. 
Diff (-got, +want):\n%s", diffs) @@ -376,6 +459,13 @@ func reportDiffs(diffs string) error { return nil } +func reportGreaterThan(errors string) error { + if errors != "" { + return fmt.Errorf("col1 is not less than or equal to col2: %s", errors) + } + return nil +} + func combineDiffs(diff1, diff2 string) string { if diff2 == "" { return fmt.Sprintf("%s", diff1) @@ -390,8 +480,8 @@ type diffInt64Fn struct { // ProcessElement returns a diff between values associated with a key. It // returns an empty string if the values are approximately equal. func (fn *diffInt64Fn) ProcessElement(k int, v1Iter, v2Iter func(*int64) bool) string { - var v1 = toSliceInt64(v1Iter) - var v2 = toSliceInt64(v2Iter) + var v1 = int64PtrToSlice(v1Iter) + var v2 = int64PtrToSlice(v2Iter) if diff := cmp.Diff(v1, v2, cmpopts.EquateApprox(0, fn.Tolerance)); diff != "" { return fmt.Sprintf("For k=%d: diff=%s", k, diff) } @@ -401,15 +491,15 @@ func (fn *diffInt64Fn) ProcessElement(k int, v1Iter, v2Iter func(*int64) bool) s // ProcessElement returns a diff between values associated with a key. It // returns an empty string if the values are approximately equal. 
func diffIntFn(k beam.X, v1Iter, v2Iter func(*int) bool) string { - var v1 = toSliceInt(v1Iter) - var v2 = toSliceInt(v2Iter) + var v1 = intPtrToSlice(v1Iter) + var v2 = intPtrToSlice(v2Iter) if diff := cmp.Diff(v1, v2); diff != "" { return fmt.Sprintf("For k=%d: diff=%s", k, diff) } return "" } -func toSliceInt64(vIter func(*int64) bool) []float64 { +func int64PtrToSlice(vIter func(*int64) bool) []float64 { var vSlice []float64 var v int64 for vIter(&v) { @@ -418,7 +508,7 @@ func toSliceInt64(vIter func(*int64) bool) []float64 { return vSlice } -func toSliceInt(vIter func(*int) bool) []float64 { +func intPtrToSlice(vIter func(*int) bool) []float64 { var vSlice []float64 var v int for vIter(&v) { @@ -427,14 +517,43 @@ func toSliceInt(vIter func(*int) bool) []float64 { return vSlice } +func lessThanOrEqualTo(k int, v1Iter, v2Iter func(*float64) bool) string { + var v1 = float64PtrToSlice(v1Iter) + var v2 = float64PtrToSlice(v2Iter) + if len(v1) != 1 { + return fmt.Sprintf("For k=%d, col1 has %d values, it needs to have exactly 1 value", k, len(v1)) + } + if len(v2) != 1 { + return fmt.Sprintf("For k=%d, col2 has %d values, it needs to have exactly 1 value", k, len(v2)) + } + if v1[0] > v2[0] { + return fmt.Sprintf("For k=%d, v1=%f is greater than v2=%f", k, v1[0], v2[0]) + } + + return "" +} + type diffFloat64Fn struct { Tolerance float64 } func (fn *diffFloat64Fn) ProcessElement(k int, v1Iter, v2Iter func(*float64) bool) string { - var v1 = toSliceFloat64(v1Iter) - var v2 = toSliceFloat64(v2Iter) + var v1 = float64PtrToSlice(v1Iter) + var v2 = float64PtrToSlice(v2Iter) + if diff := cmp.Diff(v1, v2, cmpopts.EquateApprox(0, fn.Tolerance)); diff != "" { + return fmt.Sprintf("For k=%d: diff=%s", k, diff) + } + + return "" // No diff +} +type diffFloat64SliceFn struct { + Tolerance float64 +} + +func (fn *diffFloat64SliceFn) ProcessElement(k int, v1Iter, v2Iter func(*[]float64) bool) string { + var v1 = float64SlicePtrToSlice(v1Iter) + var v2 = 
float64SlicePtrToSlice(v2Iter) if diff := cmp.Diff(v1, v2, cmpopts.EquateApprox(0, fn.Tolerance)); diff != "" { return fmt.Sprintf("For k=%d: diff=%s", k, diff) } @@ -442,7 +561,7 @@ func (fn *diffFloat64Fn) ProcessElement(k int, v1Iter, v2Iter func(*float64) boo return "" // No diff } -func toSliceFloat64(vIter func(*float64) bool) []float64 { +func float64PtrToSlice(vIter func(*float64) bool) []float64 { var vSlice []float64 var v float64 for vIter(&v) { @@ -451,6 +570,12 @@ func toSliceFloat64(vIter func(*float64) bool) []float64 { return vSlice } +func float64SlicePtrToSlice(vIter func(*[]float64) bool) []float64 { + var vSlice []float64 + vIter(&vSlice) // We are expecting a single slice. + return vSlice +} + func checkValueType(col beam.PCollection, wantValueType reflect.Type) error { _, vFullType := beam.ValidateKVType(col) vType := vFullType.Type() @@ -683,6 +808,20 @@ func ToleranceForMean(lower, upper, exactNormalizedSum, exactCount, exactMean, c return math.Max(distFromMaxNoisyMean, distFromMinNoisyMean), nil } +// QuantilesTolerance returns tolerance to be used in approxEquals for tests +// for quantiles to pass with negligible flakiness. +// +// When no noise is added, the quantiles should return a value that differs from the true +// quantile by no more than the size of the buckets the range is partitioned into, i.e., +// (upper - lower) / (branchingFactor^treeHeight - 1). +// +// The tests don't disable noise, hence we multiply the tolerance by a reasonably small number, +// in this case 5, to account for the noise addition. +// TODO: Implement more accurate tolerance based on confidence intervals. 
+func QuantilesTolerance(lower, upper float64) float64 { + return 5 * (upper - lower) / (math.Pow(float64(dpagg.DefaultBranchingFactor), float64(dpagg.DefaultTreeHeight)) - 1.0) +} + func distanceBetween(a, b float64) float64 { return math.Abs(a - b) } @@ -832,3 +971,13 @@ func getNumPartitions(vIter func(*int) bool) (v int) { } return v } + +// DereferenceFloat64Slice returns the first and only element of the slice for +// each key in a PCollection. Returns an error if the slice +// does not contain exactly 1 element. +func DereferenceFloat64Slice(v beam.V, r []float64) (beam.V, float64, error) { + if len(r) != 1 { + return v, 0.0, fmt.Errorf("dereferenceFloat64: r=%v does not contain a single element", r) + } + return v, r[0], nil +} diff --git a/privacy-on-beam/privacy_on_beam_deps.bzl b/privacy-on-beam/privacy_on_beam_deps.bzl index e084e725..c27063dd 100644 --- a/privacy-on-beam/privacy_on_beam_deps.bzl +++ b/privacy-on-beam/privacy_on_beam_deps.bzl @@ -31,11 +31,23 @@ def privacy_on_beam_deps(): version = "v0.0.0-20190523083050-ea95bdfd59fc", ) + go_repository( + name = "com_github_ajstarks_svgo", + importpath = "github.com/ajstarks/svgo", + sum = "h1:wVe6/Ea46ZMeNkQjjBW6xcqyQA/j5e0D6GytH95g0gQ=", + version = "v0.0.0-20180226025133-644b8db467af", + ) go_repository( name = "com_github_apache_beam", importpath = "github.com/apache/beam", - sum = "h1:SsORSlJF1b7rY4//RIXz2XbExy9O28O/h35Arj2b1sU=", - version = "v2.22.0+incompatible", + sum = "h1:Y8Q9pZ9V8IKM8EDNOE314D6cJE0neJooK+MxBvCcs1M=", + version = "v2.25.0+incompatible", + ) + go_repository( + name = "com_github_boombuler_barcode", + importpath = "github.com/boombuler/barcode", + sum = "h1:s1TvRnXwL2xJRaccrdcBQMZxq6X7DvsMogtmJeHDdrc=", + version = "v1.0.0", ) go_repository( @@ -44,6 +56,12 @@ def privacy_on_beam_deps(): sum = "h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=", version = "v0.3.1", ) + go_repository( + name = "com_github_burntsushi_xgb", + importpath = "github.com/BurntSushi/xgb", + sum = 
"h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc=", + version = "v0.0.0-20160522181843-27f122750802", + ) go_repository( name = "com_github_census_instrumentation_opencensus_proto", @@ -58,6 +76,12 @@ def privacy_on_beam_deps(): sum = "h1:ta993UF76GwbvJcIo3Y68y/M3WxlpEHPWIGDkJYwzJI=", version = "v0.3.4", ) + go_repository( + name = "com_github_davecgh_go_spew", + importpath = "github.com/davecgh/go-spew", + sum = "h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=", + version = "v1.1.0", + ) go_repository( name = "com_github_envoyproxy_go_control_plane", @@ -73,6 +97,37 @@ def privacy_on_beam_deps(): version = "v0.1.0", ) + go_repository( + name = "com_github_fogleman_gg", + importpath = "github.com/fogleman/gg", + sum = "h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8=", + version = "v1.3.0", + ) + go_repository( + name = "com_github_go_fonts_dejavu", + importpath = "github.com/go-fonts/dejavu", + sum = "h1:JSajPXURYqpr+Cu8U9bt8K+XcACIHWqWrvWCKyeFmVQ=", + version = "v0.1.0", + ) + go_repository( + name = "com_github_go_gl_glfw", + importpath = "github.com/go-gl/glfw", + sum = "h1:QbL/5oDUmRBzO9/Z7Seo6zf912W/a6Sr4Eu0G/3Jho0=", + version = "v0.0.0-20190409004039-e6da0acd62b1", + ) + go_repository( + name = "com_github_go_latex_latex", + importpath = "github.com/go-latex/latex", + sum = "h1:uroDDLmuCK5Pz5J/Ef5vCL6F0sJmAtZFTm0/cF027F4=", + version = "v0.0.0-20200518072620-0806b477ea35", + ) + + go_repository( + name = "com_github_golang_freetype", + importpath = "github.com/golang/freetype", + sum = "h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=", + version = "v0.0.0-20170609003504-e2365dfdc4a0", + ) go_repository( name = "com_github_golang_glog", importpath = "github.com/golang/glog", @@ -90,15 +145,40 @@ def privacy_on_beam_deps(): go_repository( name = "com_github_golang_protobuf", importpath = "github.com/golang/protobuf", - sum = "h1:ZFgWrT+bLgsYPirOnRfKLYJLvssAegOj/hgyMFdJZe0=", - version = "v1.4.1", + sum = "h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4=", + 
version = "v1.5.0", ) go_repository( name = "com_github_google_go_cmp", importpath = "github.com/google/go-cmp", - sum = "h1:DUoICFE8khaC0A+LKwHMPBdiqXkiQJF3F4TKWtu0/5w=", - version = "v0.4.2-0.20200609072101-23a2b5646fe0", + sum = "h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=", + version = "v0.5.5", + ) + + go_repository( + name = "com_github_jung_kurt_gofpdf", + importpath = "github.com/jung-kurt/gofpdf", + sum = "h1:jgbatWHfRlPYiK85qgevsZTHviWXKwB1TTiKdz5PtRc=", + version = "v1.16.2", + ) + go_repository( + name = "com_github_phpdave11_gofpdi", + importpath = "github.com/phpdave11/gofpdi", + sum = "h1:k2oy4yhkQopCK+qW8KjCla0iU2RpDow+QUDmH9DDt44=", + version = "v1.0.7", + ) + go_repository( + name = "com_github_pkg_errors", + importpath = "github.com/pkg/errors", + sum = "h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=", + version = "v0.8.1", + ) + go_repository( + name = "com_github_pmezard_go_difflib", + importpath = "github.com/pmezard/go-difflib", + sum = "h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=", + version = "v1.0.0", ) go_repository( @@ -107,6 +187,18 @@ def privacy_on_beam_deps(): sum = "h1:gQz4mCbXsO+nc9n1hCxHcGA3Zx3Eo+UHZoInFGUIXNM=", version = "v0.0.0-20190812154241-14fe0d1b01d4", ) + go_repository( + name = "com_github_ruudk_golang_pdf417", + importpath = "github.com/ruudk/golang-pdf417", + sum = "h1:nlG4Wa5+minh3S9LVFtNoY+GVRiudA2e3EVfcCi3RCA=", + version = "v0.0.0-20181029194003-1af4ab5afa58", + ) + go_repository( + name = "com_github_stretchr_testify", + importpath = "github.com/stretchr/testify", + sum = "h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=", + version = "v1.2.2", + ) go_repository( name = "com_google_cloud_go", @@ -114,6 +206,25 @@ def privacy_on_beam_deps(): sum = "h1:e0WKqKTd5BnrG8aKH3J3h+QvEIQtSUcf2n5UZ5ZgLtQ=", version = "v0.26.0", ) + go_repository( + name = "com_shuralyov_dmitri_gpu_mtl", + importpath = "dmitri.shuralyov.com/gpu/mtl", + sum = "h1:VpgP7xuJadIUuKccphEpTJnWhS2jkQyMt6Y7pJCD7fY=", + version = 
"v0.0.0-20190408044501-666a987793e9", + ) + + go_repository( + name = "io_rsc_pdf", + importpath = "rsc.io/pdf", + sum = "h1:k1MczvYDUvJBe93bYd7wrZLLUEcLZAuF824/I4e5Xr4=", + version = "v0.1.1", + ) + go_repository( + name = "org_gioui", + importpath = "gioui.org", + sum = "h1:2Eb0izWETwD/QZKFwDyyiUlV9cjFXg6Dl/fMfG7EPWU=", + version = "v0.0.0-20200628203458-851255f7a67b", + ) go_repository( name = "org_golang_google_appengine", @@ -139,36 +250,54 @@ def privacy_on_beam_deps(): go_repository( name = "org_golang_google_protobuf", importpath = "google.golang.org/protobuf", - sum = "h1:uEMv1vd5zHshPT2daUB9Ef3zcKfeWHTpzckCcFkxNWM=", - version = "v1.24.1-0.20200612063355-beaa55256c57", + sum = "h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=", + version = "v1.26.0", ) go_repository( name = "org_golang_x_crypto", importpath = "golang.org/x/crypto", - sum = "h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M=", - version = "v0.0.0-20190308221718-c2843e01d9a2", + sum = "h1:iMGN4xG0cnqj3t+zOM8wUB0BiPKHEwSxEZCvzcbZuvk=", + version = "v0.0.0-20190510104115-cbcb75029529", ) go_repository( name = "org_golang_x_exp", importpath = "golang.org/x/exp", - sum = "h1:c2HOrn5iMezYjSlGPncknSEr/8x5LELb/ilJbXi9DEA=", - version = "v0.0.0-20190121172915-509febef88a4", + sum = "h1:n9HxLrNxWWtEb1cA950nuEEj3QnKbtsCJ6KjcgisNUs=", + version = "v0.0.0-20191002040644-a1355ae1e2c3", ) + go_repository( + name = "org_golang_x_image", + importpath = "golang.org/x/image", + sum = "h1:1e2ufUJNM3lCHEY5jIgac/7UTjd6cgJNdatjPdFWf34=", + version = "v0.0.0-20200618115811-c13761719519", + ) go_repository( name = "org_golang_x_lint", importpath = "golang.org/x/lint", sum = "h1:XQyxROzUlZH+WIQwySDgnISgOivlhjIEwaQaJEJrrN0=", version = "v0.0.0-20190313153728-d0100b6bd8b3", ) + go_repository( + name = "org_golang_x_mobile", + importpath = "golang.org/x/mobile", + sum = "h1:4+4C/Iv2U4fMZBiMCc98MG1In4gJY5YRhtpDNeDeHWs=", + version = "v0.0.0-20190719004257-d2bd2a29d028", + ) + go_repository( + name = "org_golang_x_mod", + 
importpath = "golang.org/x/mod", + sum = "h1:sfUMP1Gu8qASkorDVjnMuvgJzwFbTZSeXFiGBYAVdl4=", + version = "v0.1.0", + ) go_repository( name = "org_golang_x_net", importpath = "golang.org/x/net", - sum = "h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628=", - version = "v0.0.0-20190311183353-d8887717615a", + sum = "h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI=", + version = "v0.0.0-20190620200207-3b0461eec859", ) go_repository( @@ -188,8 +317,8 @@ def privacy_on_beam_deps(): go_repository( name = "org_golang_x_sys", importpath = "golang.org/x/sys", - sum = "h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=", - version = "v0.0.0-20190215142949-d0b11bdaac8a", + sum = "h1:1/DFK4b7JH8DmkqhUk48onnSfrPzImPoVxuomtbT2nk=", + version = "v0.0.0-20200124204421-9fbb57f87de9", ) go_repository( @@ -202,8 +331,8 @@ def privacy_on_beam_deps(): go_repository( name = "org_golang_x_tools", importpath = "golang.org/x/tools", - sum = "h1:5Beo0mZN8dRzgrMMkDp0jc8YXQKx9DiJ2k1dkvGsn5A=", - version = "v0.0.0-20190524140312-2c0ae7006135", + sum = "h1:1xWUkZQQ9Z9UuZgNaIR6OQOE7rUFglXUUBZlO+dGg6I=", + version = "v0.0.0-20190927191325-030b2cf1153e", ) go_repository( @@ -213,58 +342,22 @@ def privacy_on_beam_deps(): version = "v0.0.0-20191204190536-9bdfabe68543", ) - go_repository( - name = "com_github_ajstarks_svgo", - importpath = "github.com/ajstarks/svgo", - sum = "h1:wVe6/Ea46ZMeNkQjjBW6xcqyQA/j5e0D6GytH95g0gQ=", - version = "v0.0.0-20180226025133-644b8db467af", - ) - - go_repository( - name = "com_github_fogleman_gg", - importpath = "github.com/fogleman/gg", - sum = "h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8=", - version = "v1.3.0", - ) - - go_repository( - name = "com_github_golang_freetype", - importpath = "github.com/golang/freetype", - sum = "h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=", - version = "v0.0.0-20170609003504-e2365dfdc4a0", - ) - - go_repository( - name = "com_github_jung_kurt_gofpdf", - importpath = "github.com/jung-kurt/gofpdf", - sum = 
"h1:PJr+ZMXIecYc1Ey2zucXdR73SMBtgjPgwa31099IMv0=", - version = "v1.0.3-0.20190309125859-24315acbbda5", - ) - - go_repository( - name = "io_rsc_pdf", - importpath = "rsc.io/pdf", - sum = "h1:k1MczvYDUvJBe93bYd7wrZLLUEcLZAuF824/I4e5Xr4=", - version = "v0.1.1", - ) - - go_repository( - name = "org_golang_x_image", - importpath = "golang.org/x/image", - sum = "h1:00VmoueYNlNz/aHIilyyQz/MHSqGoWJzpFv/HW8xpzI=", - version = "v0.0.0-20180708004352-c73c2afc3b81", - ) - go_repository( name = "org_gonum_v1_gonum", importpath = "gonum.org/v1/gonum", - sum = "h1:nYxTaCPaVoJbxx+vMVnsFb6kw5+6aJCx52m/lmM/Vog=", - version = "v0.0.0-20180816165407-929014505bf4", + sum = "h1:wGtP3yGpc5mCLOLeTeBdjeui9oZSz5De0eOjMLC/QuQ=", + version = "v0.8.1", + ) + go_repository( + name = "org_gonum_v1_netlib", + importpath = "gonum.org/v1/netlib", + sum = "h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc=", + version = "v0.0.0-20190313105609-8cb42192e0e0", ) go_repository( name = "org_gonum_v1_plot", importpath = "gonum.org/v1/plot", - sum = "h1:Otpxyvra6Ie07ft50OX5BrCfS/BWEMvhsCUHwPEJmLI=", - version = "v0.7.0", + sum = "h1:1oWyfw7tIDDtKb+t+SbR9RFruMmNJlsKiZUolHdys2I=", + version = "v0.8.1", )