blob: a8fb57ce64084345a974725d4b66e2c6f07c366f [file] [log] [blame]
package com.google.android.rappor;
// BEGIN android-changed: Removed guava dependency
// import static com.google.common.base.Preconditions.checkArgument;
//
// import com.google.common.base.Verify;
// END android-changed
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.util.BitSet;
// BEGIN android-changed
import java.util.Random;
// END android-changed
// BEGIN android-changed: Remove javax
// import javax.annotation.Nullable;
// import javax.annotation.concurrent.GuardedBy;
// END android-changed
/**
* Encodes reports using the RAPPOR differentially-private encoding algorithm.
*
* The RAPPOR algorithm is described at go/rappor and presented in detail at go/rappor-writeup.
*
* @author bonawitz@google.com Keith Bonawitz
*/
// TODO(bonawitz): Make encoder and interface and make this a final class implementing it.
// We can't just make this final now because there exist tests that need to Mock it.
public class Encoder {
/**
* A non-decreasing version number.
*
* <p>The version number should increase any time the Encoder has a user-visible functional change
* to any of encoding algorithms or the interpretation of the input parameters.
*/
public static final long VERSION = 3;
/**
* Minimum length required for the user secret, in bytes.
*/
public static final int MIN_USER_SECRET_BYTES = HmacDrbg.ENTROPY_INPUT_SIZE_BYTES;
/**
* Maximum number of bits in the RAPPOR-encoded report.
*
* Must be less than HmacDrbg.MAX_BYTES_TOTAL;
*/
public static final int MAX_BITS = 4096;
/**
* Maximum number of Bloom filter hashes used for RAPPOR-encoded strings.
*
* <p>This is constrained in the current implementation by requiring 2 bytes from an MD5 value
* (which is 16 bytes long) for each Bloom filter hash.
*/
public static final int MAX_BLOOM_HASHES = 8;
/**
* Maximum number of cohorts supported.
*/
public static final int MAX_COHORTS = 128;
/**
* First (and only) byte of HMAC_DRBG personalization strings used to generate the cohort number.
*/
private static final byte HMAC_DRBG_TYPE_COHORT = 0x00;
/**
* First byte of HMAC_DRBG personalization strings used to generate the PRR response.
*/
private static final byte HMAC_DRBG_TYPE_PRR = 0x01;
/**
* A unique identifier for this Encoder, represented in UTF-8.
*
* <p>The (userSecret, encoderId) pair identify a the logical memoization table used for RAPPOR's
* Permanent Randomized Response stage. Therefore, for any userSecret, each Encoder must have a
* distinct identifier for Permanent Randomized Response to be effective.
*
* <p>In practice, "memoization" is achieved by generating deterministic pseudo-random bits using
* HMAC_DRBG. encoderIdBytes is used as part of the personalization string.
*/
private final byte[] encoderIdBytes;
/**
* The RAPPOR "f" probability, on the range [0.0, 1.0].
*
* <p>This it the probability of replacing a bit from the input with a uniform random bit when
* generating the permanent randomized response.
*
* <p>Setting probabilityF to 0 disables the PRR phase of RAPPOR.
*/
private final double probabilityF;
/**
* The RAPPOR "p" probability, on the range [0.0, 1.0].
*
* <p>This is the probability of emitting a '1' bit in the instantaneous randomized response,
* given that the corresponding bit in the permanent randomized response is '0'.
*
* <p>Setting probabilityP to 0.0 and probabilityQ to 1.0 disables the IRR phase of RAPPOR.
*/
private final double probabilityP;
/**
* The RAPPOR "1" probability, on the range [0.0, 1.0].
*
* <p>This is the probability of emitting a 1 bit in the instantaneous randomized response, given
* that the corresponding bit in the permanent randomized response is 1.
*
* <p>Setting probabilityP to 0.0 and probabilityQ to 1.0 disables the IRR phase of RAPPOR.
*/
private final double probabilityQ;
/**
* The number of bits in the RAPPOR-encoded report.
*
* <p>Must satisfy 1 &lt;= numBits &lt;= MAX_BITS.
*
* <ul>
* <li>For encodeOrdinal, requires 0 &lt;= ordinal &lt; numBits.
* <li>For encodeString, uses a numBits-wide Bloom filter.
* <li>For encodeBits, only the low-order numBits may be non-zero.
* </ul>
*/
private final int numBits;
/**
* The number of hash functions used forming the Bloom filter encoding of a string.
*
* <p>Must satisfy 1 &lt;= numBloomHashes &lt;= MAX_BLOOM_HASHES.
*/
private final int numBloomHashes;
/**
* The number of cohorts used for cohort assignment.
*/
private final int numCohorts;
/**
* The cohort this user is assigned to for Bloom filter string encoding.
*
* <p>This value is stable for a given (userSecret, numCohorts) pair. Furthermore, if two
* encoders use the same userSecret but have different numCohorts values, the cohort assignment
* for the encoder with the smaller numCohorts value is a bitwise suffix of the cohort assignment
* for the encoder with the larger numCohorts value. It follows that, if you know maximum cohort
* assignment across a set of encoders, and you know the numCohorts setting for each encoder, then
* you can deduce the cohort assignment for each encoder by taking the bitwise-and of the max
* cohort value and (numCohorts-1), noting that numCohorts is required to be a positive power of
* 2.
*/
private final int cohort;
/**
* A bitmask with 1 bits in all the positions where a RAPPOR-encoded report could have a 1 bit.
*
* <p>The bitmask has the lowest order numBits set to 1 and the rest 0.
*/
private final BitSet inputMask;
/**
* SHA-256 utility class instance.
*
* <p>This object is stateful; access must be synchronized. The reset method must be
* called before each use.
*/
// BEGIN android-changed: Remove javax
// @GuardedBy("this")
// END android-changed
private final MessageDigest sha256;
/**
* MD5 utility class instance.
*
* <p>This object is stateful; access must be synchronized. The reset method must be
* called before each use.
*/
// BEGIN android-changed: Remove javax
// @GuardedBy("this")
// END android-changed
private final MessageDigest md5;
/**
* A SecureRandom instance, initialized with a cryptographically secure random seed.
*/
// BEGIN android-changed
// private final SecureRandom random;
private final Random random;
// BEGIN android-changed
/**
* Entropy input for constructing HmacDrbg objects.
*/
private final byte[] userSecret;
/**
* Constructs a new RAPPOR message encoder.
*
* @param userSecret Stable secret randomly selected for this user. UserSecret must be at least
* MIN_USER_SECRET_BYTES bytes of high-quality entropy. Changing the user secret clears the
* memoized cohort assignments and permanent randomized responses. Be aware that resetting
* these memoizations has significant privacy risks -- consult documentation at go/rappor for
* more details.
* @param encoderId Uniquely identifies this encoder. Used to differentiate momoized
* cohort assignments and permanent randomized responses.
* @param numBits The number of bits in the RAPPOR-encoded report.
* @param probabilityF The RAPPOR "f" probability, on the range [0.0, 1.0]. This will be
* quantized to the nearest 1/128.
* @param probabilityP The RAPPOR "p" probability, on the range [0.0, 1.0].
* @param probabilityQ The RAPPOR "1" probability, on the range [0.0, 1.0].
* @param numCohorts Number of cohorts into which the user pool is randomly segmented.
* @param numBloomHashes The number of hash functions used forming the Bloom filter encoding of a
* string.
*/
public Encoder(byte[] userSecret, String encoderId, int numBits,
double probabilityF, double probabilityP, double probabilityQ,
int numCohorts, int numBloomHashes) {
this(
null, // random
null, // md5,
null, // sha256,
userSecret,
encoderId,
numBits,
probabilityF,
probabilityP,
probabilityQ,
numCohorts,
numBloomHashes);
}
/**
* Constructs a new RAPPOR message encoder, using constructor-style dependency injection.
*
* @param random A cryptographically secure random number generator, or null (in which case a
* new SecureRandom will be constructed).
* @param md5 A configured MD5 hash algorithm, or null (in which case a new MessageDigest will be
* constructed). Note: MessageDigest objects are stateful, and that state must not be
* modified while calls to the Encoder are active.
* @param sha256 A configured SHA-256 hash algorithm, or null (in which case a new MessageDigest
* will be constructed). Note: MessageDigest objects are stateful, and that state must not
* be modified while calls to the Encoder are active.
* @param userSecret Stable secret randomly selected for this user. UserSecret must be at least
* 32 bytes of high-quality entropy. Changing the user secret clears the memoized cohort
* assignments and permanent randomized responses. Be aware that resetting these memoizations
* has significant privacy risks -- consult documentation at go/rappor for more details.
* @param encoderId Uniquely identifies this encoder. Used to differentiate momoized
* cohort assignments and permanent randomized responses.
* @param numBits The number of bits in the RAPPOR-encoded report.
* @param probabilityF The RAPPOR "f" probability, on the range [0.0, 1.0]. This will be
* quantized to the nearest 1/128.
* @param probabilityP The RAPPOR "p" probability, on the range [0.0, 1.0].
* @param probabilityQ The RAPPOR "1" probability, on the range [0.0, 1.0].
* @param numCohorts Number of cohorts into which the user pool is randomly segmented.
* @param numBloomHashes The number of hash functions used forming the Bloom filter encoding of a
* string.
*/
public Encoder(
// BEGIN android-changed: Remove javax
// @Nullable SecureRandom random,
// @Nullable MessageDigest md5,
// @Nullable MessageDigest sha256,
// SecureRandom random,
Random random,
MessageDigest md5,
MessageDigest sha256,
// END android-changed
byte[] userSecret,
String encoderId,
int numBits,
double probabilityF,
double probabilityP,
double probabilityQ,
int numCohorts,
int numBloomHashes) {
if (md5 != null) {
this.md5 = md5;
} else {
try {
this.md5 = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException impossible) {
// This should never happen. Every implementation of the Java platform
// is required to support MD5.
throw new AssertionError(impossible);
}
}
this.md5.reset();
if (sha256 != null) {
this.sha256 = sha256;
} else {
try {
this.sha256 = MessageDigest.getInstance("SHA-256");
} catch (NoSuchAlgorithmException impossible) {
// This should never happen. Every implementation of the Java platform
// is required to support 256.
throw new AssertionError(impossible);
}
}
this.sha256.reset();
this.encoderIdBytes = encoderId.getBytes(StandardCharsets.UTF_8);
if (random != null) {
this.random = random;
} else {
this.random = new SecureRandom();
}
// android-changed: Removed guava dependency
// checkArgument(
// userSecret.length >= MIN_USER_SECRET_BYTES,
// "userSecret must be at least %s bytes of high-quality entropy.",
// MIN_USER_SECRET_BYTES);
checkArgument(
userSecret.length >= MIN_USER_SECRET_BYTES,
"userSecret must be at least " + MIN_USER_SECRET_BYTES
+ " bytes of high-quality entropy.");
this.userSecret = userSecret;
checkArgument(
probabilityF >= 0 && probabilityF <= 1, "probabilityF must be on range [0.0, 1.0]");
this.probabilityF = Math.round(probabilityF * 128) / 128.0;
checkArgument(
probabilityP >= 0 && probabilityP <= 1, "probabilityP must be on range [0.0, 1.0]");
this.probabilityP = probabilityP;
checkArgument(
probabilityQ >= 0 && probabilityQ <= 1, "probabilityQ must be on range [0.0, 1.0]");
this.probabilityQ = probabilityQ;
checkArgument(
numBits >= 1 && numBits <= MAX_BITS, "numBits must be on range [1, " + MAX_BITS + "].");
this.numBits = numBits;
// Make a bitmask with the lowest-order numBits set to 1.
this.inputMask = new BitSet(numBits);
this.inputMask.set(0, numBits, true);
checkArgument(
numBloomHashes >= 1 && numBloomHashes <= numBits,
"numBloomHashes must be on range [1, numBits).");
this.numBloomHashes = numBloomHashes;
checkArgument(
numCohorts >= 1 && numCohorts <= MAX_COHORTS,
"numCohorts must be on range [1, " + MAX_COHORTS + "].");
// Assuming numCohorts >= 1:
//
// If numCohorts is a power of 2, then numCohorts has one bit set and numCohorts - 1 has only
// the bits to the right of numCohorts's bit set. The bitwise-and of these is 0.
//
// If numCohorts is not a power of 2, then numCohorts has at least two bits set. It follows
// subtracting one from numCohorts keeps the most significant bit set to 1, and thus the
// bitwise-and has at least this non-zero bit.
boolean numCohortsIsPowerOfTwo = (numCohorts & (numCohorts - 1)) == 0;
checkArgument(numCohortsIsPowerOfTwo, "numCohorts must be a power of 2.");
this.numCohorts = numCohorts;
// cohortMasterAssignment depends only on the userSecret.
HmacDrbg cohortDrbg = new HmacDrbg(userSecret, new byte[] {HMAC_DRBG_TYPE_COHORT});
ByteBuffer cohortDrbgBytes = ByteBuffer.wrap(cohortDrbg.nextBytes(4));
int cohortMasterAssignment = Math.abs(cohortDrbgBytes.getInt()) % MAX_COHORTS;
this.cohort = cohortMasterAssignment & (numCohorts - 1);
}
public double getProbabilityF() {
return probabilityF;
}
public double getProbabilityP() {
return probabilityP;
}
public double getProbabilityQ() {
return probabilityQ;
}
public int getNumBits() {
return numBits;
}
public int getNumBloomHashes() {
return numBloomHashes;
}
public int getNumCohorts() {
return numCohorts;
}
public int getCohort() {
return cohort;
}
/**
* Returns the Encoder ID as a UTF-8 formatted string.
*/
public String getEncoderId() {
return new String(encoderIdBytes, StandardCharsets.UTF_8);
}
/**
* Encodes a boolean into a RAPPOR report.
*
* <p>The boolean is 0 or 1, then encoded using permanent and instantaneous randomized response.
*
* <p>In most cases, numBits should be 1 when using this method.
*/
public byte[] encodeBoolean(boolean bool) {
BitSet input = new BitSet(numBits);
input.set(0, bool);
return encodeBits(input);
}
/**
* Encodes an ordinal into a RAPPOR report.
*
* <p>The ordinal is represented using a 1-hot binary representation, then encoded using permanent
* and instantaneous randomized response.
*
* @param ordinal A value on the range [0, numBits).
*/
public byte[] encodeOrdinal(int ordinal) {
checkArgument(
ordinal >= 0 && ordinal < numBits, "Ordinal value must be in range [0, numBits).");
BitSet input = new BitSet(numBits);
input.set(ordinal, true);
return encodeBits(input);
}
/**
* Encodes a string into a RAPPOR report.
*
* <p>The string is represented using a Bloom filter with numBloomHashes hash functions, then
* encoded using permanent and instantaneous randomized response.
*
* @param string An arbitrary string.
*/
public byte[] encodeString(String string) {
// Implements a Bloom filter by slicing a single MD5 hash into numBloomHashes bit indices.
byte[] stringInUtf8 = string.getBytes(StandardCharsets.UTF_8);
byte[] message =
ByteBuffer.allocate(4 + stringInUtf8.length)
.putInt(cohort)
.put(stringInUtf8)
.array();
byte[] digest;
synchronized (this) {
md5.reset();
digest = md5.digest(message);
}
// android-changed: Removed guava dependency
// Verify.verify(digest.length == 16);
// Verify.verify(numBloomHashes <= digest.length / 2);
verify(digest.length == 16);
verify(numBloomHashes <= digest.length / 2);
BitSet input = new BitSet(numBits);
for (int i = 0; i < numBloomHashes; i++) {
// Convert byte pairs to ints on [0, 65535].
// Anding with 0xFF converts signed byte to unsigned int.
int digestWord = (digest[i * 2] & 0xFF) * 256 + (digest[i * 2 + 1] & 0xFF);
int chosenBit = digestWord % numBits;
input.set(chosenBit, true);
}
return encodeBits(input);
}
/**
* Encodes an arbitrary bitstring into a RAPPOR report.
*
* @param bits A bitstring in which only the least significant numBits bits may be 1.
*/
public byte[] encodeBits(byte[] bits) {
return encodeBits(BitSet.valueOf(bits));
}
/**
* Encodes an arbitrary bitstring into a RAPPOR report.
*
* @param bits A bitstring in which only the least significant numBits bits may be 1.
*/
private byte[] encodeBits(BitSet bits) {
BitSet permanentRandomizedResponse = computePermanentRandomizedResponse(bits);
BitSet encodedBitSet = computeInstantaneousRandomizedResponse(permanentRandomizedResponse);
// BitSet.toByteArray only returns enough bytes to capture the most significant bit
// that is set. For example, a BitSet with no bits set could return a length-0 array.
// In contrast, we guarantee that our output is sized according to numBits.
byte[] encodedBytes = encodedBitSet.toByteArray();
byte[] output = new byte[(numBits + 7) / 8];
// android-changed: Removed guava dependency
// Verify.verify(encodedBytes.length <= output.length);
verify(encodedBytes.length <= output.length);
System.arraycopy(
encodedBytes, // src
0, // srcPos
output, // dest
0, // destPos
encodedBytes.length); // length
return output;
}
/**
* Returns the permanent randomized response for the given bits.
*
* <p>The response for a particular bits input is guaranteed to always be the same for any encoder
* constructed with the same parameters (including the encoderId and the userSecret).
*/
private BitSet computePermanentRandomizedResponse(BitSet bits) {
// Ensures that the input only has bits set in the lowest
BitSet masked = new BitSet();
masked.or(bits);
masked.andNot(inputMask);
checkArgument(masked.isEmpty(), "Input bits had bits set past Encoder's numBits limit.");
if (probabilityF == 0.0) {
return bits;
}
// Builds a personalization string that contains both the encoderId and input value (bits),
// and is no longer than HmacDrbg.MAX_PERSONALIZATION_STRING_LENGTH_BYTES. The first byte
// of the personalization string is always HMAC_DRBG_TYPE_PRR, to avoid collisions with the
// cohort-generation personalization string.
byte[] personalizationString;
synchronized (this) {
int personalizationStringLength =
Math.min(HmacDrbg.MAX_PERSONALIZATION_STRING_LENGTH_BYTES, 1 + sha256.getDigestLength());
personalizationString = new byte[personalizationStringLength];
personalizationString[0] = HMAC_DRBG_TYPE_PRR;
sha256.reset();
sha256.update(encoderIdBytes);
sha256.update(new byte[] {0});
sha256.update(bits.toByteArray());
byte[] digest = sha256.digest(personalizationString);
System.arraycopy(digest, 0, personalizationString, 1, personalizationString.length - 1);
}
HmacDrbg drbg = new HmacDrbg(userSecret, personalizationString);
byte[] pseudorandomStream = drbg.nextBytes(numBits);
// android-changed: Removed guava dependency
// Verify.verify(numBits <= pseudorandomStream.length);
verify(numBits <= pseudorandomStream.length);
int probabilityFTimes128 = (int) Math.round(probabilityF * 128);
BitSet result = new BitSet(numBits);
for (int i = 0; i < numBits; i++) {
// Grabs a single byte from the pseudorandom stream.
// Anding with 0xFF converts a signed byte to an unsigned integer.
int pseudorandomByte = pseudorandomStream[i] & 0xFF;
// Uses bits 1-7 to get a random number between 0 and 127.
int uniform0to127 = pseudorandomByte >> 1;
boolean shouldUseNoise = uniform0to127 < probabilityFTimes128;
if (shouldUseNoise) {
// Uses bit 0 as a flip of a fair coin.
result.set(i, (pseudorandomByte & 0x01) > 0);
} else {
result.set(i, bits.get(i));
}
}
return result;
}
/**
* Returns the instantaneous randomized response for the given bits.
*
* <p>The instantaneous response is NOT memoized -- it is sampled randomly on
* every invocation.
*/
private BitSet computeInstantaneousRandomizedResponse(BitSet bits) {
// Ensures that the input only has bits set in the lowest
BitSet masked = new BitSet();
masked.or(bits);
masked.andNot(inputMask);
checkArgument(masked.isEmpty(), "Input bits had bits set past Encoder's numBits limit.");
if (probabilityP == 0.0 && probabilityQ == 1.0) {
return bits;
}
BitSet response = new BitSet(numBits);
for (int i = 0; i < numBits; i++) {
boolean bit = bits.get(i);
double probability = bit ? probabilityQ : probabilityP;
boolean responseBit = random.nextFloat() < probability;
response.set(i, responseBit);
}
return response;
}
// BEGIN android-changed: Added guava methods
private static void checkArgument(boolean expression, Object errorMessage) {
if (!expression) {
throw new IllegalArgumentException(String.valueOf(errorMessage));
}
}
private static void verify(boolean expression) {
if (!expression) {
throw new java.lang.IllegalStateException();
}
}
// END android-changed
}