/** * \file DiscreteNormal.hpp * \brief Header for DiscreteNormal * * Sample exactly from the discrete normal distribution. * * Copyright (c) Charles Karney (2013) and licensed * under the MIT/X11 License. For more information, see * http://randomlib.sourceforge.net/ **********************************************************************/ #if !defined(RANDOMLIB_DISCRETENORMAL_HPP) #define RANDOMLIB_DISCRETENORMAL_HPP 1 #include #include namespace RandomLib { /** * \brief The discrete normal distribution. * * Sample integers \e i with probability proportional to * \f[ * \exp\biggl[-\frac12\biggl(\frac{i-\mu}{\sigma}\biggr)^2\biggr], * \f] * where σ and μ are given as rationals (the ratio of two integers). * The sampling is exact (provided that the random generator is ideal). For * example * \code #include #include #include int main() { RandomLib::Random r; // Create r r.Reseed(); // and give it a unique seed int sigma_num = 7, sigma_den = 1, mu_num = 1, mu_den = 3; RandomLib::DiscreteNormal d(sigma_num, sigma_den, mu_num, mu_den); for (int i = 0; i < 100; ++i) std::cout << d(r) << "\n"; } \endcode * prints out 100 samples with σ = 7 and μ = 1/3. * * The algorithm is much the same as for ExactNormal; for details see * - C. F. F. Karney, Sampling exactly from the normal distribution, * http://arxiv.org/abs/1303.6257 (Mar. 2013). * . * That algorithm samples the integer part of the result \e k, samples \e x * in [0,1], and (unless rejected) returns s(\e k + \e x), where \e s * = ±1. For the discrete case, we sample \e x in [0,1) such that * \f[ * s(k + x) = (i - \mu)/\sigma, * \f] * or * \f[ * x = s(i - \mu)/\sigma - k * \f] * The value of \e i which results in the smallest \e x ≥ 0 is * \f[ * i_0 = s\lceil k \sigma + s \mu\rceil * \f] * so sample * \f[ * i = i_0 + sj * \f] * where \e j is uniformly distributed in [0, ⌈σ⌉). The * corresponding value of \e x is * \f[ * \begin{aligned} * x &= \bigl(si_0 - (k\sigma + s\mu)\bigr)/\sigma + j/\sigma\\ * &= x_0 + j/\sigma,\\ * x_0 &= \bigl(\lceil k \sigma + s \mu\rceil - * (k \sigma + s \mu)\bigr)/\sigma. * \end{aligned} * \f] * After \e x is sampled in this way, it should be rejected if \e x ≥ 1 * (this is counted with the next larger value of \e k) or if \e x = 0, \e k * = 0, and \e s = −1 (to avoid double counting the origin). If \e x * is accepted (in Step 4 of the ExactNormal algorithm), then return \e i. * * When σ and μ are given as rationals, all the arithmetic outlined * above can be carried out exactly. The basic rejection techniques used by * ExactNormal are exact. Thus the result of this discrete form of the * algorithm is also exact. * * RandomLib provides two classes to sample from this distribution: * - DiscreteNormal which is tuned for speed on a typical general purpose * computer. This assumes that random samples can be generated relatively * quickly. * - DiscreteNormalAlt, which is a prototype for what might be needed on a * small device used for cryptography which is using a hardware generator * for obtaining truly random bits. This assumption here is that the * random bits are relatively expensive to obtain. * . * The basic algorithm is the same in the two cases. The main advantages of * this method are: * - exact sampling (provided that the source of random numbers is ideal), * - no need to cut off the tails of the distribution, * - a short program involving simple integer operations only, * - no dependence on external libraries (except to generate random bits), * - no large tables of constants needed, * - minimal time to set up for a new σ and μ (roughly comparable to * the time it takes to generate one sample), * - only about 5–20 times slower than standard routines to sample from * a normal distribution using plain double-precision arithmetic. * - DiscreteNormalAlt exhibits ideal scaling for the consumption of random * bits, namely a constant + log2σ, for large σ, * where the constant is about 31. * . * The possible drawbacks of this method are: * - σ and μ are restricted to rational numbers with sufficiently * small numerators and denominators to avoid overflow (this is unlikely to * be a severe restriction especially if the template parameter IntType is * set to long long), * - the running time is unbounded (but not in any practical sense), * - the memory consumption is unbounded (but not in any practical sense), * - the toll, about 30 bits, is considerably worse than that obtained using * the Knuth-Yao algorithm, for which the toll is no more than 2 (but this * requires a large table which is expensive to compute and requires a lot * of memory to store). * * This class uses a mutable private vector. So a single DiscreteNormal * object cannot safely be used by multiple threads. In a multi-processing * environment, each thread should use a thread-specific DiscreteNormal * object. * * Some timing results for IntType = int, μ = 0, and 108 * samples (time = time per sample, including setup time, rv = mean number of * random variables per sample) * - σ = 10, time = 219 ns, rv = 17.52 * - σ = 32, time = 223 ns, rv = 17.82 * - σ = 1000, time = 225 ns, rv = 17.95 * - σ = 160000, time = 226 ns, rv = 17.95 * * @tparam IntType the integer type to use (default int). **********************************************************************/ template class DiscreteNormal { public: /** * Constructor. * * @param[in] sigma_num the numerator of σ. * @param[in] sigma_den the denominator of σ (default 1). * @param[in] mu_num the numerator of μ (default 0). * @param[in] mu_den the denominator of μ (default 1). * * The constructor creates a DiscreteNormal objects for sampling with * specific values of σ and μ. This may throw an exception if the * parameters are such that overflow is possible. Internally σ and * μ are expressed with a common denominator, so it may be possible to * avoid overflow by picking the fractions of these quantities so that \e * sigma_den and \e mu_den have many common factors. **********************************************************************/ DiscreteNormal(IntType sigma_num, IntType sigma_den = 1, IntType mu_num = 0, IntType mu_den = 1); /** * Return a sample. * * @tparam Random the type of the random generator. * @param[in,out] r a random generator. * @return discrete normal integer. **********************************************************************/ template IntType operator()(Random& r) const; private: /** * sigma = _sig / _d, mu = _imu + _mu / _d, _isig = floor(sigma) **********************************************************************/ IntType _sig, _mu, _d, _isig, _imu; typedef unsigned short word; /** * Holds as much of intermediate uniform deviates as needed. **********************************************************************/ mutable std::vector _v; mutable unsigned _m, _l; /** * Increment on size of _v. **********************************************************************/ static const unsigned alloc_incr = 16; // ceil(n/d) for d > 0 static IntType iceil(IntType n, IntType d); // abs(n) needed because Visual Studio's std::abs has problems static IntType iabs(IntType n); static IntType gcd(IntType u, IntType v); // After x = LeadingDigit(p), p/_sig = (x + p'/_sig)/b where p and p' are // in [0, _sig) and b = 1 + max(word). word LeadingDigit(IntType& p) const; /** * Implement outcomes for choosing with prob (\e x + 2\e k) / (2\e k + 2); * return: * - 1 (succeed unconditionally) with prob (\e m − 2) / \e m, * - 0 (succeed with probability x) with prob 1 / \e m, * - −1 (fail unconditionally) with prob 1 / \e m. **********************************************************************/ template static int Choose(Random& r, int m); // Compute v' < v. If true set v = v'. template bool less_than(Random& r) const; // Compute v < (x + p/_sig)/base (updating v) template bool less_than(Random& r, word x, IntType p) const; // true with prob (x + p/_sig)/base template bool bernoulli(Random& r, word x, IntType p) const; /** * Return true with probability exp(−1/2). **********************************************************************/ template bool ExpProbH(Random& r) const; /** * Return true with probability exp(−n/2). **********************************************************************/ template bool ExpProb(Random& r, int n) const; /** * Return \e n with probability exp(−n/2) * (1−exp(−1/2)). **********************************************************************/ template int ExpProbN(Random& r) const; /** * Algorithm B: true with prob exp(-x * (2*k + x) / (2*k + 2)) where * x = (x0 + xn / _sig)/b. **********************************************************************/ template bool B(Random& r, int k, word x0, IntType xn) const; }; template DiscreteNormal::DiscreteNormal (IntType sigma_num, IntType sigma_den, IntType mu_num, IntType mu_den) : _v(std::vector(alloc_incr)), _m(0), _l(alloc_incr) { STATIC_ASSERT(std::numeric_limits::is_integer, "DiscreteNormal: invalid integer type IntType"); STATIC_ASSERT(std::numeric_limits::is_signed, "DiscreteNormal: IntType must be a signed type"); STATIC_ASSERT(!std::numeric_limits::is_signed, "DiscreteNormal: word must be an unsigned type"); STATIC_ASSERT(std::numeric_limits::digits + 1 >= std::numeric_limits::digits, "DiscreteNormal: IntType must be at least as wide as word"); if (!( sigma_num > 0 && sigma_den > 0 && mu_den > 0 )) throw RandomErr("DiscreteNormal: need sigma > 0"); _imu = mu_num / mu_den; if (_imu == (std::numeric_limits::min)()) throw RandomErr("DiscreteNormal: abs(mu) too large"); mu_num -= _imu * mu_den; IntType l; l = gcd(sigma_num, sigma_den); sigma_num /= l; sigma_den /= l; l = gcd(mu_num, mu_den); mu_num /= l; mu_den /= l; _isig = iceil(sigma_num, sigma_den); l = gcd(sigma_den, mu_den); _sig = sigma_num * (mu_den / l); _mu = mu_num * (sigma_den / l); _d = sigma_den * (mu_den / l); // The rest of the constructor tests for possible overflow // Check for overflow in computing member variables IntType maxint = (std::numeric_limits::max)(); if (!( mu_den / l <= maxint / sigma_num && mu_num <= maxint / (sigma_den / l) && mu_den / l <= maxint / sigma_den )) throw RandomErr("DiscreteNormal: sigma or mu overflow"); // The probability that k = kmax is about 10^-543. int kmax = 50; // Check that max plausible result fits in an IntType, i.e., // _isig * (kmax + 1) + abs(_imu) does not lead to overflow. if (!( kmax + 1 <= maxint / _isig && _isig * (kmax + 1) <= maxint - iabs(_imu) )) throw RandomErr("DiscreteNormal: possible overflow a"); // Check xn0 = _sig * k + s * _mu; if (!( kmax <= maxint / _sig && _sig * kmax <= maxint - iabs(_mu) )) throw RandomErr("DiscreteNormal: possible overflow b"); // Check for overflow in LeadingDigit // p << bits, p = _sig - 1, bits = 8 if (!( _sig <= (maxint >> 8) )) throw RandomErr("DiscreteNormal: overflow in LeadingDigit"); } template template IntType DiscreteNormal::operator()(Random& r) const { for (;;) { int k = ExpProbN(r); if (!ExpProb(r, k * (k - 1))) continue; IntType s = r.Boolean() ? -1 : 1, xn = _sig * IntType(k) + s * _mu, i = iceil(xn, _d) + r.template Integer(_isig); xn = i * _d - xn; if (xn >= _sig || (k == 0 && s < 0 && xn <= 0)) continue; if (xn > 0) { word x0 = LeadingDigit(xn); // Find first digit in expansion in words int h = k + 1; while (h-- && B(r, k, x0, xn)); if (!(h < 0)) continue; } return s * i + _imu; } } template IntType DiscreteNormal::iceil(IntType n, IntType d) { IntType k = n / d; return k + (k * d < n ? 1 : 0); } template IntType DiscreteNormal::iabs(IntType n) { return n < 0 ? -n : n; } template IntType DiscreteNormal::gcd(IntType u, IntType v) { // Knuth, TAOCP, vol 2, 4.5.2, Algorithm A u = iabs(u); v = iabs(v); while (v > 0) { IntType r = u % v; u = v; v = r; } return u; } template typename DiscreteNormal::word DiscreteNormal::LeadingDigit(IntType& p) const { static const unsigned bits = 8; static const unsigned num = std::numeric_limits::digits / bits; STATIC_ASSERT(bits * num == std::numeric_limits::digits, "Number of digits in word must be multiple of 8"); word s = 0; for (unsigned c = num; c--;) { p <<= bits; s <<= bits; word d = word(p / _sig); s += d; p -= IntType(d) * _sig; } return s; } template template int DiscreteNormal::Choose(Random& r, int m) { int k = r.template Integer(m); return k == 0 ? 0 : (k == 1 ? -1 : 1); } template template bool DiscreteNormal::less_than(Random& r) const { for (unsigned j = 0; ; ++j) { if (j == _m) { // Need more bits in the old V if (_l == _m) _v.resize(_l += alloc_incr); _v[_m++] = r.template Integer(); } word w = r.template Integer(); if (w > _v[j]) return false; // New V is bigger, so exit else if (w < _v[j]) { _v[j] = w; // New V is smaller, update _v _m = j + 1; // adjusting its size return true; // and generate the next V } // Else w == _v[j] and we need to check the next word } } template template bool DiscreteNormal::less_than(Random& r, word x, IntType p) const { if (_m == 0) _v[_m++] = r.template Integer(); if (_v[0] != x) return _v[0] < x; for (unsigned j = 1; ; ++j) { if (p == 0) return false; if (j == _m) { if (_l == _m) _v.resize(_l += alloc_incr); _v[_m++] = r.template Integer(); } x = LeadingDigit(p); if (_v[j] != x) return _v[j] < x; } } template template bool DiscreteNormal::bernoulli(Random& r, word x, IntType p) const { word w = r.template Integer(); if (w != x) return w < x; for (;;) { if (p == 0) return false; x = LeadingDigit(p); w = r.template Integer(); if (w != x) return w < x; } } template template bool DiscreteNormal::ExpProbH(Random& r) const { static const word half = word(1) << (std::numeric_limits::digits - 1); _m = 0; if ((_v[_m++] = r.template Integer()) & half) return true; // Here _v < 1/2. Now loop finding decreasing V. Exit when first // increasing one is found. for (unsigned s = 0; ; s ^= 1) { // Parity of loop count if (!less_than(r)) return s != 0u; } } template template bool DiscreteNormal::ExpProb(Random& r, int n) const { while (n-- > 0) { if (!ExpProbH(r)) return false; } return true; } template template int DiscreteNormal::ExpProbN(Random& r) const { int n = 0; while (ExpProbH(r)) ++n; return n; } template template bool DiscreteNormal::B(Random& r, int k, word x0, IntType xn) const { int n = 0, h = 2 * k + 2, f; _m = 0; for (;; ++n) { if ( ((f = k ? 0 : Choose(r, h)) < 0) || !(n ? less_than(r) : less_than(r, x0, xn)) || ((f = k ? Choose(r, h) : f) < 0) || (f == 0 && !bernoulli(r, x0, xn)) ) break; } return (n % 2) == 0; } } // namespace RandomLib #endif // RANDOMLIB_DISCRETENORMAL_HPP