to_chars.hpp 37 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107
  1. #pragma once
  2. #include <array> // array
  3. #include <cassert> // assert
  4. #include <cmath> // signbit, isfinite
  5. #include <cstdint> // intN_t, uintN_t
  6. #include <cstring> // memcpy, memmove
  7. #include <limits> // numeric_limits
  8. #include <type_traits> // conditional
  9. #include <nlohmann/detail/boolean_operators.hpp>
  10. #include <nlohmann/detail/macro_scope.hpp>
  11. namespace nlohmann
  12. {
  13. namespace detail
  14. {
  15. /*!
  16. @brief implements the Grisu2 algorithm for binary to decimal floating-point
  17. conversion.
  18. This implementation is a slightly modified version of the reference
  19. implementation which may be obtained from
  20. http://florian.loitsch.com/publications (bench.tar.gz).
  21. The code is distributed under the MIT license, Copyright (c) 2009 Florian Loitsch.
  22. For a detailed description of the algorithm see:
  23. [1] Loitsch, "Printing Floating-Point Numbers Quickly and Accurately with
  24. Integers", Proceedings of the ACM SIGPLAN 2010 Conference on Programming
  25. Language Design and Implementation, PLDI 2010
  26. [2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and Accurately",
  27. Proceedings of the ACM SIGPLAN 1996 Conference on Programming Language
  28. Design and Implementation, PLDI 1996
  29. */
  30. namespace dtoa_impl
  31. {
  32. template <typename Target, typename Source>
  33. Target reinterpret_bits(const Source source)
  34. {
  35. static_assert(sizeof(Target) == sizeof(Source), "size mismatch");
  36. Target target;
  37. std::memcpy(&target, &source, sizeof(Source));
  38. return target;
  39. }
  40. struct diyfp // f * 2^e
  41. {
  42. static constexpr int kPrecision = 64; // = q
  43. std::uint64_t f = 0;
  44. int e = 0;
  45. constexpr diyfp(std::uint64_t f_, int e_) noexcept : f(f_), e(e_) {}
  46. /*!
  47. @brief returns x - y
  48. @pre x.e == y.e and x.f >= y.f
  49. */
  50. static diyfp sub(const diyfp& x, const diyfp& y) noexcept
  51. {
  52. assert(x.e == y.e);
  53. assert(x.f >= y.f);
  54. return {x.f - y.f, x.e};
  55. }
  56. /*!
  57. @brief returns x * y
  58. @note The result is rounded. (Only the upper q bits are returned.)
  59. */
  60. static diyfp mul(const diyfp& x, const diyfp& y) noexcept
  61. {
  62. static_assert(kPrecision == 64, "internal error");
  63. // Computes:
  64. // f = round((x.f * y.f) / 2^q)
  65. // e = x.e + y.e + q
  66. // Emulate the 64-bit * 64-bit multiplication:
  67. //
  68. // p = u * v
  69. // = (u_lo + 2^32 u_hi) (v_lo + 2^32 v_hi)
  70. // = (u_lo v_lo ) + 2^32 ((u_lo v_hi ) + (u_hi v_lo )) + 2^64 (u_hi v_hi )
  71. // = (p0 ) + 2^32 ((p1 ) + (p2 )) + 2^64 (p3 )
  72. // = (p0_lo + 2^32 p0_hi) + 2^32 ((p1_lo + 2^32 p1_hi) + (p2_lo + 2^32 p2_hi)) + 2^64 (p3 )
  73. // = (p0_lo ) + 2^32 (p0_hi + p1_lo + p2_lo ) + 2^64 (p1_hi + p2_hi + p3)
  74. // = (p0_lo ) + 2^32 (Q ) + 2^64 (H )
  75. // = (p0_lo ) + 2^32 (Q_lo + 2^32 Q_hi ) + 2^64 (H )
  76. //
  77. // (Since Q might be larger than 2^32 - 1)
  78. //
  79. // = (p0_lo + 2^32 Q_lo) + 2^64 (Q_hi + H)
  80. //
  81. // (Q_hi + H does not overflow a 64-bit int)
  82. //
  83. // = p_lo + 2^64 p_hi
  84. const std::uint64_t u_lo = x.f & 0xFFFFFFFFu;
  85. const std::uint64_t u_hi = x.f >> 32u;
  86. const std::uint64_t v_lo = y.f & 0xFFFFFFFFu;
  87. const std::uint64_t v_hi = y.f >> 32u;
  88. const std::uint64_t p0 = u_lo * v_lo;
  89. const std::uint64_t p1 = u_lo * v_hi;
  90. const std::uint64_t p2 = u_hi * v_lo;
  91. const std::uint64_t p3 = u_hi * v_hi;
  92. const std::uint64_t p0_hi = p0 >> 32u;
  93. const std::uint64_t p1_lo = p1 & 0xFFFFFFFFu;
  94. const std::uint64_t p1_hi = p1 >> 32u;
  95. const std::uint64_t p2_lo = p2 & 0xFFFFFFFFu;
  96. const std::uint64_t p2_hi = p2 >> 32u;
  97. std::uint64_t Q = p0_hi + p1_lo + p2_lo;
  98. // The full product might now be computed as
  99. //
  100. // p_hi = p3 + p2_hi + p1_hi + (Q >> 32)
  101. // p_lo = p0_lo + (Q << 32)
  102. //
  103. // But in this particular case here, the full p_lo is not required.
  104. // Effectively we only need to add the highest bit in p_lo to p_hi (and
  105. // Q_hi + 1 does not overflow).
  106. Q += std::uint64_t{1} << (64u - 32u - 1u); // round, ties up
  107. const std::uint64_t h = p3 + p2_hi + p1_hi + (Q >> 32u);
  108. return {h, x.e + y.e + 64};
  109. }
  110. /*!
  111. @brief normalize x such that the significand is >= 2^(q-1)
  112. @pre x.f != 0
  113. */
  114. static diyfp normalize(diyfp x) noexcept
  115. {
  116. assert(x.f != 0);
  117. while ((x.f >> 63u) == 0)
  118. {
  119. x.f <<= 1u;
  120. x.e--;
  121. }
  122. return x;
  123. }
  124. /*!
  125. @brief normalize x such that the result has the exponent E
  126. @pre e >= x.e and the upper e - x.e bits of x.f must be zero.
  127. */
  128. static diyfp normalize_to(const diyfp& x, const int target_exponent) noexcept
  129. {
  130. const int delta = x.e - target_exponent;
  131. assert(delta >= 0);
  132. assert(((x.f << delta) >> delta) == x.f);
  133. return {x.f << delta, target_exponent};
  134. }
  135. };
  136. struct boundaries
  137. {
  138. diyfp w;
  139. diyfp minus;
  140. diyfp plus;
  141. };
  142. /*!
  143. Compute the (normalized) diyfp representing the input number 'value' and its
  144. boundaries.
  145. @pre value must be finite and positive
  146. */
  147. template <typename FloatType>
  148. boundaries compute_boundaries(FloatType value)
  149. {
  150. assert(std::isfinite(value));
  151. assert(value > 0);
  152. // Convert the IEEE representation into a diyfp.
  153. //
  154. // If v is denormal:
  155. // value = 0.F * 2^(1 - bias) = ( F) * 2^(1 - bias - (p-1))
  156. // If v is normalized:
  157. // value = 1.F * 2^(E - bias) = (2^(p-1) + F) * 2^(E - bias - (p-1))
  158. static_assert(std::numeric_limits<FloatType>::is_iec559,
  159. "internal error: dtoa_short requires an IEEE-754 floating-point implementation");
  160. constexpr int kPrecision = std::numeric_limits<FloatType>::digits; // = p (includes the hidden bit)
  161. constexpr int kBias = std::numeric_limits<FloatType>::max_exponent - 1 + (kPrecision - 1);
  162. constexpr int kMinExp = 1 - kBias;
  163. constexpr std::uint64_t kHiddenBit = std::uint64_t{1} << (kPrecision - 1); // = 2^(p-1)
  164. using bits_type = typename std::conditional<kPrecision == 24, std::uint32_t, std::uint64_t >::type;
  165. const std::uint64_t bits = reinterpret_bits<bits_type>(value);
  166. const std::uint64_t E = bits >> (kPrecision - 1);
  167. const std::uint64_t F = bits & (kHiddenBit - 1);
  168. const bool is_denormal = E == 0;
  169. const diyfp v = is_denormal
  170. ? diyfp(F, kMinExp)
  171. : diyfp(F + kHiddenBit, static_cast<int>(E) - kBias);
  172. // Compute the boundaries m- and m+ of the floating-point value
  173. // v = f * 2^e.
  174. //
  175. // Determine v- and v+, the floating-point predecessor and successor if v,
  176. // respectively.
  177. //
  178. // v- = v - 2^e if f != 2^(p-1) or e == e_min (A)
  179. // = v - 2^(e-1) if f == 2^(p-1) and e > e_min (B)
  180. //
  181. // v+ = v + 2^e
  182. //
  183. // Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_
  184. // between m- and m+ round to v, regardless of how the input rounding
  185. // algorithm breaks ties.
  186. //
  187. // ---+-------------+-------------+-------------+-------------+--- (A)
  188. // v- m- v m+ v+
  189. //
  190. // -----------------+------+------+-------------+-------------+--- (B)
  191. // v- m- v m+ v+
  192. const bool lower_boundary_is_closer = F == 0 and E > 1;
  193. const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1);
  194. const diyfp m_minus = lower_boundary_is_closer
  195. ? diyfp(4 * v.f - 1, v.e - 2) // (B)
  196. : diyfp(2 * v.f - 1, v.e - 1); // (A)
  197. // Determine the normalized w+ = m+.
  198. const diyfp w_plus = diyfp::normalize(m_plus);
  199. // Determine w- = m- such that e_(w-) = e_(w+).
  200. const diyfp w_minus = diyfp::normalize_to(m_minus, w_plus.e);
  201. return {diyfp::normalize(v), w_minus, w_plus};
  202. }
  203. // Given normalized diyfp w, Grisu needs to find a (normalized) cached
  204. // power-of-ten c, such that the exponent of the product c * w = f * 2^e lies
  205. // within a certain range [alpha, gamma] (Definition 3.2 from [1])
  206. //
  207. // alpha <= e = e_c + e_w + q <= gamma
  208. //
  209. // or
  210. //
  211. // f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q
  212. // <= f_c * f_w * 2^gamma
  213. //
  214. // Since c and w are normalized, i.e. 2^(q-1) <= f < 2^q, this implies
  215. //
  216. // 2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma
  217. //
  218. // or
  219. //
  220. // 2^(q - 2 + alpha) <= c * w < 2^(q + gamma)
  221. //
  222. // The choice of (alpha,gamma) determines the size of the table and the form of
  223. // the digit generation procedure. Using (alpha,gamma)=(-60,-32) works out well
  224. // in practice:
  225. //
  226. // The idea is to cut the number c * w = f * 2^e into two parts, which can be
  227. // processed independently: An integral part p1, and a fractional part p2:
  228. //
  229. // f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e
  230. // = (f div 2^-e) + (f mod 2^-e) * 2^e
  231. // = p1 + p2 * 2^e
  232. //
  233. // The conversion of p1 into decimal form requires a series of divisions and
  234. // modulos by (a power of) 10. These operations are faster for 32-bit than for
  235. // 64-bit integers, so p1 should ideally fit into a 32-bit integer. This can be
  236. // achieved by choosing
  237. //
  238. // -e >= 32 or e <= -32 := gamma
  239. //
  240. // In order to convert the fractional part
  241. //
  242. // p2 * 2^e = p2 / 2^-e = d[-1] / 10^1 + d[-2] / 10^2 + ...
  243. //
  244. // into decimal form, the fraction is repeatedly multiplied by 10 and the digits
  245. // d[-i] are extracted in order:
  246. //
  247. // (10 * p2) div 2^-e = d[-1]
  248. // (10 * p2) mod 2^-e = d[-2] / 10^1 + ...
  249. //
  250. // The multiplication by 10 must not overflow. It is sufficient to choose
  251. //
  252. // 10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64.
  253. //
  254. // Since p2 = f mod 2^-e < 2^-e,
  255. //
  256. // -e <= 60 or e >= -60 := alpha
  257. constexpr int kAlpha = -60;
  258. constexpr int kGamma = -32;
  259. struct cached_power // c = f * 2^e ~= 10^k
  260. {
  261. std::uint64_t f;
  262. int e;
  263. int k;
  264. };
  265. /*!
  266. For a normalized diyfp w = f * 2^e, this function returns a (normalized) cached
  267. power-of-ten c = f_c * 2^e_c, such that the exponent of the product w * c
  268. satisfies (Definition 3.2 from [1])
  269. alpha <= e_c + e + q <= gamma.
  270. */
  271. inline cached_power get_cached_power_for_binary_exponent(int e)
  272. {
  273. // Now
  274. //
  275. // alpha <= e_c + e + q <= gamma (1)
  276. // ==> f_c * 2^alpha <= c * 2^e * 2^q
  277. //
  278. // and since the c's are normalized, 2^(q-1) <= f_c,
  279. //
  280. // ==> 2^(q - 1 + alpha) <= c * 2^(e + q)
  281. // ==> 2^(alpha - e - 1) <= c
  282. //
  283. // If c were an exact power of ten, i.e. c = 10^k, one may determine k as
  284. //
  285. // k = ceil( log_10( 2^(alpha - e - 1) ) )
  286. // = ceil( (alpha - e - 1) * log_10(2) )
  287. //
  288. // From the paper:
  289. // "In theory the result of the procedure could be wrong since c is rounded,
  290. // and the computation itself is approximated [...]. In practice, however,
  291. // this simple function is sufficient."
  292. //
  293. // For IEEE double precision floating-point numbers converted into
  294. // normalized diyfp's w = f * 2^e, with q = 64,
  295. //
  296. // e >= -1022 (min IEEE exponent)
  297. // -52 (p - 1)
  298. // -52 (p - 1, possibly normalize denormal IEEE numbers)
  299. // -11 (normalize the diyfp)
  300. // = -1137
  301. //
  302. // and
  303. //
  304. // e <= +1023 (max IEEE exponent)
  305. // -52 (p - 1)
  306. // -11 (normalize the diyfp)
  307. // = 960
  308. //
  309. // This binary exponent range [-1137,960] results in a decimal exponent
  310. // range [-307,324]. One does not need to store a cached power for each
  311. // k in this range. For each such k it suffices to find a cached power
  312. // such that the exponent of the product lies in [alpha,gamma].
  313. // This implies that the difference of the decimal exponents of adjacent
  314. // table entries must be less than or equal to
  315. //
  316. // floor( (gamma - alpha) * log_10(2) ) = 8.
  317. //
  318. // (A smaller distance gamma-alpha would require a larger table.)
  319. // NB:
  320. // Actually this function returns c, such that -60 <= e_c + e + 64 <= -34.
  321. constexpr int kCachedPowersMinDecExp = -300;
  322. constexpr int kCachedPowersDecStep = 8;
  323. static constexpr std::array<cached_power, 79> kCachedPowers =
  324. {
  325. {
  326. { 0xAB70FE17C79AC6CA, -1060, -300 },
  327. { 0xFF77B1FCBEBCDC4F, -1034, -292 },
  328. { 0xBE5691EF416BD60C, -1007, -284 },
  329. { 0x8DD01FAD907FFC3C, -980, -276 },
  330. { 0xD3515C2831559A83, -954, -268 },
  331. { 0x9D71AC8FADA6C9B5, -927, -260 },
  332. { 0xEA9C227723EE8BCB, -901, -252 },
  333. { 0xAECC49914078536D, -874, -244 },
  334. { 0x823C12795DB6CE57, -847, -236 },
  335. { 0xC21094364DFB5637, -821, -228 },
  336. { 0x9096EA6F3848984F, -794, -220 },
  337. { 0xD77485CB25823AC7, -768, -212 },
  338. { 0xA086CFCD97BF97F4, -741, -204 },
  339. { 0xEF340A98172AACE5, -715, -196 },
  340. { 0xB23867FB2A35B28E, -688, -188 },
  341. { 0x84C8D4DFD2C63F3B, -661, -180 },
  342. { 0xC5DD44271AD3CDBA, -635, -172 },
  343. { 0x936B9FCEBB25C996, -608, -164 },
  344. { 0xDBAC6C247D62A584, -582, -156 },
  345. { 0xA3AB66580D5FDAF6, -555, -148 },
  346. { 0xF3E2F893DEC3F126, -529, -140 },
  347. { 0xB5B5ADA8AAFF80B8, -502, -132 },
  348. { 0x87625F056C7C4A8B, -475, -124 },
  349. { 0xC9BCFF6034C13053, -449, -116 },
  350. { 0x964E858C91BA2655, -422, -108 },
  351. { 0xDFF9772470297EBD, -396, -100 },
  352. { 0xA6DFBD9FB8E5B88F, -369, -92 },
  353. { 0xF8A95FCF88747D94, -343, -84 },
  354. { 0xB94470938FA89BCF, -316, -76 },
  355. { 0x8A08F0F8BF0F156B, -289, -68 },
  356. { 0xCDB02555653131B6, -263, -60 },
  357. { 0x993FE2C6D07B7FAC, -236, -52 },
  358. { 0xE45C10C42A2B3B06, -210, -44 },
  359. { 0xAA242499697392D3, -183, -36 },
  360. { 0xFD87B5F28300CA0E, -157, -28 },
  361. { 0xBCE5086492111AEB, -130, -20 },
  362. { 0x8CBCCC096F5088CC, -103, -12 },
  363. { 0xD1B71758E219652C, -77, -4 },
  364. { 0x9C40000000000000, -50, 4 },
  365. { 0xE8D4A51000000000, -24, 12 },
  366. { 0xAD78EBC5AC620000, 3, 20 },
  367. { 0x813F3978F8940984, 30, 28 },
  368. { 0xC097CE7BC90715B3, 56, 36 },
  369. { 0x8F7E32CE7BEA5C70, 83, 44 },
  370. { 0xD5D238A4ABE98068, 109, 52 },
  371. { 0x9F4F2726179A2245, 136, 60 },
  372. { 0xED63A231D4C4FB27, 162, 68 },
  373. { 0xB0DE65388CC8ADA8, 189, 76 },
  374. { 0x83C7088E1AAB65DB, 216, 84 },
  375. { 0xC45D1DF942711D9A, 242, 92 },
  376. { 0x924D692CA61BE758, 269, 100 },
  377. { 0xDA01EE641A708DEA, 295, 108 },
  378. { 0xA26DA3999AEF774A, 322, 116 },
  379. { 0xF209787BB47D6B85, 348, 124 },
  380. { 0xB454E4A179DD1877, 375, 132 },
  381. { 0x865B86925B9BC5C2, 402, 140 },
  382. { 0xC83553C5C8965D3D, 428, 148 },
  383. { 0x952AB45CFA97A0B3, 455, 156 },
  384. { 0xDE469FBD99A05FE3, 481, 164 },
  385. { 0xA59BC234DB398C25, 508, 172 },
  386. { 0xF6C69A72A3989F5C, 534, 180 },
  387. { 0xB7DCBF5354E9BECE, 561, 188 },
  388. { 0x88FCF317F22241E2, 588, 196 },
  389. { 0xCC20CE9BD35C78A5, 614, 204 },
  390. { 0x98165AF37B2153DF, 641, 212 },
  391. { 0xE2A0B5DC971F303A, 667, 220 },
  392. { 0xA8D9D1535CE3B396, 694, 228 },
  393. { 0xFB9B7CD9A4A7443C, 720, 236 },
  394. { 0xBB764C4CA7A44410, 747, 244 },
  395. { 0x8BAB8EEFB6409C1A, 774, 252 },
  396. { 0xD01FEF10A657842C, 800, 260 },
  397. { 0x9B10A4E5E9913129, 827, 268 },
  398. { 0xE7109BFBA19C0C9D, 853, 276 },
  399. { 0xAC2820D9623BF429, 880, 284 },
  400. { 0x80444B5E7AA7CF85, 907, 292 },
  401. { 0xBF21E44003ACDD2D, 933, 300 },
  402. { 0x8E679C2F5E44FF8F, 960, 308 },
  403. { 0xD433179D9C8CB841, 986, 316 },
  404. { 0x9E19DB92B4E31BA9, 1013, 324 },
  405. }
  406. };
  407. // This computation gives exactly the same results for k as
  408. // k = ceil((kAlpha - e - 1) * 0.30102999566398114)
  409. // for |e| <= 1500, but doesn't require floating-point operations.
  410. // NB: log_10(2) ~= 78913 / 2^18
  411. assert(e >= -1500);
  412. assert(e <= 1500);
  413. const int f = kAlpha - e - 1;
  414. const int k = (f * 78913) / (1 << 18) + static_cast<int>(f > 0);
  415. const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) / kCachedPowersDecStep;
  416. assert(index >= 0);
  417. assert(static_cast<std::size_t>(index) < kCachedPowers.size());
  418. const cached_power cached = kCachedPowers[static_cast<std::size_t>(index)];
  419. assert(kAlpha <= cached.e + e + 64);
  420. assert(kGamma >= cached.e + e + 64);
  421. return cached;
  422. }
  423. /*!
  424. For n != 0, returns k, such that pow10 := 10^(k-1) <= n < 10^k.
  425. For n == 0, returns 1 and sets pow10 := 1.
  426. */
  427. inline int find_largest_pow10(const std::uint32_t n, std::uint32_t& pow10)
  428. {
  429. // LCOV_EXCL_START
  430. if (n >= 1000000000)
  431. {
  432. pow10 = 1000000000;
  433. return 10;
  434. }
  435. // LCOV_EXCL_STOP
  436. else if (n >= 100000000)
  437. {
  438. pow10 = 100000000;
  439. return 9;
  440. }
  441. else if (n >= 10000000)
  442. {
  443. pow10 = 10000000;
  444. return 8;
  445. }
  446. else if (n >= 1000000)
  447. {
  448. pow10 = 1000000;
  449. return 7;
  450. }
  451. else if (n >= 100000)
  452. {
  453. pow10 = 100000;
  454. return 6;
  455. }
  456. else if (n >= 10000)
  457. {
  458. pow10 = 10000;
  459. return 5;
  460. }
  461. else if (n >= 1000)
  462. {
  463. pow10 = 1000;
  464. return 4;
  465. }
  466. else if (n >= 100)
  467. {
  468. pow10 = 100;
  469. return 3;
  470. }
  471. else if (n >= 10)
  472. {
  473. pow10 = 10;
  474. return 2;
  475. }
  476. else
  477. {
  478. pow10 = 1;
  479. return 1;
  480. }
  481. }
  482. inline void grisu2_round(char* buf, int len, std::uint64_t dist, std::uint64_t delta,
  483. std::uint64_t rest, std::uint64_t ten_k)
  484. {
  485. assert(len >= 1);
  486. assert(dist <= delta);
  487. assert(rest <= delta);
  488. assert(ten_k > 0);
  489. // <--------------------------- delta ---->
  490. // <---- dist --------->
  491. // --------------[------------------+-------------------]--------------
  492. // M- w M+
  493. //
  494. // ten_k
  495. // <------>
  496. // <---- rest ---->
  497. // --------------[------------------+----+--------------]--------------
  498. // w V
  499. // = buf * 10^k
  500. //
  501. // ten_k represents a unit-in-the-last-place in the decimal representation
  502. // stored in buf.
  503. // Decrement buf by ten_k while this takes buf closer to w.
  504. // The tests are written in this order to avoid overflow in unsigned
  505. // integer arithmetic.
  506. while (rest < dist
  507. and delta - rest >= ten_k
  508. and (rest + ten_k < dist or dist - rest > rest + ten_k - dist))
  509. {
  510. assert(buf[len - 1] != '0');
  511. buf[len - 1]--;
  512. rest += ten_k;
  513. }
  514. }
  515. /*!
  516. Generates V = buffer * 10^decimal_exponent, such that M- <= V <= M+.
  517. M- and M+ must be normalized and share the same exponent -60 <= e <= -32.
  518. */
  519. inline void grisu2_digit_gen(char* buffer, int& length, int& decimal_exponent,
  520. diyfp M_minus, diyfp w, diyfp M_plus)
  521. {
  522. static_assert(kAlpha >= -60, "internal error");
  523. static_assert(kGamma <= -32, "internal error");
  524. // Generates the digits (and the exponent) of a decimal floating-point
  525. // number V = buffer * 10^decimal_exponent in the range [M-, M+]. The diyfp's
  526. // w, M- and M+ share the same exponent e, which satisfies alpha <= e <= gamma.
  527. //
  528. // <--------------------------- delta ---->
  529. // <---- dist --------->
  530. // --------------[------------------+-------------------]--------------
  531. // M- w M+
  532. //
  533. // Grisu2 generates the digits of M+ from left to right and stops as soon as
  534. // V is in [M-,M+].
  535. assert(M_plus.e >= kAlpha);
  536. assert(M_plus.e <= kGamma);
  537. std::uint64_t delta = diyfp::sub(M_plus, M_minus).f; // (significand of (M+ - M-), implicit exponent is e)
  538. std::uint64_t dist = diyfp::sub(M_plus, w ).f; // (significand of (M+ - w ), implicit exponent is e)
  539. // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0):
  540. //
  541. // M+ = f * 2^e
  542. // = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e
  543. // = ((p1 ) * 2^-e + (p2 )) * 2^e
  544. // = p1 + p2 * 2^e
  545. const diyfp one(std::uint64_t{1} << -M_plus.e, M_plus.e);
  546. auto p1 = static_cast<std::uint32_t>(M_plus.f >> -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.)
  547. std::uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e
  548. // 1)
  549. //
  550. // Generate the digits of the integral part p1 = d[n-1]...d[1]d[0]
  551. assert(p1 > 0);
  552. std::uint32_t pow10;
  553. const int k = find_largest_pow10(p1, pow10);
  554. // 10^(k-1) <= p1 < 10^k, pow10 = 10^(k-1)
  555. //
  556. // p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1))
  557. // = (d[k-1] ) * 10^(k-1) + (p1 mod 10^(k-1))
  558. //
  559. // M+ = p1 + p2 * 2^e
  560. // = d[k-1] * 10^(k-1) + (p1 mod 10^(k-1)) + p2 * 2^e
  561. // = d[k-1] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e
  562. // = d[k-1] * 10^(k-1) + ( rest) * 2^e
  563. //
  564. // Now generate the digits d[n] of p1 from left to right (n = k-1,...,0)
  565. //
  566. // p1 = d[k-1]...d[n] * 10^n + d[n-1]...d[0]
  567. //
  568. // but stop as soon as
  569. //
  570. // rest * 2^e = (d[n-1]...d[0] * 2^-e + p2) * 2^e <= delta * 2^e
  571. int n = k;
  572. while (n > 0)
  573. {
  574. // Invariants:
  575. // M+ = buffer * 10^n + (p1 + p2 * 2^e) (buffer = 0 for n = k)
  576. // pow10 = 10^(n-1) <= p1 < 10^n
  577. //
  578. const std::uint32_t d = p1 / pow10; // d = p1 div 10^(n-1)
  579. const std::uint32_t r = p1 % pow10; // r = p1 mod 10^(n-1)
  580. //
  581. // M+ = buffer * 10^n + (d * 10^(n-1) + r) + p2 * 2^e
  582. // = (buffer * 10 + d) * 10^(n-1) + (r + p2 * 2^e)
  583. //
  584. assert(d <= 9);
  585. buffer[length++] = static_cast<char>('0' + d); // buffer := buffer * 10 + d
  586. //
  587. // M+ = buffer * 10^(n-1) + (r + p2 * 2^e)
  588. //
  589. p1 = r;
  590. n--;
  591. //
  592. // M+ = buffer * 10^n + (p1 + p2 * 2^e)
  593. // pow10 = 10^n
  594. //
  595. // Now check if enough digits have been generated.
  596. // Compute
  597. //
  598. // p1 + p2 * 2^e = (p1 * 2^-e + p2) * 2^e = rest * 2^e
  599. //
  600. // Note:
  601. // Since rest and delta share the same exponent e, it suffices to
  602. // compare the significands.
  603. const std::uint64_t rest = (std::uint64_t{p1} << -one.e) + p2;
  604. if (rest <= delta)
  605. {
  606. // V = buffer * 10^n, with M- <= V <= M+.
  607. decimal_exponent += n;
  608. // We may now just stop. But instead look if the buffer could be
  609. // decremented to bring V closer to w.
  610. //
  611. // pow10 = 10^n is now 1 ulp in the decimal representation V.
  612. // The rounding procedure works with diyfp's with an implicit
  613. // exponent of e.
  614. //
  615. // 10^n = (10^n * 2^-e) * 2^e = ulp * 2^e
  616. //
  617. const std::uint64_t ten_n = std::uint64_t{pow10} << -one.e;
  618. grisu2_round(buffer, length, dist, delta, rest, ten_n);
  619. return;
  620. }
  621. pow10 /= 10;
  622. //
  623. // pow10 = 10^(n-1) <= p1 < 10^n
  624. // Invariants restored.
  625. }
  626. // 2)
  627. //
  628. // The digits of the integral part have been generated:
  629. //
  630. // M+ = d[k-1]...d[1]d[0] + p2 * 2^e
  631. // = buffer + p2 * 2^e
  632. //
  633. // Now generate the digits of the fractional part p2 * 2^e.
  634. //
  635. // Note:
  636. // No decimal point is generated: the exponent is adjusted instead.
  637. //
  638. // p2 actually represents the fraction
  639. //
  640. // p2 * 2^e
  641. // = p2 / 2^-e
  642. // = d[-1] / 10^1 + d[-2] / 10^2 + ...
  643. //
  644. // Now generate the digits d[-m] of p1 from left to right (m = 1,2,...)
  645. //
  646. // p2 * 2^e = d[-1]d[-2]...d[-m] * 10^-m
  647. // + 10^-m * (d[-m-1] / 10^1 + d[-m-2] / 10^2 + ...)
  648. //
  649. // using
  650. //
  651. // 10^m * p2 = ((10^m * p2) div 2^-e) * 2^-e + ((10^m * p2) mod 2^-e)
  652. // = ( d) * 2^-e + ( r)
  653. //
  654. // or
  655. // 10^m * p2 * 2^e = d + r * 2^e
  656. //
  657. // i.e.
  658. //
  659. // M+ = buffer + p2 * 2^e
  660. // = buffer + 10^-m * (d + r * 2^e)
  661. // = (buffer * 10^m + d) * 10^-m + 10^-m * r * 2^e
  662. //
  663. // and stop as soon as 10^-m * r * 2^e <= delta * 2^e
  664. assert(p2 > delta);
  665. int m = 0;
  666. for (;;)
  667. {
  668. // Invariant:
  669. // M+ = buffer * 10^-m + 10^-m * (d[-m-1] / 10 + d[-m-2] / 10^2 + ...) * 2^e
  670. // = buffer * 10^-m + 10^-m * (p2 ) * 2^e
  671. // = buffer * 10^-m + 10^-m * (1/10 * (10 * p2) ) * 2^e
  672. // = buffer * 10^-m + 10^-m * (1/10 * ((10*p2 div 2^-e) * 2^-e + (10*p2 mod 2^-e)) * 2^e
  673. //
  674. assert(p2 <= (std::numeric_limits<std::uint64_t>::max)() / 10);
  675. p2 *= 10;
  676. const std::uint64_t d = p2 >> -one.e; // d = (10 * p2) div 2^-e
  677. const std::uint64_t r = p2 & (one.f - 1); // r = (10 * p2) mod 2^-e
  678. //
  679. // M+ = buffer * 10^-m + 10^-m * (1/10 * (d * 2^-e + r) * 2^e
  680. // = buffer * 10^-m + 10^-m * (1/10 * (d + r * 2^e))
  681. // = (buffer * 10 + d) * 10^(-m-1) + 10^(-m-1) * r * 2^e
  682. //
  683. assert(d <= 9);
  684. buffer[length++] = static_cast<char>('0' + d); // buffer := buffer * 10 + d
  685. //
  686. // M+ = buffer * 10^(-m-1) + 10^(-m-1) * r * 2^e
  687. //
  688. p2 = r;
  689. m++;
  690. //
  691. // M+ = buffer * 10^-m + 10^-m * p2 * 2^e
  692. // Invariant restored.
  693. // Check if enough digits have been generated.
  694. //
  695. // 10^-m * p2 * 2^e <= delta * 2^e
  696. // p2 * 2^e <= 10^m * delta * 2^e
  697. // p2 <= 10^m * delta
  698. delta *= 10;
  699. dist *= 10;
  700. if (p2 <= delta)
  701. {
  702. break;
  703. }
  704. }
  705. // V = buffer * 10^-m, with M- <= V <= M+.
  706. decimal_exponent -= m;
  707. // 1 ulp in the decimal representation is now 10^-m.
  708. // Since delta and dist are now scaled by 10^m, we need to do the
  709. // same with ulp in order to keep the units in sync.
  710. //
  711. // 10^m * 10^-m = 1 = 2^-e * 2^e = ten_m * 2^e
  712. //
  713. const std::uint64_t ten_m = one.f;
  714. grisu2_round(buffer, length, dist, delta, p2, ten_m);
  715. // By construction this algorithm generates the shortest possible decimal
  716. // number (Loitsch, Theorem 6.2) which rounds back to w.
  717. // For an input number of precision p, at least
  718. //
  719. // N = 1 + ceil(p * log_10(2))
  720. //
  721. // decimal digits are sufficient to identify all binary floating-point
  722. // numbers (Matula, "In-and-Out conversions").
  723. // This implies that the algorithm does not produce more than N decimal
  724. // digits.
  725. //
  726. // N = 17 for p = 53 (IEEE double precision)
  727. // N = 9 for p = 24 (IEEE single precision)
  728. }
  729. /*!
  730. v = buf * 10^decimal_exponent
  731. len is the length of the buffer (number of decimal digits)
  732. The buffer must be large enough, i.e. >= max_digits10.
  733. */
  734. JSON_HEDLEY_NON_NULL(1)
  735. inline void grisu2(char* buf, int& len, int& decimal_exponent,
  736. diyfp m_minus, diyfp v, diyfp m_plus)
  737. {
  738. assert(m_plus.e == m_minus.e);
  739. assert(m_plus.e == v.e);
  740. // --------(-----------------------+-----------------------)-------- (A)
  741. // m- v m+
  742. //
  743. // --------------------(-----------+-----------------------)-------- (B)
  744. // m- v m+
  745. //
  746. // First scale v (and m- and m+) such that the exponent is in the range
  747. // [alpha, gamma].
  748. const cached_power cached = get_cached_power_for_binary_exponent(m_plus.e);
  749. const diyfp c_minus_k(cached.f, cached.e); // = c ~= 10^-k
  750. // The exponent of the products is = v.e + c_minus_k.e + q and is in the range [alpha,gamma]
  751. const diyfp w = diyfp::mul(v, c_minus_k);
  752. const diyfp w_minus = diyfp::mul(m_minus, c_minus_k);
  753. const diyfp w_plus = diyfp::mul(m_plus, c_minus_k);
  754. // ----(---+---)---------------(---+---)---------------(---+---)----
  755. // w- w w+
  756. // = c*m- = c*v = c*m+
  757. //
  758. // diyfp::mul rounds its result and c_minus_k is approximated too. w, w- and
  759. // w+ are now off by a small amount.
  760. // In fact:
  761. //
  762. // w - v * 10^k < 1 ulp
  763. //
  764. // To account for this inaccuracy, add resp. subtract 1 ulp.
  765. //
  766. // --------+---[---------------(---+---)---------------]---+--------
  767. // w- M- w M+ w+
  768. //
  769. // Now any number in [M-, M+] (bounds included) will round to w when input,
  770. // regardless of how the input rounding algorithm breaks ties.
  771. //
  772. // And digit_gen generates the shortest possible such number in [M-, M+].
  773. // Note that this does not mean that Grisu2 always generates the shortest
  774. // possible number in the interval (m-, m+).
  775. const diyfp M_minus(w_minus.f + 1, w_minus.e);
  776. const diyfp M_plus (w_plus.f - 1, w_plus.e );
  777. decimal_exponent = -cached.k; // = -(-k) = k
  778. grisu2_digit_gen(buf, len, decimal_exponent, M_minus, w, M_plus);
  779. }
  780. /*!
  781. v = buf * 10^decimal_exponent
  782. len is the length of the buffer (number of decimal digits)
  783. The buffer must be large enough, i.e. >= max_digits10.
  784. */
  785. template <typename FloatType>
  786. JSON_HEDLEY_NON_NULL(1)
  787. void grisu2(char* buf, int& len, int& decimal_exponent, FloatType value)
  788. {
  789. static_assert(diyfp::kPrecision >= std::numeric_limits<FloatType>::digits + 3,
  790. "internal error: not enough precision");
  791. assert(std::isfinite(value));
  792. assert(value > 0);
  793. // If the neighbors (and boundaries) of 'value' are always computed for double-precision
  794. // numbers, all float's can be recovered using strtod (and strtof). However, the resulting
  795. // decimal representations are not exactly "short".
  796. //
  797. // The documentation for 'std::to_chars' (https://en.cppreference.com/w/cpp/utility/to_chars)
  798. // says "value is converted to a string as if by std::sprintf in the default ("C") locale"
  799. // and since sprintf promotes float's to double's, I think this is exactly what 'std::to_chars'
  800. // does.
  801. // On the other hand, the documentation for 'std::to_chars' requires that "parsing the
  802. // representation using the corresponding std::from_chars function recovers value exactly". That
  803. // indicates that single precision floating-point numbers should be recovered using
  804. // 'std::strtof'.
  805. //
  806. // NB: If the neighbors are computed for single-precision numbers, there is a single float
  807. // (7.0385307e-26f) which can't be recovered using strtod. The resulting double precision
  808. // value is off by 1 ulp.
  809. #if 0
  810. const boundaries w = compute_boundaries(static_cast<double>(value));
  811. #else
  812. const boundaries w = compute_boundaries(value);
  813. #endif
  814. grisu2(buf, len, decimal_exponent, w.minus, w.w, w.plus);
  815. }
  816. /*!
  817. @brief appends a decimal representation of e to buf
  818. @return a pointer to the element following the exponent.
  819. @pre -1000 < e < 1000
  820. */
  821. JSON_HEDLEY_NON_NULL(1)
  822. JSON_HEDLEY_RETURNS_NON_NULL
  823. inline char* append_exponent(char* buf, int e)
  824. {
  825. assert(e > -1000);
  826. assert(e < 1000);
  827. if (e < 0)
  828. {
  829. e = -e;
  830. *buf++ = '-';
  831. }
  832. else
  833. {
  834. *buf++ = '+';
  835. }
  836. auto k = static_cast<std::uint32_t>(e);
  837. if (k < 10)
  838. {
  839. // Always print at least two digits in the exponent.
  840. // This is for compatibility with printf("%g").
  841. *buf++ = '0';
  842. *buf++ = static_cast<char>('0' + k);
  843. }
  844. else if (k < 100)
  845. {
  846. *buf++ = static_cast<char>('0' + k / 10);
  847. k %= 10;
  848. *buf++ = static_cast<char>('0' + k);
  849. }
  850. else
  851. {
  852. *buf++ = static_cast<char>('0' + k / 100);
  853. k %= 100;
  854. *buf++ = static_cast<char>('0' + k / 10);
  855. k %= 10;
  856. *buf++ = static_cast<char>('0' + k);
  857. }
  858. return buf;
  859. }
  860. /*!
  861. @brief prettify v = buf * 10^decimal_exponent
  862. If v is in the range [10^min_exp, 10^max_exp) it will be printed in fixed-point
  863. notation. Otherwise it will be printed in exponential notation.
  864. @pre min_exp < 0
  865. @pre max_exp > 0
  866. */
  867. JSON_HEDLEY_NON_NULL(1)
  868. JSON_HEDLEY_RETURNS_NON_NULL
  869. inline char* format_buffer(char* buf, int len, int decimal_exponent,
  870. int min_exp, int max_exp)
  871. {
  872. assert(min_exp < 0);
  873. assert(max_exp > 0);
  874. const int k = len;
  875. const int n = len + decimal_exponent;
  876. // v = buf * 10^(n-k)
  877. // k is the length of the buffer (number of decimal digits)
  878. // n is the position of the decimal point relative to the start of the buffer.
  879. if (k <= n and n <= max_exp)
  880. {
  881. // digits[000]
  882. // len <= max_exp + 2
  883. std::memset(buf + k, '0', static_cast<size_t>(n) - static_cast<size_t>(k));
  884. // Make it look like a floating-point number (#362, #378)
  885. buf[n + 0] = '.';
  886. buf[n + 1] = '0';
  887. return buf + (static_cast<size_t>(n) + 2);
  888. }
  889. if (0 < n and n <= max_exp)
  890. {
  891. // dig.its
  892. // len <= max_digits10 + 1
  893. assert(k > n);
  894. std::memmove(buf + (static_cast<size_t>(n) + 1), buf + n, static_cast<size_t>(k) - static_cast<size_t>(n));
  895. buf[n] = '.';
  896. return buf + (static_cast<size_t>(k) + 1U);
  897. }
  898. if (min_exp < n and n <= 0)
  899. {
  900. // 0.[000]digits
  901. // len <= 2 + (-min_exp - 1) + max_digits10
  902. std::memmove(buf + (2 + static_cast<size_t>(-n)), buf, static_cast<size_t>(k));
  903. buf[0] = '0';
  904. buf[1] = '.';
  905. std::memset(buf + 2, '0', static_cast<size_t>(-n));
  906. return buf + (2U + static_cast<size_t>(-n) + static_cast<size_t>(k));
  907. }
  908. if (k == 1)
  909. {
  910. // dE+123
  911. // len <= 1 + 5
  912. buf += 1;
  913. }
  914. else
  915. {
  916. // d.igitsE+123
  917. // len <= max_digits10 + 1 + 5
  918. std::memmove(buf + 2, buf + 1, static_cast<size_t>(k) - 1);
  919. buf[1] = '.';
  920. buf += 1 + static_cast<size_t>(k);
  921. }
  922. *buf++ = 'e';
  923. return append_exponent(buf, n - 1);
  924. }
  925. } // namespace dtoa_impl
  926. /*!
  927. @brief generates a decimal representation of the floating-point number value in [first, last).
  928. The format of the resulting decimal representation is similar to printf's %g
  929. format. Returns an iterator pointing past-the-end of the decimal representation.
  930. @note The input number must be finite, i.e. NaN's and Inf's are not supported.
  931. @note The buffer must be large enough.
  932. @note The result is NOT null-terminated.
  933. */
  934. template <typename FloatType>
  935. JSON_HEDLEY_NON_NULL(1, 2)
  936. JSON_HEDLEY_RETURNS_NON_NULL
  937. char* to_chars(char* first, const char* last, FloatType value)
  938. {
  939. static_cast<void>(last); // maybe unused - fix warning
  940. assert(std::isfinite(value));
  941. // Use signbit(value) instead of (value < 0) since signbit works for -0.
  942. if (std::signbit(value))
  943. {
  944. value = -value;
  945. *first++ = '-';
  946. }
  947. if (value == 0) // +-0
  948. {
  949. *first++ = '0';
  950. // Make it look like a floating-point number (#362, #378)
  951. *first++ = '.';
  952. *first++ = '0';
  953. return first;
  954. }
  955. assert(last - first >= std::numeric_limits<FloatType>::max_digits10);
  956. // Compute v = buffer * 10^decimal_exponent.
  957. // The decimal digits are stored in the buffer, which needs to be interpreted
  958. // as an unsigned decimal integer.
  959. // len is the length of the buffer, i.e. the number of decimal digits.
  960. int len = 0;
  961. int decimal_exponent = 0;
  962. dtoa_impl::grisu2(first, len, decimal_exponent, value);
  963. assert(len <= std::numeric_limits<FloatType>::max_digits10);
  964. // Format the buffer like printf("%.*g", prec, value)
  965. constexpr int kMinExp = -4;
  966. // Use digits10 here to increase compatibility with version 2.
  967. constexpr int kMaxExp = std::numeric_limits<FloatType>::digits10;
  968. assert(last - first >= kMaxExp + 2);
  969. assert(last - first >= 2 + (-kMinExp - 1) + std::numeric_limits<FloatType>::max_digits10);
  970. assert(last - first >= std::numeric_limits<FloatType>::max_digits10 + 6);
  971. return dtoa_impl::format_buffer(first, len, decimal_exponent, kMinExp, kMaxExp);
  972. }
  973. } // namespace detail
  974. } // namespace nlohmann