serializer.hpp 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921
  1. #pragma once
  2. #include <algorithm> // reverse, remove, fill, find, none_of
  3. #include <array> // array
  4. #include <cassert> // assert
  5. #include <ciso646> // and, or
  6. #include <clocale> // localeconv, lconv
  7. #include <cmath> // labs, isfinite, isnan, signbit
  8. #include <cstddef> // size_t, ptrdiff_t
  9. #include <cstdint> // uint8_t
  10. #include <cstdio> // snprintf
  11. #include <limits> // numeric_limits
  12. #include <string> // string
  13. #include <type_traits> // is_same
  14. #include <utility> // move
  15. #include <nlohmann/detail/conversions/to_chars.hpp>
  16. #include <nlohmann/detail/exceptions.hpp>
  17. #include <nlohmann/detail/macro_scope.hpp>
  18. #include <nlohmann/detail/meta/cpp_future.hpp>
  19. #include <nlohmann/detail/output/binary_writer.hpp>
  20. #include <nlohmann/detail/output/output_adapters.hpp>
  21. #include <nlohmann/detail/value_t.hpp>
  22. namespace nlohmann
  23. {
  24. namespace detail
  25. {
  26. ///////////////////
  27. // serialization //
  28. ///////////////////
  29. /// how to treat decoding errors
  30. enum class error_handler_t
  31. {
  32. strict, ///< throw a type_error exception in case of invalid UTF-8
  33. replace, ///< replace invalid UTF-8 sequences with U+FFFD
  34. ignore ///< ignore invalid UTF-8 sequences
  35. };
  36. template<typename BasicJsonType>
  37. class serializer
  38. {
  39. using string_t = typename BasicJsonType::string_t;
  40. using number_float_t = typename BasicJsonType::number_float_t;
  41. using number_integer_t = typename BasicJsonType::number_integer_t;
  42. using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  43. using binary_t = typename BasicJsonType::binary_t;
  44. static constexpr std::uint8_t UTF8_ACCEPT = 0;
  45. static constexpr std::uint8_t UTF8_REJECT = 1;
  46. public:
  47. /*!
  48. @param[in] s output stream to serialize to
  49. @param[in] ichar indentation character to use
  50. @param[in] error_handler_ how to react on decoding errors
  51. */
  52. serializer(output_adapter_t<char> s, const char ichar,
  53. error_handler_t error_handler_ = error_handler_t::strict)
  54. : o(std::move(s))
  55. , loc(std::localeconv())
  56. , thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep))
  57. , decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point))
  58. , indent_char(ichar)
  59. , indent_string(512, indent_char)
  60. , error_handler(error_handler_)
  61. {}
  62. // delete because of pointer members
  63. serializer(const serializer&) = delete;
  64. serializer& operator=(const serializer&) = delete;
  65. serializer(serializer&&) = delete;
  66. serializer& operator=(serializer&&) = delete;
  67. ~serializer() = default;
  68. /*!
  69. @brief internal implementation of the serialization function
  70. This function is called by the public member function dump and organizes
  71. the serialization internally. The indentation level is propagated as
  72. additional parameter. In case of arrays and objects, the function is
  73. called recursively.
  74. - strings and object keys are escaped using `escape_string()`
  75. - integer numbers are converted implicitly via `operator<<`
  76. - floating-point numbers are converted to a string using `"%g"` format
  77. - if specified to, binary values are output using the syntax `b[]`, otherwise an exception is thrown
  78. @param[in] val value to serialize
  79. @param[in] pretty_print whether the output shall be pretty-printed
  80. @param[in] indent_step the indent level
  81. @param[in] current_indent the current indent level (only used internally)
  82. @param[in] serialize_binary whether the output shall include non-standard binary output
  83. */
  84. void dump(const BasicJsonType& val, const bool pretty_print,
  85. const bool ensure_ascii,
  86. const unsigned int indent_step,
  87. const unsigned int current_indent = 0,
  88. const bool serialize_binary = false)
  89. {
  90. switch (val.m_type)
  91. {
  92. case value_t::object:
  93. {
  94. if (val.m_value.object->empty())
  95. {
  96. o->write_characters("{}", 2);
  97. return;
  98. }
  99. if (pretty_print)
  100. {
  101. o->write_characters("{\n", 2);
  102. // variable to hold indentation for recursive calls
  103. const auto new_indent = current_indent + indent_step;
  104. if (JSON_HEDLEY_UNLIKELY(indent_string.size() < new_indent))
  105. {
  106. indent_string.resize(indent_string.size() * 2, ' ');
  107. }
  108. // first n-1 elements
  109. auto i = val.m_value.object->cbegin();
  110. for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
  111. {
  112. o->write_characters(indent_string.c_str(), new_indent);
  113. o->write_character('\"');
  114. dump_escaped(i->first, ensure_ascii);
  115. o->write_characters("\": ", 3);
  116. dump(i->second, true, ensure_ascii, indent_step, new_indent, serialize_binary);
  117. o->write_characters(",\n", 2);
  118. }
  119. // last element
  120. assert(i != val.m_value.object->cend());
  121. assert(std::next(i) == val.m_value.object->cend());
  122. o->write_characters(indent_string.c_str(), new_indent);
  123. o->write_character('\"');
  124. dump_escaped(i->first, ensure_ascii);
  125. o->write_characters("\": ", 3);
  126. dump(i->second, true, ensure_ascii, indent_step, new_indent, serialize_binary);
  127. o->write_character('\n');
  128. o->write_characters(indent_string.c_str(), current_indent);
  129. o->write_character('}');
  130. }
  131. else
  132. {
  133. o->write_character('{');
  134. // first n-1 elements
  135. auto i = val.m_value.object->cbegin();
  136. for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
  137. {
  138. o->write_character('\"');
  139. dump_escaped(i->first, ensure_ascii);
  140. o->write_characters("\":", 2);
  141. dump(i->second, false, ensure_ascii, indent_step, current_indent, serialize_binary);
  142. o->write_character(',');
  143. }
  144. // last element
  145. assert(i != val.m_value.object->cend());
  146. assert(std::next(i) == val.m_value.object->cend());
  147. o->write_character('\"');
  148. dump_escaped(i->first, ensure_ascii);
  149. o->write_characters("\":", 2);
  150. dump(i->second, false, ensure_ascii, indent_step, current_indent, serialize_binary);
  151. o->write_character('}');
  152. }
  153. return;
  154. }
  155. case value_t::array:
  156. {
  157. if (val.m_value.array->empty())
  158. {
  159. o->write_characters("[]", 2);
  160. return;
  161. }
  162. if (pretty_print)
  163. {
  164. o->write_characters("[\n", 2);
  165. // variable to hold indentation for recursive calls
  166. const auto new_indent = current_indent + indent_step;
  167. if (JSON_HEDLEY_UNLIKELY(indent_string.size() < new_indent))
  168. {
  169. indent_string.resize(indent_string.size() * 2, ' ');
  170. }
  171. // first n-1 elements
  172. for (auto i = val.m_value.array->cbegin();
  173. i != val.m_value.array->cend() - 1; ++i)
  174. {
  175. o->write_characters(indent_string.c_str(), new_indent);
  176. dump(*i, true, ensure_ascii, indent_step, new_indent, serialize_binary);
  177. o->write_characters(",\n", 2);
  178. }
  179. // last element
  180. assert(not val.m_value.array->empty());
  181. o->write_characters(indent_string.c_str(), new_indent);
  182. dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent, serialize_binary);
  183. o->write_character('\n');
  184. o->write_characters(indent_string.c_str(), current_indent);
  185. o->write_character(']');
  186. }
  187. else
  188. {
  189. o->write_character('[');
  190. // first n-1 elements
  191. for (auto i = val.m_value.array->cbegin();
  192. i != val.m_value.array->cend() - 1; ++i)
  193. {
  194. dump(*i, false, ensure_ascii, indent_step, current_indent, serialize_binary);
  195. o->write_character(',');
  196. }
  197. // last element
  198. assert(not val.m_value.array->empty());
  199. dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent, serialize_binary);
  200. o->write_character(']');
  201. }
  202. return;
  203. }
  204. case value_t::string:
  205. {
  206. o->write_character('\"');
  207. dump_escaped(*val.m_value.string, ensure_ascii);
  208. o->write_character('\"');
  209. return;
  210. }
  211. case value_t::binary:
  212. {
  213. if (not serialize_binary)
  214. {
  215. JSON_THROW(type_error::create(317, "cannot serialize binary data to text JSON"));
  216. }
  217. if (val.m_value.binary->empty())
  218. {
  219. o->write_characters("b[]", 3);
  220. }
  221. else if (pretty_print)
  222. {
  223. o->write_characters("b[", 2);
  224. for (auto i = val.m_value.binary->cbegin();
  225. i != val.m_value.binary->cend() - 1; ++i)
  226. {
  227. dump_integer(*i);
  228. o->write_character(',');
  229. if (std::distance(val.m_value.binary->cbegin(), i) % 16 == 0)
  230. {
  231. o->write_character('\n');
  232. }
  233. else
  234. {
  235. o->write_character(' ');
  236. }
  237. }
  238. dump_integer(val.m_value.binary->back());
  239. o->write_character(']');
  240. }
  241. else
  242. {
  243. o->write_characters("b[", 2);
  244. for (auto i = val.m_value.binary->cbegin();
  245. i != val.m_value.binary->cend() - 1; ++i)
  246. {
  247. dump_integer(*i);
  248. o->write_character(',');
  249. }
  250. dump_integer(val.m_value.binary->back());
  251. o->write_character(']');
  252. }
  253. return;
  254. }
  255. case value_t::boolean:
  256. {
  257. if (val.m_value.boolean)
  258. {
  259. o->write_characters("true", 4);
  260. }
  261. else
  262. {
  263. o->write_characters("false", 5);
  264. }
  265. return;
  266. }
  267. case value_t::number_integer:
  268. {
  269. dump_integer(val.m_value.number_integer);
  270. return;
  271. }
  272. case value_t::number_unsigned:
  273. {
  274. dump_integer(val.m_value.number_unsigned);
  275. return;
  276. }
  277. case value_t::number_float:
  278. {
  279. dump_float(val.m_value.number_float);
  280. return;
  281. }
  282. case value_t::discarded:
  283. {
  284. o->write_characters("<discarded>", 11);
  285. return;
  286. }
  287. case value_t::null:
  288. {
  289. o->write_characters("null", 4);
  290. return;
  291. }
  292. default: // LCOV_EXCL_LINE
  293. assert(false); // LCOV_EXCL_LINE
  294. }
  295. }
  296. private:
  297. /*!
  298. @brief dump escaped string
  299. Escape a string by replacing certain special characters by a sequence of an
  300. escape character (backslash) and another character and other control
  301. characters by a sequence of "\u" followed by a four-digit hex
  302. representation. The escaped string is written to output stream @a o.
  303. @param[in] s the string to escape
  304. @param[in] ensure_ascii whether to escape non-ASCII characters with
  305. \uXXXX sequences
  306. @complexity Linear in the length of string @a s.
  307. */
  308. void dump_escaped(const string_t& s, const bool ensure_ascii)
  309. {
  310. std::uint32_t codepoint;
  311. std::uint8_t state = UTF8_ACCEPT;
  312. std::size_t bytes = 0; // number of bytes written to string_buffer
  313. // number of bytes written at the point of the last valid byte
  314. std::size_t bytes_after_last_accept = 0;
  315. std::size_t undumped_chars = 0;
  316. for (std::size_t i = 0; i < s.size(); ++i)
  317. {
  318. const auto byte = static_cast<uint8_t>(s[i]);
  319. switch (decode(state, codepoint, byte))
  320. {
  321. case UTF8_ACCEPT: // decode found a new code point
  322. {
  323. switch (codepoint)
  324. {
  325. case 0x08: // backspace
  326. {
  327. string_buffer[bytes++] = '\\';
  328. string_buffer[bytes++] = 'b';
  329. break;
  330. }
  331. case 0x09: // horizontal tab
  332. {
  333. string_buffer[bytes++] = '\\';
  334. string_buffer[bytes++] = 't';
  335. break;
  336. }
  337. case 0x0A: // newline
  338. {
  339. string_buffer[bytes++] = '\\';
  340. string_buffer[bytes++] = 'n';
  341. break;
  342. }
  343. case 0x0C: // formfeed
  344. {
  345. string_buffer[bytes++] = '\\';
  346. string_buffer[bytes++] = 'f';
  347. break;
  348. }
  349. case 0x0D: // carriage return
  350. {
  351. string_buffer[bytes++] = '\\';
  352. string_buffer[bytes++] = 'r';
  353. break;
  354. }
  355. case 0x22: // quotation mark
  356. {
  357. string_buffer[bytes++] = '\\';
  358. string_buffer[bytes++] = '\"';
  359. break;
  360. }
  361. case 0x5C: // reverse solidus
  362. {
  363. string_buffer[bytes++] = '\\';
  364. string_buffer[bytes++] = '\\';
  365. break;
  366. }
  367. default:
  368. {
  369. // escape control characters (0x00..0x1F) or, if
  370. // ensure_ascii parameter is used, non-ASCII characters
  371. if ((codepoint <= 0x1F) or (ensure_ascii and (codepoint >= 0x7F)))
  372. {
  373. if (codepoint <= 0xFFFF)
  374. {
  375. (std::snprintf)(string_buffer.data() + bytes, 7, "\\u%04x",
  376. static_cast<std::uint16_t>(codepoint));
  377. bytes += 6;
  378. }
  379. else
  380. {
  381. (std::snprintf)(string_buffer.data() + bytes, 13, "\\u%04x\\u%04x",
  382. static_cast<std::uint16_t>(0xD7C0u + (codepoint >> 10u)),
  383. static_cast<std::uint16_t>(0xDC00u + (codepoint & 0x3FFu)));
  384. bytes += 12;
  385. }
  386. }
  387. else
  388. {
  389. // copy byte to buffer (all previous bytes
  390. // been copied have in default case above)
  391. string_buffer[bytes++] = s[i];
  392. }
  393. break;
  394. }
  395. }
  396. // write buffer and reset index; there must be 13 bytes
  397. // left, as this is the maximal number of bytes to be
  398. // written ("\uxxxx\uxxxx\0") for one code point
  399. if (string_buffer.size() - bytes < 13)
  400. {
  401. o->write_characters(string_buffer.data(), bytes);
  402. bytes = 0;
  403. }
  404. // remember the byte position of this accept
  405. bytes_after_last_accept = bytes;
  406. undumped_chars = 0;
  407. break;
  408. }
  409. case UTF8_REJECT: // decode found invalid UTF-8 byte
  410. {
  411. switch (error_handler)
  412. {
  413. case error_handler_t::strict:
  414. {
  415. std::string sn(3, '\0');
  416. (std::snprintf)(&sn[0], sn.size(), "%.2X", byte);
  417. JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn));
  418. }
  419. case error_handler_t::ignore:
  420. case error_handler_t::replace:
  421. {
  422. // in case we saw this character the first time, we
  423. // would like to read it again, because the byte
  424. // may be OK for itself, but just not OK for the
  425. // previous sequence
  426. if (undumped_chars > 0)
  427. {
  428. --i;
  429. }
  430. // reset length buffer to the last accepted index;
  431. // thus removing/ignoring the invalid characters
  432. bytes = bytes_after_last_accept;
  433. if (error_handler == error_handler_t::replace)
  434. {
  435. // add a replacement character
  436. if (ensure_ascii)
  437. {
  438. string_buffer[bytes++] = '\\';
  439. string_buffer[bytes++] = 'u';
  440. string_buffer[bytes++] = 'f';
  441. string_buffer[bytes++] = 'f';
  442. string_buffer[bytes++] = 'f';
  443. string_buffer[bytes++] = 'd';
  444. }
  445. else
  446. {
  447. string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xEF');
  448. string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBF');
  449. string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBD');
  450. }
  451. // write buffer and reset index; there must be 13 bytes
  452. // left, as this is the maximal number of bytes to be
  453. // written ("\uxxxx\uxxxx\0") for one code point
  454. if (string_buffer.size() - bytes < 13)
  455. {
  456. o->write_characters(string_buffer.data(), bytes);
  457. bytes = 0;
  458. }
  459. bytes_after_last_accept = bytes;
  460. }
  461. undumped_chars = 0;
  462. // continue processing the string
  463. state = UTF8_ACCEPT;
  464. break;
  465. }
  466. default: // LCOV_EXCL_LINE
  467. assert(false); // LCOV_EXCL_LINE
  468. }
  469. break;
  470. }
  471. default: // decode found yet incomplete multi-byte code point
  472. {
  473. if (not ensure_ascii)
  474. {
  475. // code point will not be escaped - copy byte to buffer
  476. string_buffer[bytes++] = s[i];
  477. }
  478. ++undumped_chars;
  479. break;
  480. }
  481. }
  482. }
  483. // we finished processing the string
  484. if (JSON_HEDLEY_LIKELY(state == UTF8_ACCEPT))
  485. {
  486. // write buffer
  487. if (bytes > 0)
  488. {
  489. o->write_characters(string_buffer.data(), bytes);
  490. }
  491. }
  492. else
  493. {
  494. // we finish reading, but do not accept: string was incomplete
  495. switch (error_handler)
  496. {
  497. case error_handler_t::strict:
  498. {
  499. std::string sn(3, '\0');
  500. (std::snprintf)(&sn[0], sn.size(), "%.2X", static_cast<std::uint8_t>(s.back()));
  501. JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn));
  502. }
  503. case error_handler_t::ignore:
  504. {
  505. // write all accepted bytes
  506. o->write_characters(string_buffer.data(), bytes_after_last_accept);
  507. break;
  508. }
  509. case error_handler_t::replace:
  510. {
  511. // write all accepted bytes
  512. o->write_characters(string_buffer.data(), bytes_after_last_accept);
  513. // add a replacement character
  514. if (ensure_ascii)
  515. {
  516. o->write_characters("\\ufffd", 6);
  517. }
  518. else
  519. {
  520. o->write_characters("\xEF\xBF\xBD", 3);
  521. }
  522. break;
  523. }
  524. default: // LCOV_EXCL_LINE
  525. assert(false); // LCOV_EXCL_LINE
  526. }
  527. }
  528. }
  529. /*!
  530. @brief count digits
  531. Count the number of decimal (base 10) digits for an input unsigned integer.
  532. @param[in] x unsigned integer number to count its digits
  533. @return number of decimal digits
  534. */
  535. inline unsigned int count_digits(number_unsigned_t x) noexcept
  536. {
  537. unsigned int n_digits = 1;
  538. for (;;)
  539. {
  540. if (x < 10)
  541. {
  542. return n_digits;
  543. }
  544. if (x < 100)
  545. {
  546. return n_digits + 1;
  547. }
  548. if (x < 1000)
  549. {
  550. return n_digits + 2;
  551. }
  552. if (x < 10000)
  553. {
  554. return n_digits + 3;
  555. }
  556. x = x / 10000u;
  557. n_digits += 4;
  558. }
  559. }
  560. /*!
  561. @brief dump an integer
  562. Dump a given integer to output stream @a o. Works internally with
  563. @a number_buffer.
  564. @param[in] x integer number (signed or unsigned) to dump
  565. @tparam NumberType either @a number_integer_t or @a number_unsigned_t
  566. */
  567. template<typename NumberType, detail::enable_if_t<
  568. std::is_same<NumberType, number_unsigned_t>::value or
  569. std::is_same<NumberType, number_integer_t>::value or
  570. std::is_same<NumberType, typename binary_t::value_type>::value,
  571. int> = 0>
  572. void dump_integer(NumberType x)
  573. {
  574. static constexpr std::array<std::array<char, 2>, 100> digits_to_99
  575. {
  576. {
  577. {{'0', '0'}}, {{'0', '1'}}, {{'0', '2'}}, {{'0', '3'}}, {{'0', '4'}}, {{'0', '5'}}, {{'0', '6'}}, {{'0', '7'}}, {{'0', '8'}}, {{'0', '9'}},
  578. {{'1', '0'}}, {{'1', '1'}}, {{'1', '2'}}, {{'1', '3'}}, {{'1', '4'}}, {{'1', '5'}}, {{'1', '6'}}, {{'1', '7'}}, {{'1', '8'}}, {{'1', '9'}},
  579. {{'2', '0'}}, {{'2', '1'}}, {{'2', '2'}}, {{'2', '3'}}, {{'2', '4'}}, {{'2', '5'}}, {{'2', '6'}}, {{'2', '7'}}, {{'2', '8'}}, {{'2', '9'}},
  580. {{'3', '0'}}, {{'3', '1'}}, {{'3', '2'}}, {{'3', '3'}}, {{'3', '4'}}, {{'3', '5'}}, {{'3', '6'}}, {{'3', '7'}}, {{'3', '8'}}, {{'3', '9'}},
  581. {{'4', '0'}}, {{'4', '1'}}, {{'4', '2'}}, {{'4', '3'}}, {{'4', '4'}}, {{'4', '5'}}, {{'4', '6'}}, {{'4', '7'}}, {{'4', '8'}}, {{'4', '9'}},
  582. {{'5', '0'}}, {{'5', '1'}}, {{'5', '2'}}, {{'5', '3'}}, {{'5', '4'}}, {{'5', '5'}}, {{'5', '6'}}, {{'5', '7'}}, {{'5', '8'}}, {{'5', '9'}},
  583. {{'6', '0'}}, {{'6', '1'}}, {{'6', '2'}}, {{'6', '3'}}, {{'6', '4'}}, {{'6', '5'}}, {{'6', '6'}}, {{'6', '7'}}, {{'6', '8'}}, {{'6', '9'}},
  584. {{'7', '0'}}, {{'7', '1'}}, {{'7', '2'}}, {{'7', '3'}}, {{'7', '4'}}, {{'7', '5'}}, {{'7', '6'}}, {{'7', '7'}}, {{'7', '8'}}, {{'7', '9'}},
  585. {{'8', '0'}}, {{'8', '1'}}, {{'8', '2'}}, {{'8', '3'}}, {{'8', '4'}}, {{'8', '5'}}, {{'8', '6'}}, {{'8', '7'}}, {{'8', '8'}}, {{'8', '9'}},
  586. {{'9', '0'}}, {{'9', '1'}}, {{'9', '2'}}, {{'9', '3'}}, {{'9', '4'}}, {{'9', '5'}}, {{'9', '6'}}, {{'9', '7'}}, {{'9', '8'}}, {{'9', '9'}},
  587. }
  588. };
  589. // special case for "0"
  590. if (x == 0)
  591. {
  592. o->write_character('0');
  593. return;
  594. }
  595. // use a pointer to fill the buffer
  596. auto buffer_ptr = number_buffer.begin();
  597. const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not(x >= 0); // see issue #755
  598. number_unsigned_t abs_value;
  599. unsigned int n_chars;
  600. if (is_negative)
  601. {
  602. *buffer_ptr = '-';
  603. abs_value = remove_sign(static_cast<number_integer_t>(x));
  604. // account one more byte for the minus sign
  605. n_chars = 1 + count_digits(abs_value);
  606. }
  607. else
  608. {
  609. abs_value = static_cast<number_unsigned_t>(x);
  610. n_chars = count_digits(abs_value);
  611. }
  612. // spare 1 byte for '\0'
  613. assert(n_chars < number_buffer.size() - 1);
  614. // jump to the end to generate the string from backward
  615. // so we later avoid reversing the result
  616. buffer_ptr += n_chars;
  617. // Fast int2ascii implementation inspired by "Fastware" talk by Andrei Alexandrescu
  618. // See: https://www.youtube.com/watch?v=o4-CwDo2zpg
  619. while (abs_value >= 100)
  620. {
  621. const auto digits_index = static_cast<unsigned>((abs_value % 100));
  622. abs_value /= 100;
  623. *(--buffer_ptr) = digits_to_99[digits_index][1];
  624. *(--buffer_ptr) = digits_to_99[digits_index][0];
  625. }
  626. if (abs_value >= 10)
  627. {
  628. const auto digits_index = static_cast<unsigned>(abs_value);
  629. *(--buffer_ptr) = digits_to_99[digits_index][1];
  630. *(--buffer_ptr) = digits_to_99[digits_index][0];
  631. }
  632. else
  633. {
  634. *(--buffer_ptr) = static_cast<char>('0' + abs_value);
  635. }
  636. o->write_characters(number_buffer.data(), n_chars);
  637. }
  638. /*!
  639. @brief dump a floating-point number
  640. Dump a given floating-point number to output stream @a o. Works internally
  641. with @a number_buffer.
  642. @param[in] x floating-point number to dump
  643. */
  644. void dump_float(number_float_t x)
  645. {
  646. // NaN / inf
  647. if (not std::isfinite(x))
  648. {
  649. o->write_characters("null", 4);
  650. return;
  651. }
  652. // If number_float_t is an IEEE-754 single or double precision number,
  653. // use the Grisu2 algorithm to produce short numbers which are
  654. // guaranteed to round-trip, using strtof and strtod, resp.
  655. //
  656. // NB: The test below works if <long double> == <double>.
  657. static constexpr bool is_ieee_single_or_double
  658. = (std::numeric_limits<number_float_t>::is_iec559 and std::numeric_limits<number_float_t>::digits == 24 and std::numeric_limits<number_float_t>::max_exponent == 128) or
  659. (std::numeric_limits<number_float_t>::is_iec559 and std::numeric_limits<number_float_t>::digits == 53 and std::numeric_limits<number_float_t>::max_exponent == 1024);
  660. dump_float(x, std::integral_constant<bool, is_ieee_single_or_double>());
  661. }
  662. void dump_float(number_float_t x, std::true_type /*is_ieee_single_or_double*/)
  663. {
  664. char* begin = number_buffer.data();
  665. char* end = ::nlohmann::detail::to_chars(begin, begin + number_buffer.size(), x);
  666. o->write_characters(begin, static_cast<size_t>(end - begin));
  667. }
  668. void dump_float(number_float_t x, std::false_type /*is_ieee_single_or_double*/)
  669. {
  670. // get number of digits for a float -> text -> float round-trip
  671. static constexpr auto d = std::numeric_limits<number_float_t>::max_digits10;
  672. // the actual conversion
  673. std::ptrdiff_t len = (std::snprintf)(number_buffer.data(), number_buffer.size(), "%.*g", d, x);
  674. // negative value indicates an error
  675. assert(len > 0);
  676. // check if buffer was large enough
  677. assert(static_cast<std::size_t>(len) < number_buffer.size());
  678. // erase thousands separator
  679. if (thousands_sep != '\0')
  680. {
  681. const auto end = std::remove(number_buffer.begin(),
  682. number_buffer.begin() + len, thousands_sep);
  683. std::fill(end, number_buffer.end(), '\0');
  684. assert((end - number_buffer.begin()) <= len);
  685. len = (end - number_buffer.begin());
  686. }
  687. // convert decimal point to '.'
  688. if (decimal_point != '\0' and decimal_point != '.')
  689. {
  690. const auto dec_pos = std::find(number_buffer.begin(), number_buffer.end(), decimal_point);
  691. if (dec_pos != number_buffer.end())
  692. {
  693. *dec_pos = '.';
  694. }
  695. }
  696. o->write_characters(number_buffer.data(), static_cast<std::size_t>(len));
  697. // determine if need to append ".0"
  698. const bool value_is_int_like =
  699. std::none_of(number_buffer.begin(), number_buffer.begin() + len + 1,
  700. [](char c)
  701. {
  702. return c == '.' or c == 'e';
  703. });
  704. if (value_is_int_like)
  705. {
  706. o->write_characters(".0", 2);
  707. }
  708. }
  709. /*!
  710. @brief check whether a string is UTF-8 encoded
  711. The function checks each byte of a string whether it is UTF-8 encoded. The
  712. result of the check is stored in the @a state parameter. The function must
  713. be called initially with state 0 (accept). State 1 means the string must
  714. be rejected, because the current byte is not allowed. If the string is
  715. completely processed, but the state is non-zero, the string ended
  716. prematurely; that is, the last byte indicated more bytes should have
  717. followed.
  718. @param[in,out] state the state of the decoding
  719. @param[in,out] codep codepoint (valid only if resulting state is UTF8_ACCEPT)
  720. @param[in] byte next byte to decode
  721. @return new state
  722. @note The function has been edited: a std::array is used.
  723. @copyright Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
  724. @sa http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
  725. */
  726. static std::uint8_t decode(std::uint8_t& state, std::uint32_t& codep, const std::uint8_t byte) noexcept
  727. {
  728. static const std::array<std::uint8_t, 400> utf8d =
  729. {
  730. {
  731. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1F
  732. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..3F
  733. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..5F
  734. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..7F
  735. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 80..9F
  736. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // A0..BF
  737. 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0..DF
  738. 0xA, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // E0..EF
  739. 0xB, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // F0..FF
  740. 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0..s0
  741. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s1..s2
  742. 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s3..s4
  743. 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s5..s6
  744. 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // s7..s8
  745. }
  746. };
  747. const std::uint8_t type = utf8d[byte];
  748. codep = (state != UTF8_ACCEPT)
  749. ? (byte & 0x3fu) | (codep << 6u)
  750. : (0xFFu >> type) & (byte);
  751. std::size_t index = 256u + static_cast<size_t>(state) * 16u + static_cast<size_t>(type);
  752. assert(index < 400);
  753. state = utf8d[index];
  754. return state;
  755. }
  756. /*
  757. * Overload to make the compiler happy while it is instantiating
  758. * dump_integer for number_unsigned_t.
  759. * Must never be called.
  760. */
  761. number_unsigned_t remove_sign(number_unsigned_t x)
  762. {
  763. assert(false); // LCOV_EXCL_LINE
  764. return x; // LCOV_EXCL_LINE
  765. }
  766. /*
  767. * Helper function for dump_integer
  768. *
  769. * This function takes a negative signed integer and returns its absolute
  770. * value as unsigned integer. The plus/minus shuffling is necessary as we can
  771. * not directly remove the sign of an arbitrary signed integer as the
  772. * absolute values of INT_MIN and INT_MAX are usually not the same. See
  773. * #1708 for details.
  774. */
  775. inline number_unsigned_t remove_sign(number_integer_t x) noexcept
  776. {
  777. assert(x < 0 and x < (std::numeric_limits<number_integer_t>::max)());
  778. return static_cast<number_unsigned_t>(-(x + 1)) + 1;
  779. }
  private:
    /// the output of the serializer
    output_adapter_t<char> o = nullptr;

    /// scratch buffer in which dump_integer and dump_float build the text of
    /// a number (hopefully large enough)
    std::array<char, 64> number_buffer{{}};

    /// the locale, as returned by std::localeconv() in the constructor
    const std::lconv* loc = nullptr;
    /// the locale's thousand separator character ('\0' if there is none)
    const char thousands_sep = '\0';
    /// the locale's decimal point character ('\0' if there is none)
    const char decimal_point = '\0';

    /// string buffer in which dump_escaped collects escaped output before
    /// writing it to the output
    std::array<char, 512> string_buffer{{}};

    /// the indentation character
    const char indent_char;
    /// the indentation string; pretty-printing writes a prefix of it and
    /// enlarges it when deeper indentation is needed
    string_t indent_string;

    /// error_handler how to react on decoding errors
    const error_handler_t error_handler;
  799. };
  800. } // namespace detail
  801. } // namespace nlohmann