serializer.hpp 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949
  1. #pragma once
  2. #include <algorithm> // reverse, remove, fill, find, none_of
  3. #include <array> // array
  4. #include <cassert> // assert
  5. #include <clocale> // localeconv, lconv
  6. #include <cmath> // labs, isfinite, isnan, signbit
  7. #include <cstddef> // size_t, ptrdiff_t
  8. #include <cstdint> // uint8_t
  9. #include <cstdio> // snprintf
  10. #include <limits> // numeric_limits
  11. #include <string> // string
  12. #include <type_traits> // is_same
  13. #include <utility> // move
  14. #include <nlohmann/detail/boolean_operators.hpp>
  15. #include <nlohmann/detail/conversions/to_chars.hpp>
  16. #include <nlohmann/detail/exceptions.hpp>
  17. #include <nlohmann/detail/macro_scope.hpp>
  18. #include <nlohmann/detail/meta/cpp_future.hpp>
  19. #include <nlohmann/detail/output/binary_writer.hpp>
  20. #include <nlohmann/detail/output/output_adapters.hpp>
  21. #include <nlohmann/detail/value_t.hpp>
  22. namespace nlohmann
  23. {
  24. namespace detail
  25. {
  26. ///////////////////
  27. // serialization //
  28. ///////////////////
  /*!
  @brief strategy applied by the serializer when a string is not valid UTF-8

  The enumerator order is part of the type's layout: `strict` is the first
  (default) value, followed by `replace` and `ignore`.
  */
  enum class error_handler_t
  {
      /// throw a type_error exception in case of invalid UTF-8
      strict,
      /// replace invalid UTF-8 sequences with U+FFFD
      replace,
      /// ignore invalid UTF-8 sequences
      ignore
  };
  36. template<typename BasicJsonType>
  37. class serializer
  38. {
  39. using string_t = typename BasicJsonType::string_t;
  40. using number_float_t = typename BasicJsonType::number_float_t;
  41. using number_integer_t = typename BasicJsonType::number_integer_t;
  42. using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  43. using binary_char_t = typename BasicJsonType::binary_t::value_type;
  44. static constexpr std::uint8_t UTF8_ACCEPT = 0;
  45. static constexpr std::uint8_t UTF8_REJECT = 1;
  46. public:
  47. /*!
  48. @param[in] s output stream to serialize to
  49. @param[in] ichar indentation character to use
  50. @param[in] error_handler_ how to react on decoding errors
  51. */
  52. serializer(output_adapter_t<char> s, const char ichar,
  53. error_handler_t error_handler_ = error_handler_t::strict)
  54. : o(std::move(s))
  55. , loc(std::localeconv())
  56. , thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep))
  57. , decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point))
  58. , indent_char(ichar)
  59. , indent_string(512, indent_char)
  60. , error_handler(error_handler_)
  61. {}
  62. // delete because of pointer members
  63. serializer(const serializer&) = delete;
  64. serializer& operator=(const serializer&) = delete;
  65. serializer(serializer&&) = delete;
  66. serializer& operator=(serializer&&) = delete;
  67. ~serializer() = default;
  68. /*!
  69. @brief internal implementation of the serialization function
  70. This function is called by the public member function dump and organizes
  71. the serialization internally. The indentation level is propagated as
  72. additional parameter. In case of arrays and objects, the function is
  73. called recursively.
  74. - strings and object keys are escaped using `escape_string()`
  75. - integer numbers are converted implicitly via `operator<<`
  76. - floating-point numbers are converted to a string using `"%g"` format
  77. - binary values are serialized as objects containing the subtype and the
  78. byte array
  79. @param[in] val value to serialize
  80. @param[in] pretty_print whether the output shall be pretty-printed
  81. @param[in] ensure_ascii If @a ensure_ascii is true, all non-ASCII characters
  82. in the output are escaped with `\uXXXX` sequences, and the result consists
  83. of ASCII characters only.
  84. @param[in] indent_step the indent level
  85. @param[in] current_indent the current indent level (only used internally)
  86. */
  87. void dump(const BasicJsonType& val,
  88. const bool pretty_print,
  89. const bool ensure_ascii,
  90. const unsigned int indent_step,
  91. const unsigned int current_indent = 0)
  92. {
  93. switch (val.m_type)
  94. {
  95. case value_t::object:
  96. {
  97. if (val.m_value.object->empty())
  98. {
  99. o->write_characters("{}", 2);
  100. return;
  101. }
  102. if (pretty_print)
  103. {
  104. o->write_characters("{\n", 2);
  105. // variable to hold indentation for recursive calls
  106. const auto new_indent = current_indent + indent_step;
  107. if (JSON_HEDLEY_UNLIKELY(indent_string.size() < new_indent))
  108. {
  109. indent_string.resize(indent_string.size() * 2, ' ');
  110. }
  111. // first n-1 elements
  112. auto i = val.m_value.object->cbegin();
  113. for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
  114. {
  115. o->write_characters(indent_string.c_str(), new_indent);
  116. o->write_character('\"');
  117. dump_escaped(i->first, ensure_ascii);
  118. o->write_characters("\": ", 3);
  119. dump(i->second, true, ensure_ascii, indent_step, new_indent);
  120. o->write_characters(",\n", 2);
  121. }
  122. // last element
  123. assert(i != val.m_value.object->cend());
  124. assert(std::next(i) == val.m_value.object->cend());
  125. o->write_characters(indent_string.c_str(), new_indent);
  126. o->write_character('\"');
  127. dump_escaped(i->first, ensure_ascii);
  128. o->write_characters("\": ", 3);
  129. dump(i->second, true, ensure_ascii, indent_step, new_indent);
  130. o->write_character('\n');
  131. o->write_characters(indent_string.c_str(), current_indent);
  132. o->write_character('}');
  133. }
  134. else
  135. {
  136. o->write_character('{');
  137. // first n-1 elements
  138. auto i = val.m_value.object->cbegin();
  139. for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
  140. {
  141. o->write_character('\"');
  142. dump_escaped(i->first, ensure_ascii);
  143. o->write_characters("\":", 2);
  144. dump(i->second, false, ensure_ascii, indent_step, current_indent);
  145. o->write_character(',');
  146. }
  147. // last element
  148. assert(i != val.m_value.object->cend());
  149. assert(std::next(i) == val.m_value.object->cend());
  150. o->write_character('\"');
  151. dump_escaped(i->first, ensure_ascii);
  152. o->write_characters("\":", 2);
  153. dump(i->second, false, ensure_ascii, indent_step, current_indent);
  154. o->write_character('}');
  155. }
  156. return;
  157. }
  158. case value_t::array:
  159. {
  160. if (val.m_value.array->empty())
  161. {
  162. o->write_characters("[]", 2);
  163. return;
  164. }
  165. if (pretty_print)
  166. {
  167. o->write_characters("[\n", 2);
  168. // variable to hold indentation for recursive calls
  169. const auto new_indent = current_indent + indent_step;
  170. if (JSON_HEDLEY_UNLIKELY(indent_string.size() < new_indent))
  171. {
  172. indent_string.resize(indent_string.size() * 2, ' ');
  173. }
  174. // first n-1 elements
  175. for (auto i = val.m_value.array->cbegin();
  176. i != val.m_value.array->cend() - 1; ++i)
  177. {
  178. o->write_characters(indent_string.c_str(), new_indent);
  179. dump(*i, true, ensure_ascii, indent_step, new_indent);
  180. o->write_characters(",\n", 2);
  181. }
  182. // last element
  183. assert(not val.m_value.array->empty());
  184. o->write_characters(indent_string.c_str(), new_indent);
  185. dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent);
  186. o->write_character('\n');
  187. o->write_characters(indent_string.c_str(), current_indent);
  188. o->write_character(']');
  189. }
  190. else
  191. {
  192. o->write_character('[');
  193. // first n-1 elements
  194. for (auto i = val.m_value.array->cbegin();
  195. i != val.m_value.array->cend() - 1; ++i)
  196. {
  197. dump(*i, false, ensure_ascii, indent_step, current_indent);
  198. o->write_character(',');
  199. }
  200. // last element
  201. assert(not val.m_value.array->empty());
  202. dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent);
  203. o->write_character(']');
  204. }
  205. return;
  206. }
  207. case value_t::string:
  208. {
  209. o->write_character('\"');
  210. dump_escaped(*val.m_value.string, ensure_ascii);
  211. o->write_character('\"');
  212. return;
  213. }
  214. case value_t::binary:
  215. {
  216. if (pretty_print)
  217. {
  218. o->write_characters("{\n", 2);
  219. // variable to hold indentation for recursive calls
  220. const auto new_indent = current_indent + indent_step;
  221. if (JSON_HEDLEY_UNLIKELY(indent_string.size() < new_indent))
  222. {
  223. indent_string.resize(indent_string.size() * 2, ' ');
  224. }
  225. o->write_characters(indent_string.c_str(), new_indent);
  226. o->write_characters("\"bytes\": [", 10);
  227. if (not val.m_value.binary->empty())
  228. {
  229. for (auto i = val.m_value.binary->cbegin();
  230. i != val.m_value.binary->cend() - 1; ++i)
  231. {
  232. dump_integer(*i);
  233. o->write_characters(", ", 2);
  234. }
  235. dump_integer(val.m_value.binary->back());
  236. }
  237. o->write_characters("],\n", 3);
  238. o->write_characters(indent_string.c_str(), new_indent);
  239. o->write_characters("\"subtype\": ", 11);
  240. if (val.m_value.binary->has_subtype())
  241. {
  242. dump_integer(val.m_value.binary->subtype());
  243. }
  244. else
  245. {
  246. o->write_characters("null", 4);
  247. }
  248. o->write_character('\n');
  249. o->write_characters(indent_string.c_str(), current_indent);
  250. o->write_character('}');
  251. }
  252. else
  253. {
  254. o->write_characters("{\"bytes\":[", 10);
  255. if (not val.m_value.binary->empty())
  256. {
  257. for (auto i = val.m_value.binary->cbegin();
  258. i != val.m_value.binary->cend() - 1; ++i)
  259. {
  260. dump_integer(*i);
  261. o->write_character(',');
  262. }
  263. dump_integer(val.m_value.binary->back());
  264. }
  265. o->write_characters("],\"subtype\":", 12);
  266. if (val.m_value.binary->has_subtype())
  267. {
  268. dump_integer(val.m_value.binary->subtype());
  269. o->write_character('}');
  270. }
  271. else
  272. {
  273. o->write_characters("null}", 5);
  274. }
  275. }
  276. return;
  277. }
  278. case value_t::boolean:
  279. {
  280. if (val.m_value.boolean)
  281. {
  282. o->write_characters("true", 4);
  283. }
  284. else
  285. {
  286. o->write_characters("false", 5);
  287. }
  288. return;
  289. }
  290. case value_t::number_integer:
  291. {
  292. dump_integer(val.m_value.number_integer);
  293. return;
  294. }
  295. case value_t::number_unsigned:
  296. {
  297. dump_integer(val.m_value.number_unsigned);
  298. return;
  299. }
  300. case value_t::number_float:
  301. {
  302. dump_float(val.m_value.number_float);
  303. return;
  304. }
  305. case value_t::discarded:
  306. {
  307. o->write_characters("<discarded>", 11);
  308. return;
  309. }
  310. case value_t::null:
  311. {
  312. o->write_characters("null", 4);
  313. return;
  314. }
  315. default: // LCOV_EXCL_LINE
  316. assert(false); // LCOV_EXCL_LINE
  317. }
  318. }
  319. private:
  320. /*!
  321. @brief dump escaped string
  322. Escape a string by replacing certain special characters by a sequence of an
  323. escape character (backslash) and another character and other control
  324. characters by a sequence of "\u" followed by a four-digit hex
  325. representation. The escaped string is written to output stream @a o.
  326. @param[in] s the string to escape
  327. @param[in] ensure_ascii whether to escape non-ASCII characters with
  328. \uXXXX sequences
  329. @complexity Linear in the length of string @a s.
  330. */
  331. void dump_escaped(const string_t& s, const bool ensure_ascii)
  332. {
  333. std::uint32_t codepoint;
  334. std::uint8_t state = UTF8_ACCEPT;
  335. std::size_t bytes = 0; // number of bytes written to string_buffer
  336. // number of bytes written at the point of the last valid byte
  337. std::size_t bytes_after_last_accept = 0;
  338. std::size_t undumped_chars = 0;
  339. for (std::size_t i = 0; i < s.size(); ++i)
  340. {
  341. const auto byte = static_cast<uint8_t>(s[i]);
  342. switch (decode(state, codepoint, byte))
  343. {
  344. case UTF8_ACCEPT: // decode found a new code point
  345. {
  346. switch (codepoint)
  347. {
  348. case 0x08: // backspace
  349. {
  350. string_buffer[bytes++] = '\\';
  351. string_buffer[bytes++] = 'b';
  352. break;
  353. }
  354. case 0x09: // horizontal tab
  355. {
  356. string_buffer[bytes++] = '\\';
  357. string_buffer[bytes++] = 't';
  358. break;
  359. }
  360. case 0x0A: // newline
  361. {
  362. string_buffer[bytes++] = '\\';
  363. string_buffer[bytes++] = 'n';
  364. break;
  365. }
  366. case 0x0C: // formfeed
  367. {
  368. string_buffer[bytes++] = '\\';
  369. string_buffer[bytes++] = 'f';
  370. break;
  371. }
  372. case 0x0D: // carriage return
  373. {
  374. string_buffer[bytes++] = '\\';
  375. string_buffer[bytes++] = 'r';
  376. break;
  377. }
  378. case 0x22: // quotation mark
  379. {
  380. string_buffer[bytes++] = '\\';
  381. string_buffer[bytes++] = '\"';
  382. break;
  383. }
  384. case 0x5C: // reverse solidus
  385. {
  386. string_buffer[bytes++] = '\\';
  387. string_buffer[bytes++] = '\\';
  388. break;
  389. }
  390. default:
  391. {
  392. // escape control characters (0x00..0x1F) or, if
  393. // ensure_ascii parameter is used, non-ASCII characters
  394. if ((codepoint <= 0x1F) or (ensure_ascii and (codepoint >= 0x7F)))
  395. {
  396. if (codepoint <= 0xFFFF)
  397. {
  398. (std::snprintf)(string_buffer.data() + bytes, 7, "\\u%04x",
  399. static_cast<std::uint16_t>(codepoint));
  400. bytes += 6;
  401. }
  402. else
  403. {
  404. (std::snprintf)(string_buffer.data() + bytes, 13, "\\u%04x\\u%04x",
  405. static_cast<std::uint16_t>(0xD7C0u + (codepoint >> 10u)),
  406. static_cast<std::uint16_t>(0xDC00u + (codepoint & 0x3FFu)));
  407. bytes += 12;
  408. }
  409. }
  410. else
  411. {
  412. // copy byte to buffer (all previous bytes
  413. // been copied have in default case above)
  414. string_buffer[bytes++] = s[i];
  415. }
  416. break;
  417. }
  418. }
  419. // write buffer and reset index; there must be 13 bytes
  420. // left, as this is the maximal number of bytes to be
  421. // written ("\uxxxx\uxxxx\0") for one code point
  422. if (string_buffer.size() - bytes < 13)
  423. {
  424. o->write_characters(string_buffer.data(), bytes);
  425. bytes = 0;
  426. }
  427. // remember the byte position of this accept
  428. bytes_after_last_accept = bytes;
  429. undumped_chars = 0;
  430. break;
  431. }
  432. case UTF8_REJECT: // decode found invalid UTF-8 byte
  433. {
  434. switch (error_handler)
  435. {
  436. case error_handler_t::strict:
  437. {
  438. std::string sn(3, '\0');
  439. (std::snprintf)(&sn[0], sn.size(), "%.2X", byte);
  440. JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn));
  441. }
  442. case error_handler_t::ignore:
  443. case error_handler_t::replace:
  444. {
  445. // in case we saw this character the first time, we
  446. // would like to read it again, because the byte
  447. // may be OK for itself, but just not OK for the
  448. // previous sequence
  449. if (undumped_chars > 0)
  450. {
  451. --i;
  452. }
  453. // reset length buffer to the last accepted index;
  454. // thus removing/ignoring the invalid characters
  455. bytes = bytes_after_last_accept;
  456. if (error_handler == error_handler_t::replace)
  457. {
  458. // add a replacement character
  459. if (ensure_ascii)
  460. {
  461. string_buffer[bytes++] = '\\';
  462. string_buffer[bytes++] = 'u';
  463. string_buffer[bytes++] = 'f';
  464. string_buffer[bytes++] = 'f';
  465. string_buffer[bytes++] = 'f';
  466. string_buffer[bytes++] = 'd';
  467. }
  468. else
  469. {
  470. string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xEF');
  471. string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBF');
  472. string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBD');
  473. }
  474. // write buffer and reset index; there must be 13 bytes
  475. // left, as this is the maximal number of bytes to be
  476. // written ("\uxxxx\uxxxx\0") for one code point
  477. if (string_buffer.size() - bytes < 13)
  478. {
  479. o->write_characters(string_buffer.data(), bytes);
  480. bytes = 0;
  481. }
  482. bytes_after_last_accept = bytes;
  483. }
  484. undumped_chars = 0;
  485. // continue processing the string
  486. state = UTF8_ACCEPT;
  487. break;
  488. }
  489. default: // LCOV_EXCL_LINE
  490. assert(false); // LCOV_EXCL_LINE
  491. }
  492. break;
  493. }
  494. default: // decode found yet incomplete multi-byte code point
  495. {
  496. if (not ensure_ascii)
  497. {
  498. // code point will not be escaped - copy byte to buffer
  499. string_buffer[bytes++] = s[i];
  500. }
  501. ++undumped_chars;
  502. break;
  503. }
  504. }
  505. }
  506. // we finished processing the string
  507. if (JSON_HEDLEY_LIKELY(state == UTF8_ACCEPT))
  508. {
  509. // write buffer
  510. if (bytes > 0)
  511. {
  512. o->write_characters(string_buffer.data(), bytes);
  513. }
  514. }
  515. else
  516. {
  517. // we finish reading, but do not accept: string was incomplete
  518. switch (error_handler)
  519. {
  520. case error_handler_t::strict:
  521. {
  522. std::string sn(3, '\0');
  523. (std::snprintf)(&sn[0], sn.size(), "%.2X", static_cast<std::uint8_t>(s.back()));
  524. JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn));
  525. }
  526. case error_handler_t::ignore:
  527. {
  528. // write all accepted bytes
  529. o->write_characters(string_buffer.data(), bytes_after_last_accept);
  530. break;
  531. }
  532. case error_handler_t::replace:
  533. {
  534. // write all accepted bytes
  535. o->write_characters(string_buffer.data(), bytes_after_last_accept);
  536. // add a replacement character
  537. if (ensure_ascii)
  538. {
  539. o->write_characters("\\ufffd", 6);
  540. }
  541. else
  542. {
  543. o->write_characters("\xEF\xBF\xBD", 3);
  544. }
  545. break;
  546. }
  547. default: // LCOV_EXCL_LINE
  548. assert(false); // LCOV_EXCL_LINE
  549. }
  550. }
  551. }
  552. /*!
  553. @brief count digits
  554. Count the number of decimal (base 10) digits for an input unsigned integer.
  555. @param[in] x unsigned integer number to count its digits
  556. @return number of decimal digits
  557. */
  558. inline unsigned int count_digits(number_unsigned_t x) noexcept
  559. {
  560. unsigned int n_digits = 1;
  561. for (;;)
  562. {
  563. if (x < 10)
  564. {
  565. return n_digits;
  566. }
  567. if (x < 100)
  568. {
  569. return n_digits + 1;
  570. }
  571. if (x < 1000)
  572. {
  573. return n_digits + 2;
  574. }
  575. if (x < 10000)
  576. {
  577. return n_digits + 3;
  578. }
  579. x = x / 10000u;
  580. n_digits += 4;
  581. }
  582. }
  583. /*!
  584. @brief dump an integer
  585. Dump a given integer to output stream @a o. Works internally with
  586. @a number_buffer.
  587. @param[in] x integer number (signed or unsigned) to dump
  588. @tparam NumberType either @a number_integer_t or @a number_unsigned_t
  589. */
  590. template<typename NumberType, detail::enable_if_t<
  591. std::is_same<NumberType, number_unsigned_t>::value or
  592. std::is_same<NumberType, number_integer_t>::value or
  593. std::is_same<NumberType, binary_char_t>::value,
  594. int> = 0>
  595. void dump_integer(NumberType x)
  596. {
  597. static constexpr std::array<std::array<char, 2>, 100> digits_to_99
  598. {
  599. {
  600. {{'0', '0'}}, {{'0', '1'}}, {{'0', '2'}}, {{'0', '3'}}, {{'0', '4'}}, {{'0', '5'}}, {{'0', '6'}}, {{'0', '7'}}, {{'0', '8'}}, {{'0', '9'}},
  601. {{'1', '0'}}, {{'1', '1'}}, {{'1', '2'}}, {{'1', '3'}}, {{'1', '4'}}, {{'1', '5'}}, {{'1', '6'}}, {{'1', '7'}}, {{'1', '8'}}, {{'1', '9'}},
  602. {{'2', '0'}}, {{'2', '1'}}, {{'2', '2'}}, {{'2', '3'}}, {{'2', '4'}}, {{'2', '5'}}, {{'2', '6'}}, {{'2', '7'}}, {{'2', '8'}}, {{'2', '9'}},
  603. {{'3', '0'}}, {{'3', '1'}}, {{'3', '2'}}, {{'3', '3'}}, {{'3', '4'}}, {{'3', '5'}}, {{'3', '6'}}, {{'3', '7'}}, {{'3', '8'}}, {{'3', '9'}},
  604. {{'4', '0'}}, {{'4', '1'}}, {{'4', '2'}}, {{'4', '3'}}, {{'4', '4'}}, {{'4', '5'}}, {{'4', '6'}}, {{'4', '7'}}, {{'4', '8'}}, {{'4', '9'}},
  605. {{'5', '0'}}, {{'5', '1'}}, {{'5', '2'}}, {{'5', '3'}}, {{'5', '4'}}, {{'5', '5'}}, {{'5', '6'}}, {{'5', '7'}}, {{'5', '8'}}, {{'5', '9'}},
  606. {{'6', '0'}}, {{'6', '1'}}, {{'6', '2'}}, {{'6', '3'}}, {{'6', '4'}}, {{'6', '5'}}, {{'6', '6'}}, {{'6', '7'}}, {{'6', '8'}}, {{'6', '9'}},
  607. {{'7', '0'}}, {{'7', '1'}}, {{'7', '2'}}, {{'7', '3'}}, {{'7', '4'}}, {{'7', '5'}}, {{'7', '6'}}, {{'7', '7'}}, {{'7', '8'}}, {{'7', '9'}},
  608. {{'8', '0'}}, {{'8', '1'}}, {{'8', '2'}}, {{'8', '3'}}, {{'8', '4'}}, {{'8', '5'}}, {{'8', '6'}}, {{'8', '7'}}, {{'8', '8'}}, {{'8', '9'}},
  609. {{'9', '0'}}, {{'9', '1'}}, {{'9', '2'}}, {{'9', '3'}}, {{'9', '4'}}, {{'9', '5'}}, {{'9', '6'}}, {{'9', '7'}}, {{'9', '8'}}, {{'9', '9'}},
  610. }
  611. };
  612. // special case for "0"
  613. if (x == 0)
  614. {
  615. o->write_character('0');
  616. return;
  617. }
  618. // use a pointer to fill the buffer
  619. auto buffer_ptr = number_buffer.begin();
  620. const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not(x >= 0); // see issue #755
  621. number_unsigned_t abs_value;
  622. unsigned int n_chars;
  623. if (is_negative)
  624. {
  625. *buffer_ptr = '-';
  626. abs_value = remove_sign(static_cast<number_integer_t>(x));
  627. // account one more byte for the minus sign
  628. n_chars = 1 + count_digits(abs_value);
  629. }
  630. else
  631. {
  632. abs_value = static_cast<number_unsigned_t>(x);
  633. n_chars = count_digits(abs_value);
  634. }
  635. // spare 1 byte for '\0'
  636. assert(n_chars < number_buffer.size() - 1);
  637. // jump to the end to generate the string from backward
  638. // so we later avoid reversing the result
  639. buffer_ptr += n_chars;
  640. // Fast int2ascii implementation inspired by "Fastware" talk by Andrei Alexandrescu
  641. // See: https://www.youtube.com/watch?v=o4-CwDo2zpg
  642. while (abs_value >= 100)
  643. {
  644. const auto digits_index = static_cast<unsigned>((abs_value % 100));
  645. abs_value /= 100;
  646. *(--buffer_ptr) = digits_to_99[digits_index][1];
  647. *(--buffer_ptr) = digits_to_99[digits_index][0];
  648. }
  649. if (abs_value >= 10)
  650. {
  651. const auto digits_index = static_cast<unsigned>(abs_value);
  652. *(--buffer_ptr) = digits_to_99[digits_index][1];
  653. *(--buffer_ptr) = digits_to_99[digits_index][0];
  654. }
  655. else
  656. {
  657. *(--buffer_ptr) = static_cast<char>('0' + abs_value);
  658. }
  659. o->write_characters(number_buffer.data(), n_chars);
  660. }
  661. /*!
  662. @brief dump a floating-point number
  663. Dump a given floating-point number to output stream @a o. Works internally
  664. with @a number_buffer.
  665. @param[in] x floating-point number to dump
  666. */
  667. void dump_float(number_float_t x)
  668. {
  669. // NaN / inf
  670. if (not std::isfinite(x))
  671. {
  672. o->write_characters("null", 4);
  673. return;
  674. }
  675. // If number_float_t is an IEEE-754 single or double precision number,
  676. // use the Grisu2 algorithm to produce short numbers which are
  677. // guaranteed to round-trip, using strtof and strtod, resp.
  678. //
  679. // NB: The test below works if <long double> == <double>.
  680. static constexpr bool is_ieee_single_or_double
  681. = (std::numeric_limits<number_float_t>::is_iec559 and std::numeric_limits<number_float_t>::digits == 24 and std::numeric_limits<number_float_t>::max_exponent == 128) or
  682. (std::numeric_limits<number_float_t>::is_iec559 and std::numeric_limits<number_float_t>::digits == 53 and std::numeric_limits<number_float_t>::max_exponent == 1024);
  683. dump_float(x, std::integral_constant<bool, is_ieee_single_or_double>());
  684. }
  685. void dump_float(number_float_t x, std::true_type /*is_ieee_single_or_double*/)
  686. {
  687. char* begin = number_buffer.data();
  688. char* end = ::nlohmann::detail::to_chars(begin, begin + number_buffer.size(), x);
  689. o->write_characters(begin, static_cast<size_t>(end - begin));
  690. }
  691. void dump_float(number_float_t x, std::false_type /*is_ieee_single_or_double*/)
  692. {
  693. // get number of digits for a float -> text -> float round-trip
  694. static constexpr auto d = std::numeric_limits<number_float_t>::max_digits10;
  695. // the actual conversion
  696. std::ptrdiff_t len = (std::snprintf)(number_buffer.data(), number_buffer.size(), "%.*g", d, x);
  697. // negative value indicates an error
  698. assert(len > 0);
  699. // check if buffer was large enough
  700. assert(static_cast<std::size_t>(len) < number_buffer.size());
  701. // erase thousands separator
  702. if (thousands_sep != '\0')
  703. {
  704. const auto end = std::remove(number_buffer.begin(),
  705. number_buffer.begin() + len, thousands_sep);
  706. std::fill(end, number_buffer.end(), '\0');
  707. assert((end - number_buffer.begin()) <= len);
  708. len = (end - number_buffer.begin());
  709. }
  710. // convert decimal point to '.'
  711. if (decimal_point != '\0' and decimal_point != '.')
  712. {
  713. const auto dec_pos = std::find(number_buffer.begin(), number_buffer.end(), decimal_point);
  714. if (dec_pos != number_buffer.end())
  715. {
  716. *dec_pos = '.';
  717. }
  718. }
  719. o->write_characters(number_buffer.data(), static_cast<std::size_t>(len));
  720. // determine if need to append ".0"
  721. const bool value_is_int_like =
  722. std::none_of(number_buffer.begin(), number_buffer.begin() + len + 1,
  723. [](char c)
  724. {
  725. return c == '.' or c == 'e';
  726. });
  727. if (value_is_int_like)
  728. {
  729. o->write_characters(".0", 2);
  730. }
  731. }
  732. /*!
  733. @brief check whether a string is UTF-8 encoded
  734. The function checks each byte of a string whether it is UTF-8 encoded. The
  735. result of the check is stored in the @a state parameter. The function must
  736. be called initially with state 0 (accept). State 1 means the string must
  737. be rejected, because the current byte is not allowed. If the string is
  738. completely processed, but the state is non-zero, the string ended
  739. prematurely; that is, the last byte indicated more bytes should have
  740. followed.
  741. @param[in,out] state the state of the decoding
  742. @param[in,out] codep codepoint (valid only if resulting state is UTF8_ACCEPT)
  743. @param[in] byte next byte to decode
  744. @return new state
  745. @note The function has been edited: a std::array is used.
  746. @copyright Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
  747. @sa http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
  748. */
  749. static std::uint8_t decode(std::uint8_t& state, std::uint32_t& codep, const std::uint8_t byte) noexcept
  750. {
  751. static const std::array<std::uint8_t, 400> utf8d =
  752. {
  753. {
  754. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1F
  755. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..3F
  756. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..5F
  757. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..7F
  758. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 80..9F
  759. 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // A0..BF
  760. 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0..DF
  761. 0xA, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // E0..EF
  762. 0xB, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // F0..FF
  763. 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0..s0
  764. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s1..s2
  765. 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s3..s4
  766. 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s5..s6
  767. 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // s7..s8
  768. }
  769. };
  770. const std::uint8_t type = utf8d[byte];
  771. codep = (state != UTF8_ACCEPT)
  772. ? (byte & 0x3fu) | (codep << 6u)
  773. : (0xFFu >> type) & (byte);
  774. std::size_t index = 256u + static_cast<size_t>(state) * 16u + static_cast<size_t>(type);
  775. assert(index < 400);
  776. state = utf8d[index];
  777. return state;
  778. }
  779. /*
  780. * Overload to make the compiler happy while it is instantiating
  781. * dump_integer for number_unsigned_t.
  782. * Must never be called.
  783. */
  784. number_unsigned_t remove_sign(number_unsigned_t x)
  785. {
  786. assert(false); // LCOV_EXCL_LINE
  787. return x; // LCOV_EXCL_LINE
  788. }
  789. /*
  790. * Helper function for dump_integer
  791. *
  792. * This function takes a negative signed integer and returns its absolute
  793. * value as unsigned integer. The plus/minus shuffling is necessary as we can
  794. * not directly remove the sign of an arbitrary signed integer as the
  795. * absolute values of INT_MIN and INT_MAX are usually not the same. See
  796. * #1708 for details.
  797. */
  798. inline number_unsigned_t remove_sign(number_integer_t x) noexcept
  799. {
  800. assert(x < 0 and x < (std::numeric_limits<number_integer_t>::max)());
  801. return static_cast<number_unsigned_t>(-(x + 1)) + 1;
  802. }
  803. private:
  804. /// the output of the serializer
  805. output_adapter_t<char> o = nullptr;
  806. /// a (hopefully) large enough character buffer
  807. std::array<char, 64> number_buffer{{}};
  808. /// the locale
  809. const std::lconv* loc = nullptr;
  810. /// the locale's thousand separator character
  811. const char thousands_sep = '\0';
  812. /// the locale's decimal point character
  813. const char decimal_point = '\0';
  814. /// string buffer
  815. std::array<char, 512> string_buffer{{}};
  816. /// the indentation character
  817. const char indent_char;
  818. /// the indentation string
  819. string_t indent_string;
  820. /// error_handler how to react on decoding errors
  821. const error_handler_t error_handler;
  822. };
  823. } // namespace detail
  824. } // namespace nlohmann