json_sax.hpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726
  1. #pragma once
  2. #include <cassert> // assert
  3. #include <cstddef>
  4. #include <string> // string
  5. #include <utility> // move
  6. #include <vector> // vector
  7. #include <nlohmann/detail/exceptions.hpp>
  8. #include <nlohmann/detail/macro_scope.hpp>
  9. namespace nlohmann
  10. {
  11. /*!
  12. @brief SAX interface
  13. This class describes the SAX interface used by @ref nlohmann::json::sax_parse.
  14. Each function is called in different situations while the input is parsed. The
  15. boolean return value informs the parser whether to continue processing the
  16. input.
  17. */
  18. template<typename BasicJsonType>
  19. struct json_sax
  20. {
  21. using number_integer_t = typename BasicJsonType::number_integer_t;
  22. using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  23. using number_float_t = typename BasicJsonType::number_float_t;
  24. using string_t = typename BasicJsonType::string_t;
  25. using binary_t = typename BasicJsonType::binary_t;
  26. /*!
  27. @brief a null value was read
  28. @return whether parsing should proceed
  29. */
  30. virtual bool null() = 0;
  31. /*!
  32. @brief a boolean value was read
  33. @param[in] val boolean value
  34. @return whether parsing should proceed
  35. */
  36. virtual bool boolean(bool val) = 0;
  37. /*!
  38. @brief an integer number was read
  39. @param[in] val integer value
  40. @return whether parsing should proceed
  41. */
  42. virtual bool number_integer(number_integer_t val) = 0;
  43. /*!
  44. @brief an unsigned integer number was read
  45. @param[in] val unsigned integer value
  46. @return whether parsing should proceed
  47. */
  48. virtual bool number_unsigned(number_unsigned_t val) = 0;
  49. /*!
  50. @brief an floating-point number was read
  51. @param[in] val floating-point value
  52. @param[in] s raw token value
  53. @return whether parsing should proceed
  54. */
  55. virtual bool number_float(number_float_t val, const string_t& s) = 0;
  56. /*!
  57. @brief a string was read
  58. @param[in] val string value
  59. @return whether parsing should proceed
  60. @note It is safe to move the passed string.
  61. */
  62. virtual bool string(string_t& val) = 0;
  63. /*!
  64. @brief a binary string was read
  65. @param[in] val binary value
  66. @return whether parsing should proceed
  67. @note It is safe to move the passed binary.
  68. */
  69. virtual bool binary(binary_t& val) = 0;
  70. /*!
  71. @brief the beginning of an object was read
  72. @param[in] elements number of object elements or -1 if unknown
  73. @return whether parsing should proceed
  74. @note binary formats may report the number of elements
  75. */
  76. virtual bool start_object(std::size_t elements) = 0;
  77. /*!
  78. @brief an object key was read
  79. @param[in] val object key
  80. @return whether parsing should proceed
  81. @note It is safe to move the passed string.
  82. */
  83. virtual bool key(string_t& val) = 0;
  84. /*!
  85. @brief the end of an object was read
  86. @return whether parsing should proceed
  87. */
  88. virtual bool end_object() = 0;
  89. /*!
  90. @brief the beginning of an array was read
  91. @param[in] elements number of array elements or -1 if unknown
  92. @return whether parsing should proceed
  93. @note binary formats may report the number of elements
  94. */
  95. virtual bool start_array(std::size_t elements) = 0;
  96. /*!
  97. @brief the end of an array was read
  98. @return whether parsing should proceed
  99. */
  100. virtual bool end_array() = 0;
  101. /*!
  102. @brief a parse error occurred
  103. @param[in] position the position in the input where the error occurs
  104. @param[in] last_token the last read token
  105. @param[in] ex an exception object describing the error
  106. @return whether parsing should proceed (must return false)
  107. */
  108. virtual bool parse_error(std::size_t position,
  109. const std::string& last_token,
  110. const detail::exception& ex) = 0;
  111. virtual ~json_sax() = default;
  112. };
  113. namespace detail
  114. {
  115. /*!
  116. @brief SAX implementation to create a JSON value from SAX events
  117. This class implements the @ref json_sax interface and processes the SAX events
  118. to create a JSON value which makes it basically a DOM parser. The structure or
  119. hierarchy of the JSON value is managed by the stack `ref_stack` which contains
  120. a pointer to the respective array or object for each recursion depth.
  121. After successful parsing, the value that is passed by reference to the
  122. constructor contains the parsed value.
  123. @tparam BasicJsonType the JSON type
  124. */
  125. template<typename BasicJsonType>
  126. class json_sax_dom_parser
  127. {
  128. public:
  129. using number_integer_t = typename BasicJsonType::number_integer_t;
  130. using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  131. using number_float_t = typename BasicJsonType::number_float_t;
  132. using string_t = typename BasicJsonType::string_t;
  133. using binary_t = typename BasicJsonType::binary_t;
  134. /*!
  135. @param[in, out] r reference to a JSON value that is manipulated while
  136. parsing
  137. @param[in] allow_exceptions_ whether parse errors yield exceptions
  138. */
  139. explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true)
  140. : root(r), allow_exceptions(allow_exceptions_)
  141. {}
  142. // make class move-only
  143. json_sax_dom_parser(const json_sax_dom_parser&) = delete;
  144. json_sax_dom_parser(json_sax_dom_parser&&) = default;
  145. json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete;
  146. json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default;
  147. ~json_sax_dom_parser() = default;
  148. bool null()
  149. {
  150. handle_value(nullptr);
  151. return true;
  152. }
  153. bool boolean(bool val)
  154. {
  155. handle_value(val);
  156. return true;
  157. }
  158. bool number_integer(number_integer_t val)
  159. {
  160. handle_value(val);
  161. return true;
  162. }
  163. bool number_unsigned(number_unsigned_t val)
  164. {
  165. handle_value(val);
  166. return true;
  167. }
  168. bool number_float(number_float_t val, const string_t& /*unused*/)
  169. {
  170. handle_value(val);
  171. return true;
  172. }
  173. bool string(string_t& val)
  174. {
  175. handle_value(val);
  176. return true;
  177. }
  178. bool binary(binary_t& val)
  179. {
  180. handle_value(std::move(val));
  181. return true;
  182. }
  183. bool start_object(std::size_t len)
  184. {
  185. ref_stack.push_back(handle_value(BasicJsonType::value_t::object));
  186. if (JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size()))
  187. {
  188. JSON_THROW(out_of_range::create(408,
  189. "excessive object size: " + std::to_string(len)));
  190. }
  191. return true;
  192. }
  193. bool key(string_t& val)
  194. {
  195. // add null at given key and store the reference for later
  196. object_element = &(ref_stack.back()->m_value.object->operator[](val));
  197. return true;
  198. }
  199. bool end_object()
  200. {
  201. ref_stack.pop_back();
  202. return true;
  203. }
  204. bool start_array(std::size_t len)
  205. {
  206. ref_stack.push_back(handle_value(BasicJsonType::value_t::array));
  207. if (JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size()))
  208. {
  209. JSON_THROW(out_of_range::create(408,
  210. "excessive array size: " + std::to_string(len)));
  211. }
  212. return true;
  213. }
  214. bool end_array()
  215. {
  216. ref_stack.pop_back();
  217. return true;
  218. }
  219. bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/,
  220. const detail::exception& ex)
  221. {
  222. errored = true;
  223. if (allow_exceptions)
  224. {
  225. // determine the proper exception type from the id
  226. switch ((ex.id / 100) % 100)
  227. {
  228. case 1:
  229. JSON_THROW(*static_cast<const detail::parse_error*>(&ex));
  230. case 4:
  231. JSON_THROW(*static_cast<const detail::out_of_range*>(&ex));
  232. // LCOV_EXCL_START
  233. case 2:
  234. JSON_THROW(*static_cast<const detail::invalid_iterator*>(&ex));
  235. case 3:
  236. JSON_THROW(*static_cast<const detail::type_error*>(&ex));
  237. case 5:
  238. JSON_THROW(*static_cast<const detail::other_error*>(&ex));
  239. default:
  240. assert(false);
  241. // LCOV_EXCL_STOP
  242. }
  243. }
  244. return false;
  245. }
  246. constexpr bool is_errored() const
  247. {
  248. return errored;
  249. }
  250. private:
  251. /*!
  252. @invariant If the ref stack is empty, then the passed value will be the new
  253. root.
  254. @invariant If the ref stack contains a value, then it is an array or an
  255. object to which we can add elements
  256. */
  257. template<typename Value>
  258. JSON_HEDLEY_RETURNS_NON_NULL
  259. BasicJsonType* handle_value(Value&& v)
  260. {
  261. if (ref_stack.empty())
  262. {
  263. root = BasicJsonType(std::forward<Value>(v));
  264. return &root;
  265. }
  266. assert(ref_stack.back()->is_array() or ref_stack.back()->is_object());
  267. if (ref_stack.back()->is_array())
  268. {
  269. ref_stack.back()->m_value.array->emplace_back(std::forward<Value>(v));
  270. return &(ref_stack.back()->m_value.array->back());
  271. }
  272. assert(ref_stack.back()->is_object());
  273. assert(object_element);
  274. *object_element = BasicJsonType(std::forward<Value>(v));
  275. return object_element;
  276. }
  277. /// the parsed JSON value
  278. BasicJsonType& root;
  279. /// stack to model hierarchy of values
  280. std::vector<BasicJsonType*> ref_stack {};
  281. /// helper to hold the reference for the next object element
  282. BasicJsonType* object_element = nullptr;
  283. /// whether a syntax error occurred
  284. bool errored = false;
  285. /// whether to throw exceptions in case of errors
  286. const bool allow_exceptions = true;
  287. };
  288. template<typename BasicJsonType>
  289. class json_sax_dom_callback_parser
  290. {
  291. public:
  292. using number_integer_t = typename BasicJsonType::number_integer_t;
  293. using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  294. using number_float_t = typename BasicJsonType::number_float_t;
  295. using string_t = typename BasicJsonType::string_t;
  296. using binary_t = typename BasicJsonType::binary_t;
  297. using parser_callback_t = typename BasicJsonType::parser_callback_t;
  298. using parse_event_t = typename BasicJsonType::parse_event_t;
  299. json_sax_dom_callback_parser(BasicJsonType& r,
  300. const parser_callback_t cb,
  301. const bool allow_exceptions_ = true)
  302. : root(r), callback(cb), allow_exceptions(allow_exceptions_)
  303. {
  304. keep_stack.push_back(true);
  305. }
  306. // make class move-only
  307. json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete;
  308. json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default;
  309. json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete;
  310. json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default;
  311. ~json_sax_dom_callback_parser() = default;
  312. bool null()
  313. {
  314. handle_value(nullptr);
  315. return true;
  316. }
  317. bool boolean(bool val)
  318. {
  319. handle_value(val);
  320. return true;
  321. }
  322. bool number_integer(number_integer_t val)
  323. {
  324. handle_value(val);
  325. return true;
  326. }
  327. bool number_unsigned(number_unsigned_t val)
  328. {
  329. handle_value(val);
  330. return true;
  331. }
  332. bool number_float(number_float_t val, const string_t& /*unused*/)
  333. {
  334. handle_value(val);
  335. return true;
  336. }
  337. bool string(string_t& val)
  338. {
  339. handle_value(val);
  340. return true;
  341. }
  342. bool binary(binary_t& val)
  343. {
  344. handle_value(std::move(val));
  345. return true;
  346. }
  347. bool start_object(std::size_t len)
  348. {
  349. // check callback for object start
  350. const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::object_start, discarded);
  351. keep_stack.push_back(keep);
  352. auto val = handle_value(BasicJsonType::value_t::object, true);
  353. ref_stack.push_back(val.second);
  354. // check object limit
  355. if (ref_stack.back() and JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size()))
  356. {
  357. JSON_THROW(out_of_range::create(408, "excessive object size: " + std::to_string(len)));
  358. }
  359. return true;
  360. }
  361. bool key(string_t& val)
  362. {
  363. BasicJsonType k = BasicJsonType(val);
  364. // check callback for key
  365. const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::key, k);
  366. key_keep_stack.push_back(keep);
  367. // add discarded value at given key and store the reference for later
  368. if (keep and ref_stack.back())
  369. {
  370. object_element = &(ref_stack.back()->m_value.object->operator[](val) = discarded);
  371. }
  372. return true;
  373. }
  374. bool end_object()
  375. {
  376. if (ref_stack.back() and not callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back()))
  377. {
  378. // discard object
  379. *ref_stack.back() = discarded;
  380. }
  381. assert(not ref_stack.empty());
  382. assert(not keep_stack.empty());
  383. ref_stack.pop_back();
  384. keep_stack.pop_back();
  385. if (not ref_stack.empty() and ref_stack.back() and ref_stack.back()->is_object())
  386. {
  387. // remove discarded value
  388. for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it)
  389. {
  390. if (it->is_discarded())
  391. {
  392. ref_stack.back()->erase(it);
  393. break;
  394. }
  395. }
  396. }
  397. return true;
  398. }
  399. bool start_array(std::size_t len)
  400. {
  401. const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::array_start, discarded);
  402. keep_stack.push_back(keep);
  403. auto val = handle_value(BasicJsonType::value_t::array, true);
  404. ref_stack.push_back(val.second);
  405. // check array limit
  406. if (ref_stack.back() and JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size()))
  407. {
  408. JSON_THROW(out_of_range::create(408, "excessive array size: " + std::to_string(len)));
  409. }
  410. return true;
  411. }
  412. bool end_array()
  413. {
  414. bool keep = true;
  415. if (ref_stack.back())
  416. {
  417. keep = callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back());
  418. if (not keep)
  419. {
  420. // discard array
  421. *ref_stack.back() = discarded;
  422. }
  423. }
  424. assert(not ref_stack.empty());
  425. assert(not keep_stack.empty());
  426. ref_stack.pop_back();
  427. keep_stack.pop_back();
  428. // remove discarded value
  429. if (not keep and not ref_stack.empty() and ref_stack.back()->is_array())
  430. {
  431. ref_stack.back()->m_value.array->pop_back();
  432. }
  433. return true;
  434. }
  435. bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/,
  436. const detail::exception& ex)
  437. {
  438. errored = true;
  439. if (allow_exceptions)
  440. {
  441. // determine the proper exception type from the id
  442. switch ((ex.id / 100) % 100)
  443. {
  444. case 1:
  445. JSON_THROW(*static_cast<const detail::parse_error*>(&ex));
  446. case 4:
  447. JSON_THROW(*static_cast<const detail::out_of_range*>(&ex));
  448. // LCOV_EXCL_START
  449. case 2:
  450. JSON_THROW(*static_cast<const detail::invalid_iterator*>(&ex));
  451. case 3:
  452. JSON_THROW(*static_cast<const detail::type_error*>(&ex));
  453. case 5:
  454. JSON_THROW(*static_cast<const detail::other_error*>(&ex));
  455. default:
  456. assert(false);
  457. // LCOV_EXCL_STOP
  458. }
  459. }
  460. return false;
  461. }
  462. constexpr bool is_errored() const
  463. {
  464. return errored;
  465. }
  466. private:
  467. /*!
  468. @param[in] v value to add to the JSON value we build during parsing
  469. @param[in] skip_callback whether we should skip calling the callback
  470. function; this is required after start_array() and
  471. start_object() SAX events, because otherwise we would call the
  472. callback function with an empty array or object, respectively.
  473. @invariant If the ref stack is empty, then the passed value will be the new
  474. root.
  475. @invariant If the ref stack contains a value, then it is an array or an
  476. object to which we can add elements
  477. @return pair of boolean (whether value should be kept) and pointer (to the
  478. passed value in the ref_stack hierarchy; nullptr if not kept)
  479. */
  480. template<typename Value>
  481. std::pair<bool, BasicJsonType*> handle_value(Value&& v, const bool skip_callback = false)
  482. {
  483. assert(not keep_stack.empty());
  484. // do not handle this value if we know it would be added to a discarded
  485. // container
  486. if (not keep_stack.back())
  487. {
  488. return {false, nullptr};
  489. }
  490. // create value
  491. auto value = BasicJsonType(std::forward<Value>(v));
  492. // check callback
  493. const bool keep = skip_callback or callback(static_cast<int>(ref_stack.size()), parse_event_t::value, value);
  494. // do not handle this value if we just learnt it shall be discarded
  495. if (not keep)
  496. {
  497. return {false, nullptr};
  498. }
  499. if (ref_stack.empty())
  500. {
  501. root = std::move(value);
  502. return {true, &root};
  503. }
  504. // skip this value if we already decided to skip the parent
  505. // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360)
  506. if (not ref_stack.back())
  507. {
  508. return {false, nullptr};
  509. }
  510. // we now only expect arrays and objects
  511. assert(ref_stack.back()->is_array() or ref_stack.back()->is_object());
  512. // array
  513. if (ref_stack.back()->is_array())
  514. {
  515. ref_stack.back()->m_value.array->push_back(std::move(value));
  516. return {true, &(ref_stack.back()->m_value.array->back())};
  517. }
  518. // object
  519. assert(ref_stack.back()->is_object());
  520. // check if we should store an element for the current key
  521. assert(not key_keep_stack.empty());
  522. const bool store_element = key_keep_stack.back();
  523. key_keep_stack.pop_back();
  524. if (not store_element)
  525. {
  526. return {false, nullptr};
  527. }
  528. assert(object_element);
  529. *object_element = std::move(value);
  530. return {true, object_element};
  531. }
  532. /// the parsed JSON value
  533. BasicJsonType& root;
  534. /// stack to model hierarchy of values
  535. std::vector<BasicJsonType*> ref_stack {};
  536. /// stack to manage which values to keep
  537. std::vector<bool> keep_stack {};
  538. /// stack to manage which object keys to keep
  539. std::vector<bool> key_keep_stack {};
  540. /// helper to hold the reference for the next object element
  541. BasicJsonType* object_element = nullptr;
  542. /// whether a syntax error occurred
  543. bool errored = false;
  544. /// callback function
  545. const parser_callback_t callback = nullptr;
  546. /// whether to throw exceptions in case of errors
  547. const bool allow_exceptions = true;
  548. /// a discarded value for the callback
  549. BasicJsonType discarded = BasicJsonType::value_t::discarded;
  550. };
  551. template<typename BasicJsonType>
  552. class json_sax_acceptor
  553. {
  554. public:
  555. using number_integer_t = typename BasicJsonType::number_integer_t;
  556. using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  557. using number_float_t = typename BasicJsonType::number_float_t;
  558. using string_t = typename BasicJsonType::string_t;
  559. using binary_t = typename BasicJsonType::binary_t;
  560. bool null()
  561. {
  562. return true;
  563. }
  564. bool boolean(bool /*unused*/)
  565. {
  566. return true;
  567. }
  568. bool number_integer(number_integer_t /*unused*/)
  569. {
  570. return true;
  571. }
  572. bool number_unsigned(number_unsigned_t /*unused*/)
  573. {
  574. return true;
  575. }
  576. bool number_float(number_float_t /*unused*/, const string_t& /*unused*/)
  577. {
  578. return true;
  579. }
  580. bool string(string_t& /*unused*/)
  581. {
  582. return true;
  583. }
  584. bool binary(binary_t& /*unused*/)
  585. {
  586. return true;
  587. }
  588. bool start_object(std::size_t /*unused*/ = std::size_t(-1))
  589. {
  590. return true;
  591. }
  592. bool key(string_t& /*unused*/)
  593. {
  594. return true;
  595. }
  596. bool end_object()
  597. {
  598. return true;
  599. }
  600. bool start_array(std::size_t /*unused*/ = std::size_t(-1))
  601. {
  602. return true;
  603. }
  604. bool end_array()
  605. {
  606. return true;
  607. }
  608. bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/)
  609. {
  610. return false;
  611. }
  612. };
  613. } // namespace detail
  614. } // namespace nlohmann