// json_sax.hpp (22 KB)
  1. #pragma once
  2. #include <cassert> // assert
  3. #include <cstddef>
  4. #include <string> // string
  5. #include <utility> // move
  6. #include <vector> // vector
  7. #include <nlohmann/detail/exceptions.hpp>
  8. #include <nlohmann/detail/macro_scope.hpp>
  9. namespace nlohmann
  10. {
  11. /*!
  12. @brief SAX interface
  13. This class describes the SAX interface used by @ref nlohmann::json::sax_parse.
  14. Each function is called in different situations while the input is parsed. The
  15. boolean return value informs the parser whether to continue processing the
  16. input.
  17. */
  18. template<typename BasicJsonType>
  19. struct json_sax
  20. {
  21. /// type for (signed) integers
  22. using number_integer_t = typename BasicJsonType::number_integer_t;
  23. /// type for unsigned integers
  24. using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  25. /// type for floating-point numbers
  26. using number_float_t = typename BasicJsonType::number_float_t;
  27. /// type for strings
  28. using string_t = typename BasicJsonType::string_t;
  29. using binary_t = typename BasicJsonType::binary_t;
  30. /*!
  31. @brief a null value was read
  32. @return whether parsing should proceed
  33. */
  34. virtual bool null() = 0;
  35. /*!
  36. @brief a boolean value was read
  37. @param[in] val boolean value
  38. @return whether parsing should proceed
  39. */
  40. virtual bool boolean(bool val) = 0;
  41. /*!
  42. @brief an integer number was read
  43. @param[in] val integer value
  44. @return whether parsing should proceed
  45. */
  46. virtual bool number_integer(number_integer_t val) = 0;
  47. /*!
  48. @brief an unsigned integer number was read
  49. @param[in] val unsigned integer value
  50. @return whether parsing should proceed
  51. */
  52. virtual bool number_unsigned(number_unsigned_t val) = 0;
  53. /*!
  54. @brief an floating-point number was read
  55. @param[in] val floating-point value
  56. @param[in] s raw token value
  57. @return whether parsing should proceed
  58. */
  59. virtual bool number_float(number_float_t val, const string_t& s) = 0;
  60. /*!
  61. @brief a string was read
  62. @param[in] val string value
  63. @return whether parsing should proceed
  64. @note It is safe to move the passed string.
  65. */
  66. virtual bool string(string_t& val) = 0;
  67. /*!
  68. @brief a binary string was read
  69. @param[in] val binary value
  70. @return whether parsing should proceed
  71. @note It is safe to move the passed binary.
  72. */
  73. virtual bool binary(binary_t& val) = 0;
  74. /*!
  75. @brief the beginning of an object was read
  76. @param[in] elements number of object elements or -1 if unknown
  77. @return whether parsing should proceed
  78. @note binary formats may report the number of elements
  79. */
  80. virtual bool start_object(std::size_t elements) = 0;
  81. /*!
  82. @brief an object key was read
  83. @param[in] val object key
  84. @return whether parsing should proceed
  85. @note It is safe to move the passed string.
  86. */
  87. virtual bool key(string_t& val) = 0;
  88. /*!
  89. @brief the end of an object was read
  90. @return whether parsing should proceed
  91. */
  92. virtual bool end_object() = 0;
  93. /*!
  94. @brief the beginning of an array was read
  95. @param[in] elements number of array elements or -1 if unknown
  96. @return whether parsing should proceed
  97. @note binary formats may report the number of elements
  98. */
  99. virtual bool start_array(std::size_t elements) = 0;
  100. /*!
  101. @brief the end of an array was read
  102. @return whether parsing should proceed
  103. */
  104. virtual bool end_array() = 0;
  105. /*!
  106. @brief a parse error occurred
  107. @param[in] position the position in the input where the error occurs
  108. @param[in] last_token the last read token
  109. @param[in] ex an exception object describing the error
  110. @return whether parsing should proceed (must return false)
  111. */
  112. virtual bool parse_error(std::size_t position,
  113. const std::string& last_token,
  114. const detail::exception& ex) = 0;
  115. virtual ~json_sax() = default;
  116. };
  117. namespace detail
  118. {
  119. /*!
  120. @brief SAX implementation to create a JSON value from SAX events
  121. This class implements the @ref json_sax interface and processes the SAX events
  122. to create a JSON value which makes it basically a DOM parser. The structure or
  123. hierarchy of the JSON value is managed by the stack `ref_stack` which contains
  124. a pointer to the respective array or object for each recursion depth.
  125. After successful parsing, the value that is passed by reference to the
  126. constructor contains the parsed value.
  127. @tparam BasicJsonType the JSON type
  128. */
  129. template<typename BasicJsonType>
  130. class json_sax_dom_parser
  131. {
  132. public:
  133. using number_integer_t = typename BasicJsonType::number_integer_t;
  134. using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  135. using number_float_t = typename BasicJsonType::number_float_t;
  136. using string_t = typename BasicJsonType::string_t;
  137. using binary_t = typename BasicJsonType::binary_t;
  138. /*!
  139. @param[in, out] r reference to a JSON value that is manipulated while
  140. parsing
  141. @param[in] allow_exceptions_ whether parse errors yield exceptions
  142. */
  143. explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true)
  144. : root(r), allow_exceptions(allow_exceptions_)
  145. {}
  146. // make class move-only
  147. json_sax_dom_parser(const json_sax_dom_parser&) = delete;
  148. json_sax_dom_parser(json_sax_dom_parser&&) = default;
  149. json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete;
  150. json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default;
  151. ~json_sax_dom_parser() = default;
  152. bool null()
  153. {
  154. handle_value(nullptr);
  155. return true;
  156. }
  157. bool boolean(bool val)
  158. {
  159. handle_value(val);
  160. return true;
  161. }
  162. bool number_integer(number_integer_t val)
  163. {
  164. handle_value(val);
  165. return true;
  166. }
  167. bool number_unsigned(number_unsigned_t val)
  168. {
  169. handle_value(val);
  170. return true;
  171. }
  172. bool number_float(number_float_t val, const string_t& /*unused*/)
  173. {
  174. handle_value(val);
  175. return true;
  176. }
  177. bool string(string_t& val)
  178. {
  179. handle_value(val);
  180. return true;
  181. }
  182. bool binary(binary_t& val)
  183. {
  184. handle_binary(val);
  185. return true;
  186. }
  187. bool start_object(std::size_t len)
  188. {
  189. ref_stack.push_back(handle_value(BasicJsonType::value_t::object));
  190. if (JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size()))
  191. {
  192. JSON_THROW(out_of_range::create(408,
  193. "excessive object size: " + std::to_string(len)));
  194. }
  195. return true;
  196. }
  197. bool key(string_t& val)
  198. {
  199. // add null at given key and store the reference for later
  200. object_element = &(ref_stack.back()->m_value.object->operator[](val));
  201. return true;
  202. }
  203. bool end_object()
  204. {
  205. ref_stack.pop_back();
  206. return true;
  207. }
  208. bool start_array(std::size_t len)
  209. {
  210. ref_stack.push_back(handle_value(BasicJsonType::value_t::array));
  211. if (JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size()))
  212. {
  213. JSON_THROW(out_of_range::create(408,
  214. "excessive array size: " + std::to_string(len)));
  215. }
  216. return true;
  217. }
  218. bool end_array()
  219. {
  220. ref_stack.pop_back();
  221. return true;
  222. }
  223. bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/,
  224. const detail::exception& ex)
  225. {
  226. errored = true;
  227. if (allow_exceptions)
  228. {
  229. // determine the proper exception type from the id
  230. switch ((ex.id / 100) % 100)
  231. {
  232. case 1:
  233. JSON_THROW(*static_cast<const detail::parse_error*>(&ex));
  234. case 4:
  235. JSON_THROW(*static_cast<const detail::out_of_range*>(&ex));
  236. // LCOV_EXCL_START
  237. case 2:
  238. JSON_THROW(*static_cast<const detail::invalid_iterator*>(&ex));
  239. case 3:
  240. JSON_THROW(*static_cast<const detail::type_error*>(&ex));
  241. case 5:
  242. JSON_THROW(*static_cast<const detail::other_error*>(&ex));
  243. default:
  244. assert(false);
  245. // LCOV_EXCL_STOP
  246. }
  247. }
  248. return false;
  249. }
  250. constexpr bool is_errored() const
  251. {
  252. return errored;
  253. }
  254. private:
  255. /*!
  256. @invariant If the ref stack is empty, then the passed value will be the new
  257. root.
  258. @invariant If the ref stack contains a value, then it is an array or an
  259. object to which we can add elements
  260. */
  261. template<typename Value>
  262. JSON_HEDLEY_RETURNS_NON_NULL
  263. BasicJsonType* handle_value(Value&& v)
  264. {
  265. if (ref_stack.empty())
  266. {
  267. root = BasicJsonType(std::forward<Value>(v));
  268. return &root;
  269. }
  270. assert(ref_stack.back()->is_array() or ref_stack.back()->is_object());
  271. if (ref_stack.back()->is_array())
  272. {
  273. ref_stack.back()->m_value.array->emplace_back(std::forward<Value>(v));
  274. return &(ref_stack.back()->m_value.array->back());
  275. }
  276. assert(ref_stack.back()->is_object());
  277. assert(object_element);
  278. *object_element = BasicJsonType(std::forward<Value>(v));
  279. return object_element;
  280. }
  281. /*!
  282. @invariant If the ref stack is empty, then the passed value will be the new
  283. root.
  284. @invariant If the ref stack contains a value, then it is an array or an
  285. object to which we can add elements
  286. */
  287. template<typename BinaryValue>
  288. JSON_HEDLEY_RETURNS_NON_NULL
  289. BasicJsonType* handle_binary(BinaryValue&& v)
  290. {
  291. if (ref_stack.empty())
  292. {
  293. root = BasicJsonType::binary_array(std::forward<BinaryValue>(v));
  294. return &root;
  295. }
  296. assert(ref_stack.back()->is_array() or ref_stack.back()->is_object());
  297. if (ref_stack.back()->is_array())
  298. {
  299. ref_stack.back()->m_value.array->emplace_back(BasicJsonType::binary_array(std::forward<BinaryValue>(v)));
  300. return &(ref_stack.back()->m_value.array->back());
  301. }
  302. assert(ref_stack.back()->is_object());
  303. assert(object_element);
  304. *object_element = BasicJsonType::binary_array(std::forward<BinaryValue>(v));
  305. return object_element;
  306. }
  307. /// the parsed JSON value
  308. BasicJsonType& root;
  309. /// stack to model hierarchy of values
  310. std::vector<BasicJsonType*> ref_stack {};
  311. /// helper to hold the reference for the next object element
  312. BasicJsonType* object_element = nullptr;
  313. /// whether a syntax error occurred
  314. bool errored = false;
  315. /// whether to throw exceptions in case of errors
  316. const bool allow_exceptions = true;
  317. };
  318. template<typename BasicJsonType>
  319. class json_sax_dom_callback_parser
  320. {
  321. public:
  322. using number_integer_t = typename BasicJsonType::number_integer_t;
  323. using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  324. using number_float_t = typename BasicJsonType::number_float_t;
  325. using string_t = typename BasicJsonType::string_t;
  326. using binary_t = typename BasicJsonType::binary_t;
  327. using parser_callback_t = typename BasicJsonType::parser_callback_t;
  328. using parse_event_t = typename BasicJsonType::parse_event_t;
  329. json_sax_dom_callback_parser(BasicJsonType& r,
  330. const parser_callback_t cb,
  331. const bool allow_exceptions_ = true)
  332. : root(r), callback(cb), allow_exceptions(allow_exceptions_)
  333. {
  334. keep_stack.push_back(true);
  335. }
  336. // make class move-only
  337. json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete;
  338. json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default;
  339. json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete;
  340. json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default;
  341. ~json_sax_dom_callback_parser() = default;
  342. bool null()
  343. {
  344. handle_value(nullptr);
  345. return true;
  346. }
  347. bool boolean(bool val)
  348. {
  349. handle_value(val);
  350. return true;
  351. }
  352. bool number_integer(number_integer_t val)
  353. {
  354. handle_value(val);
  355. return true;
  356. }
  357. bool number_unsigned(number_unsigned_t val)
  358. {
  359. handle_value(val);
  360. return true;
  361. }
  362. bool number_float(number_float_t val, const string_t& /*unused*/)
  363. {
  364. handle_value(val);
  365. return true;
  366. }
  367. bool string(string_t& val)
  368. {
  369. handle_value(val);
  370. return true;
  371. }
  372. bool binary(binary_t& val)
  373. {
  374. handle_value(val);
  375. return true;
  376. }
  377. bool start_object(std::size_t len)
  378. {
  379. // check callback for object start
  380. const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::object_start, discarded);
  381. keep_stack.push_back(keep);
  382. auto val = handle_value(BasicJsonType::value_t::object, true);
  383. ref_stack.push_back(val.second);
  384. // check object limit
  385. if (ref_stack.back() and JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size()))
  386. {
  387. JSON_THROW(out_of_range::create(408, "excessive object size: " + std::to_string(len)));
  388. }
  389. return true;
  390. }
  391. bool key(string_t& val)
  392. {
  393. BasicJsonType k = BasicJsonType(val);
  394. // check callback for key
  395. const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::key, k);
  396. key_keep_stack.push_back(keep);
  397. // add discarded value at given key and store the reference for later
  398. if (keep and ref_stack.back())
  399. {
  400. object_element = &(ref_stack.back()->m_value.object->operator[](val) = discarded);
  401. }
  402. return true;
  403. }
  404. bool end_object()
  405. {
  406. if (ref_stack.back() and not callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back()))
  407. {
  408. // discard object
  409. *ref_stack.back() = discarded;
  410. }
  411. assert(not ref_stack.empty());
  412. assert(not keep_stack.empty());
  413. ref_stack.pop_back();
  414. keep_stack.pop_back();
  415. if (not ref_stack.empty() and ref_stack.back() and ref_stack.back()->is_object())
  416. {
  417. // remove discarded value
  418. for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it)
  419. {
  420. if (it->is_discarded())
  421. {
  422. ref_stack.back()->erase(it);
  423. break;
  424. }
  425. }
  426. }
  427. return true;
  428. }
  429. bool start_array(std::size_t len)
  430. {
  431. const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::array_start, discarded);
  432. keep_stack.push_back(keep);
  433. auto val = handle_value(BasicJsonType::value_t::array, true);
  434. ref_stack.push_back(val.second);
  435. // check array limit
  436. if (ref_stack.back() and JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size()))
  437. {
  438. JSON_THROW(out_of_range::create(408, "excessive array size: " + std::to_string(len)));
  439. }
  440. return true;
  441. }
  442. bool end_array()
  443. {
  444. bool keep = true;
  445. if (ref_stack.back())
  446. {
  447. keep = callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back());
  448. if (not keep)
  449. {
  450. // discard array
  451. *ref_stack.back() = discarded;
  452. }
  453. }
  454. assert(not ref_stack.empty());
  455. assert(not keep_stack.empty());
  456. ref_stack.pop_back();
  457. keep_stack.pop_back();
  458. // remove discarded value
  459. if (not keep and not ref_stack.empty() and ref_stack.back()->is_array())
  460. {
  461. ref_stack.back()->m_value.array->pop_back();
  462. }
  463. return true;
  464. }
  465. bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/,
  466. const detail::exception& ex)
  467. {
  468. errored = true;
  469. if (allow_exceptions)
  470. {
  471. // determine the proper exception type from the id
  472. switch ((ex.id / 100) % 100)
  473. {
  474. case 1:
  475. JSON_THROW(*static_cast<const detail::parse_error*>(&ex));
  476. case 4:
  477. JSON_THROW(*static_cast<const detail::out_of_range*>(&ex));
  478. // LCOV_EXCL_START
  479. case 2:
  480. JSON_THROW(*static_cast<const detail::invalid_iterator*>(&ex));
  481. case 3:
  482. JSON_THROW(*static_cast<const detail::type_error*>(&ex));
  483. case 5:
  484. JSON_THROW(*static_cast<const detail::other_error*>(&ex));
  485. default:
  486. assert(false);
  487. // LCOV_EXCL_STOP
  488. }
  489. }
  490. return false;
  491. }
  492. constexpr bool is_errored() const
  493. {
  494. return errored;
  495. }
  496. private:
  497. /*!
  498. @param[in] v value to add to the JSON value we build during parsing
  499. @param[in] skip_callback whether we should skip calling the callback
  500. function; this is required after start_array() and
  501. start_object() SAX events, because otherwise we would call the
  502. callback function with an empty array or object, respectively.
  503. @invariant If the ref stack is empty, then the passed value will be the new
  504. root.
  505. @invariant If the ref stack contains a value, then it is an array or an
  506. object to which we can add elements
  507. @return pair of boolean (whether value should be kept) and pointer (to the
  508. passed value in the ref_stack hierarchy; nullptr if not kept)
  509. */
  510. template<typename Value>
  511. std::pair<bool, BasicJsonType*> handle_value(Value&& v, const bool skip_callback = false)
  512. {
  513. assert(not keep_stack.empty());
  514. // do not handle this value if we know it would be added to a discarded
  515. // container
  516. if (not keep_stack.back())
  517. {
  518. return {false, nullptr};
  519. }
  520. // create value
  521. auto value = BasicJsonType(std::forward<Value>(v));
  522. // check callback
  523. const bool keep = skip_callback or callback(static_cast<int>(ref_stack.size()), parse_event_t::value, value);
  524. // do not handle this value if we just learnt it shall be discarded
  525. if (not keep)
  526. {
  527. return {false, nullptr};
  528. }
  529. if (ref_stack.empty())
  530. {
  531. root = std::move(value);
  532. return {true, &root};
  533. }
  534. // skip this value if we already decided to skip the parent
  535. // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360)
  536. if (not ref_stack.back())
  537. {
  538. return {false, nullptr};
  539. }
  540. // we now only expect arrays and objects
  541. assert(ref_stack.back()->is_array() or ref_stack.back()->is_object());
  542. // array
  543. if (ref_stack.back()->is_array())
  544. {
  545. ref_stack.back()->m_value.array->push_back(std::move(value));
  546. return {true, &(ref_stack.back()->m_value.array->back())};
  547. }
  548. // object
  549. assert(ref_stack.back()->is_object());
  550. // check if we should store an element for the current key
  551. assert(not key_keep_stack.empty());
  552. const bool store_element = key_keep_stack.back();
  553. key_keep_stack.pop_back();
  554. if (not store_element)
  555. {
  556. return {false, nullptr};
  557. }
  558. assert(object_element);
  559. *object_element = std::move(value);
  560. return {true, object_element};
  561. }
  562. /// the parsed JSON value
  563. BasicJsonType& root;
  564. /// stack to model hierarchy of values
  565. std::vector<BasicJsonType*> ref_stack {};
  566. /// stack to manage which values to keep
  567. std::vector<bool> keep_stack {};
  568. /// stack to manage which object keys to keep
  569. std::vector<bool> key_keep_stack {};
  570. /// helper to hold the reference for the next object element
  571. BasicJsonType* object_element = nullptr;
  572. /// whether a syntax error occurred
  573. bool errored = false;
  574. /// callback function
  575. const parser_callback_t callback = nullptr;
  576. /// whether to throw exceptions in case of errors
  577. const bool allow_exceptions = true;
  578. /// a discarded value for the callback
  579. BasicJsonType discarded = BasicJsonType::value_t::discarded;
  580. };
  581. template<typename BasicJsonType>
  582. class json_sax_acceptor
  583. {
  584. public:
  585. using number_integer_t = typename BasicJsonType::number_integer_t;
  586. using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  587. using number_float_t = typename BasicJsonType::number_float_t;
  588. using string_t = typename BasicJsonType::string_t;
  589. using binary_t = typename BasicJsonType::binary_t;
  590. bool null()
  591. {
  592. return true;
  593. }
  594. bool boolean(bool /*unused*/)
  595. {
  596. return true;
  597. }
  598. bool number_integer(number_integer_t /*unused*/)
  599. {
  600. return true;
  601. }
  602. bool number_unsigned(number_unsigned_t /*unused*/)
  603. {
  604. return true;
  605. }
  606. bool number_float(number_float_t /*unused*/, const string_t& /*unused*/)
  607. {
  608. return true;
  609. }
  610. bool string(string_t& /*unused*/)
  611. {
  612. return true;
  613. }
  614. bool binary(binary_t& /*unused*/)
  615. {
  616. return true;
  617. }
  618. bool start_object(std::size_t /*unused*/ = std::size_t(-1))
  619. {
  620. return true;
  621. }
  622. bool key(string_t& /*unused*/)
  623. {
  624. return true;
  625. }
  626. bool end_object()
  627. {
  628. return true;
  629. }
  630. bool start_array(std::size_t /*unused*/ = std::size_t(-1))
  631. {
  632. return true;
  633. }
  634. bool end_array()
  635. {
  636. return true;
  637. }
  638. bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/)
  639. {
  640. return false;
  641. }
  642. };
  643. } // namespace detail
  644. } // namespace nlohmann