parser.hpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. #pragma once
  2. #include <cassert> // assert
  3. #include <cmath> // isfinite
  4. #include <cstdint> // uint8_t
  5. #include <functional> // function
  6. #include <string> // string
  7. #include <utility> // move
  8. #include <vector> // vector
  9. #include <nlohmann/detail/exceptions.hpp>
  10. #include <nlohmann/detail/input/input_adapters.hpp>
  11. #include <nlohmann/detail/input/json_sax.hpp>
  12. #include <nlohmann/detail/input/lexer.hpp>
  13. #include <nlohmann/detail/macro_scope.hpp>
  14. #include <nlohmann/detail/meta/is_sax.hpp>
  15. #include <nlohmann/detail/value_t.hpp>
  16. namespace nlohmann
  17. {
  18. namespace detail
  19. {
  20. ////////////
  21. // parser //
  22. ////////////
  /// events reported by the parser; each enumerator names the point in the
  /// input at which the event is raised
  /// (the underlying type is spelled std::uint8_t because <cstdint> only
  /// guarantees the name inside namespace std)
  enum class parse_event_t : std::uint8_t
  {
      /// the parser read `{` and started to process a JSON object
      object_start,
      /// the parser read `}` and finished processing a JSON object
      object_end,
      /// the parser read `[` and started to process a JSON array
      array_start,
      /// the parser read `]` and finished processing a JSON array
      array_end,
      /// the parser read a key of a value in an object
      key,
      /// the parser finished reading a JSON value
      value
  };
  38. template<typename BasicJsonType>
  39. using parser_callback_t =
  40. std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
  41. /*!
  42. @brief syntax analysis
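  43. This class implements an iterative (non-recursive) parser: the nesting of arrays and objects is tracked on an explicit stack rather than on the call stack.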
  44. */
  45. template<typename BasicJsonType, typename InputAdapterType>
  46. class parser
  47. {
  // shorthands for the value types configured in BasicJsonType
  using number_integer_t = typename BasicJsonType::number_integer_t;
  using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
  using number_float_t = typename BasicJsonType::number_float_t;
  using string_t = typename BasicJsonType::string_t;
  // lexer instantiation that supplies the tokens consumed by this parser
  using lexer_t = lexer<BasicJsonType, InputAdapterType>;
  // token kinds as defined by that lexer
  using token_type = typename lexer_t::token_type;
  54. public:
  55. /// a parser reading from an input adapter
  56. explicit parser(InputAdapterType&& adapter,
  57. const parser_callback_t<BasicJsonType> cb = nullptr,
  58. const bool allow_exceptions_ = true)
  59. : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_)
  60. {
  61. // read first token
  62. get_token();
  63. }
  64. /*!
  65. @brief public parser interface
  66. @param[in] strict whether to expect the last token to be EOF
  67. @param[in,out] result parsed JSON value
  68. @throw parse_error.101 in case of an unexpected token
  69. @throw parse_error.102 if to_unicode fails or surrogate error
  70. @throw parse_error.103 if to_unicode fails
  71. */
  72. void parse(const bool strict, BasicJsonType& result)
  73. {
  74. if (callback)
  75. {
  76. json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
  77. sax_parse_internal(&sdp);
  78. result.assert_invariant();
  79. // in strict mode, input must be completely read
  80. if (strict and (get_token() != token_type::end_of_input))
  81. {
  82. sdp.parse_error(m_lexer.get_position(),
  83. m_lexer.get_token_string(),
  84. parse_error::create(101, m_lexer.get_position(),
  85. exception_message(token_type::end_of_input, "value")));
  86. }
  87. // in case of an error, return discarded value
  88. if (sdp.is_errored())
  89. {
  90. result = value_t::discarded;
  91. return;
  92. }
  93. // set top-level value to null if it was discarded by the callback
  94. // function
  95. if (result.is_discarded())
  96. {
  97. result = nullptr;
  98. }
  99. }
  100. else
  101. {
  102. json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
  103. sax_parse_internal(&sdp);
  104. result.assert_invariant();
  105. // in strict mode, input must be completely read
  106. if (strict and (get_token() != token_type::end_of_input))
  107. {
  108. sdp.parse_error(m_lexer.get_position(),
  109. m_lexer.get_token_string(),
  110. parse_error::create(101, m_lexer.get_position(),
  111. exception_message(token_type::end_of_input, "value")));
  112. }
  113. // in case of an error, return discarded value
  114. if (sdp.is_errored())
  115. {
  116. result = value_t::discarded;
  117. return;
  118. }
  119. }
  120. }
  121. /*!
  122. @brief public accept interface
  123. @param[in] strict whether to expect the last token to be EOF
  124. @return whether the input is a proper JSON text
  125. */
  126. bool accept(const bool strict = true)
  127. {
  128. json_sax_acceptor<BasicJsonType> sax_acceptor;
  129. return sax_parse(&sax_acceptor, strict);
  130. }
  131. template <typename SAX>
  132. JSON_HEDLEY_NON_NULL(2)
  133. bool sax_parse(SAX* sax, const bool strict = true)
  134. {
  135. (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
  136. const bool result = sax_parse_internal(sax);
  137. // strict mode: next byte must be EOF
  138. if (result and strict and (get_token() != token_type::end_of_input))
  139. {
  140. return sax->parse_error(m_lexer.get_position(),
  141. m_lexer.get_token_string(),
  142. parse_error::create(101, m_lexer.get_position(),
  143. exception_message(token_type::end_of_input, "value")));
  144. }
  145. return result;
  146. }
  147. private:
  148. template <typename SAX>
  149. JSON_HEDLEY_NON_NULL(2)
  150. bool sax_parse_internal(SAX* sax)
  151. {
  152. // stack to remember the hierarchy of structured values we are parsing
  153. // true = array; false = object
  154. std::vector<bool> states;
  155. // value to avoid a goto (see comment where set to true)
  156. bool skip_to_state_evaluation = false;
  157. while (true)
  158. {
  159. if (not skip_to_state_evaluation)
  160. {
  161. // invariant: get_token() was called before each iteration
  162. switch (last_token)
  163. {
  164. case token_type::begin_object:
  165. {
  166. if (JSON_HEDLEY_UNLIKELY(not sax->start_object(std::size_t(-1))))
  167. {
  168. return false;
  169. }
  170. // closing } -> we are done
  171. if (get_token() == token_type::end_object)
  172. {
  173. if (JSON_HEDLEY_UNLIKELY(not sax->end_object()))
  174. {
  175. return false;
  176. }
  177. break;
  178. }
  179. // parse key
  180. if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
  181. {
  182. return sax->parse_error(m_lexer.get_position(),
  183. m_lexer.get_token_string(),
  184. parse_error::create(101, m_lexer.get_position(),
  185. exception_message(token_type::value_string, "object key")));
  186. }
  187. if (JSON_HEDLEY_UNLIKELY(not sax->key(m_lexer.get_string())))
  188. {
  189. return false;
  190. }
  191. // parse separator (:)
  192. if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
  193. {
  194. return sax->parse_error(m_lexer.get_position(),
  195. m_lexer.get_token_string(),
  196. parse_error::create(101, m_lexer.get_position(),
  197. exception_message(token_type::name_separator, "object separator")));
  198. }
  199. // remember we are now inside an object
  200. states.push_back(false);
  201. // parse values
  202. get_token();
  203. continue;
  204. }
  205. case token_type::begin_array:
  206. {
  207. if (JSON_HEDLEY_UNLIKELY(not sax->start_array(std::size_t(-1))))
  208. {
  209. return false;
  210. }
  211. // closing ] -> we are done
  212. if (get_token() == token_type::end_array)
  213. {
  214. if (JSON_HEDLEY_UNLIKELY(not sax->end_array()))
  215. {
  216. return false;
  217. }
  218. break;
  219. }
  220. // remember we are now inside an array
  221. states.push_back(true);
  222. // parse values (no need to call get_token)
  223. continue;
  224. }
  225. case token_type::value_float:
  226. {
  227. const auto res = m_lexer.get_number_float();
  228. if (JSON_HEDLEY_UNLIKELY(not std::isfinite(res)))
  229. {
  230. return sax->parse_error(m_lexer.get_position(),
  231. m_lexer.get_token_string(),
  232. out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'"));
  233. }
  234. if (JSON_HEDLEY_UNLIKELY(not sax->number_float(res, m_lexer.get_string())))
  235. {
  236. return false;
  237. }
  238. break;
  239. }
  240. case token_type::literal_false:
  241. {
  242. if (JSON_HEDLEY_UNLIKELY(not sax->boolean(false)))
  243. {
  244. return false;
  245. }
  246. break;
  247. }
  248. case token_type::literal_null:
  249. {
  250. if (JSON_HEDLEY_UNLIKELY(not sax->null()))
  251. {
  252. return false;
  253. }
  254. break;
  255. }
  256. case token_type::literal_true:
  257. {
  258. if (JSON_HEDLEY_UNLIKELY(not sax->boolean(true)))
  259. {
  260. return false;
  261. }
  262. break;
  263. }
  264. case token_type::value_integer:
  265. {
  266. if (JSON_HEDLEY_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer())))
  267. {
  268. return false;
  269. }
  270. break;
  271. }
  272. case token_type::value_string:
  273. {
  274. if (JSON_HEDLEY_UNLIKELY(not sax->string(m_lexer.get_string())))
  275. {
  276. return false;
  277. }
  278. break;
  279. }
  280. case token_type::value_unsigned:
  281. {
  282. if (JSON_HEDLEY_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned())))
  283. {
  284. return false;
  285. }
  286. break;
  287. }
  288. case token_type::parse_error:
  289. {
  290. // using "uninitialized" to avoid "expected" message
  291. return sax->parse_error(m_lexer.get_position(),
  292. m_lexer.get_token_string(),
  293. parse_error::create(101, m_lexer.get_position(),
  294. exception_message(token_type::uninitialized, "value")));
  295. }
  296. default: // the last token was unexpected
  297. {
  298. return sax->parse_error(m_lexer.get_position(),
  299. m_lexer.get_token_string(),
  300. parse_error::create(101, m_lexer.get_position(),
  301. exception_message(token_type::literal_or_value, "value")));
  302. }
  303. }
  304. }
  305. else
  306. {
  307. skip_to_state_evaluation = false;
  308. }
  309. // we reached this line after we successfully parsed a value
  310. if (states.empty())
  311. {
  312. // empty stack: we reached the end of the hierarchy: done
  313. return true;
  314. }
  315. if (states.back()) // array
  316. {
  317. // comma -> next value
  318. if (get_token() == token_type::value_separator)
  319. {
  320. // parse a new value
  321. get_token();
  322. continue;
  323. }
  324. // closing ]
  325. if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
  326. {
  327. if (JSON_HEDLEY_UNLIKELY(not sax->end_array()))
  328. {
  329. return false;
  330. }
  331. // We are done with this array. Before we can parse a
  332. // new value, we need to evaluate the new state first.
  333. // By setting skip_to_state_evaluation to false, we
  334. // are effectively jumping to the beginning of this if.
  335. assert(not states.empty());
  336. states.pop_back();
  337. skip_to_state_evaluation = true;
  338. continue;
  339. }
  340. return sax->parse_error(m_lexer.get_position(),
  341. m_lexer.get_token_string(),
  342. parse_error::create(101, m_lexer.get_position(),
  343. exception_message(token_type::end_array, "array")));
  344. }
  345. else // object
  346. {
  347. // comma -> next value
  348. if (get_token() == token_type::value_separator)
  349. {
  350. // parse key
  351. if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
  352. {
  353. return sax->parse_error(m_lexer.get_position(),
  354. m_lexer.get_token_string(),
  355. parse_error::create(101, m_lexer.get_position(),
  356. exception_message(token_type::value_string, "object key")));
  357. }
  358. if (JSON_HEDLEY_UNLIKELY(not sax->key(m_lexer.get_string())))
  359. {
  360. return false;
  361. }
  362. // parse separator (:)
  363. if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
  364. {
  365. return sax->parse_error(m_lexer.get_position(),
  366. m_lexer.get_token_string(),
  367. parse_error::create(101, m_lexer.get_position(),
  368. exception_message(token_type::name_separator, "object separator")));
  369. }
  370. // parse values
  371. get_token();
  372. continue;
  373. }
  374. // closing }
  375. if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
  376. {
  377. if (JSON_HEDLEY_UNLIKELY(not sax->end_object()))
  378. {
  379. return false;
  380. }
  381. // We are done with this object. Before we can parse a
  382. // new value, we need to evaluate the new state first.
  383. // By setting skip_to_state_evaluation to false, we
  384. // are effectively jumping to the beginning of this if.
  385. assert(not states.empty());
  386. states.pop_back();
  387. skip_to_state_evaluation = true;
  388. continue;
  389. }
  390. return sax->parse_error(m_lexer.get_position(),
  391. m_lexer.get_token_string(),
  392. parse_error::create(101, m_lexer.get_position(),
  393. exception_message(token_type::end_object, "object")));
  394. }
  395. }
  396. }
  397. /// get next token from lexer
  398. token_type get_token()
  399. {
  400. return last_token = m_lexer.scan();
  401. }
  402. std::string exception_message(const token_type expected, const std::string& context)
  403. {
  404. std::string error_msg = "syntax error ";
  405. if (not context.empty())
  406. {
  407. error_msg += "while parsing " + context + " ";
  408. }
  409. error_msg += "- ";
  410. if (last_token == token_type::parse_error)
  411. {
  412. error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
  413. m_lexer.get_token_string() + "'";
  414. }
  415. else
  416. {
  417. error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
  418. }
  419. if (expected != token_type::uninitialized)
  420. {
  421. error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
  422. }
  423. return error_msg;
  424. }
  425. private:
  /// callback function; when empty, parse() takes the path without the
  /// callback-aware SAX DOM parser
  const parser_callback_t<BasicJsonType> callback = nullptr;
  /// the type of the last read token; updated by every get_token() call
  token_type last_token = token_type::uninitialized;
  /// the lexer that scans the input and supplies the tokens
  lexer_t m_lexer;
  /// whether to throw exceptions in case of errors; forwarded to the SAX DOM
  /// parsers created in parse()
  const bool allow_exceptions = true;
  434. };
  435. } // namespace detail
  436. } // namespace nlohmann