input_adapters.hpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480
  1. #pragma once
  2. #include <array> // array
  3. #include <cassert> // assert
  4. #include <cstddef> // size_t
  5. #include <cstdio> //FILE *
  6. #include <cstring> // strlen
  7. #include <istream> // istream
  8. #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
  9. #include <memory> // shared_ptr, make_shared, addressof
  10. #include <numeric> // accumulate
  11. #include <string> // string, char_traits
  12. #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
  13. #include <utility> // pair, declval
  14. #include <nlohmann/detail/iterators/iterator_traits.hpp>
  15. #include <nlohmann/detail/macro_scope.hpp>
  16. namespace nlohmann
  17. {
  18. namespace detail
  19. {
  /// the input formats supported (enumerator order: json, cbor, msgpack, ubjson, bson)
  enum class input_format_t
  {
      json,
      cbor,
      msgpack,
      ubjson,
      bson
  };
  22. ////////////////////
  23. // input adapters //
  24. ////////////////////
  25. /*!
  26. Input adapter for stdio file access. This adapter read only 1 byte and do not use any
  27. buffer. This adapter is a very low level adapter.
  28. */
  29. class file_input_adapter
  30. {
  31. public:
  32. JSON_HEDLEY_NON_NULL(2)
  33. explicit file_input_adapter(std::FILE* f) noexcept
  34. : m_file(f)
  35. {}
  36. // make class move-only
  37. file_input_adapter(const file_input_adapter&) = delete;
  38. file_input_adapter(file_input_adapter&&) = default;
  39. file_input_adapter& operator=(const file_input_adapter&) = delete;
  40. file_input_adapter& operator=(file_input_adapter&&) = delete;
  41. std::char_traits<char>::int_type get_character() noexcept
  42. {
  43. return std::fgetc(m_file);
  44. }
  45. private:
  46. /// the file pointer to read from
  47. std::FILE* m_file;
  48. };
  /*!
  Input adapter for a std::istream. Characters are pulled directly from the
  stream's std::streambuf (captured once at construction, so changing the
  underlying std::streambuf in mid-input is not supported).

  Because the std::istream interface is bypassed, the adapter sets the stream's
  eofbit itself when the buffer reports end of input. On destruction every state
  flag except eofbit is cleared, so the stream can keep being used with the
  regular std::istream operations for whatever input follows.

  Note: no byte order mark handling happens in this class; get_character()
  forwards every character of the stream buffer unchanged.

  A moved-from adapter holds null pointers; only destroying it is valid.
  */
  class input_stream_adapter
  {
    public:
      /// attach to stream @a i; the stream must outlive the adapter
      explicit input_stream_adapter(std::istream& i)
          : is(&i), sb(i.rdbuf())
      {}

      // not copyable / assignable because of the non-owning pointer members
      input_stream_adapter(const input_stream_adapter&) = delete;
      input_stream_adapter& operator=(const input_stream_adapter&) = delete;
      input_stream_adapter& operator=(input_stream_adapter&&) = delete;

      /// take over the stream of @a rhs; rhs is left detached so that its
      /// destructor does not touch the stream flags
      input_stream_adapter(input_stream_adapter&& rhs) noexcept
          : is(rhs.is), sb(rhs.sb)
      {
          rhs.is = nullptr;
          rhs.sb = nullptr;
      }

      ~input_stream_adapter()
      {
          // clear stream flags; we use underlying streambuf I/O, do not
          // maintain ifstream flags, except eof
          if (is != nullptr)
          {
              is->clear(is->rdstate() & std::ios::eofbit);
          }
      }

      /*!
      @return the next character as delivered by std::streambuf::sbumpc, or
              std::char_traits<char>::eof() at end of input

      std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
      ensure that std::char_traits<char>::eof() and the character 0xFF do not
      end up as the same value, eg. 0xFFFFFFFF.
      */
      std::char_traits<char>::int_type get_character()
      {
          const auto res = sb->sbumpc();
          // set eof manually, as we don't use the istream interface.
          if (res == std::char_traits<char>::eof())
          {
              is->clear(is->rdstate() | std::ios::eofbit);
          }
          return res;
      }

    private:
      /// the associated input stream
      std::istream* is = nullptr;
      /// the stream buffer of the associated input stream
      std::streambuf* sb = nullptr;
  };
  100. /// input adapter for buffer input
  101. class input_buffer_adapter
  102. {
  103. public:
  104. input_buffer_adapter(const char* b, const std::size_t l) noexcept
  105. : cursor(b), limit(b == nullptr ? nullptr : (b + l))
  106. {}
  107. // delete because of pointer members
  108. input_buffer_adapter(const input_buffer_adapter&) = delete;
  109. input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
  110. input_buffer_adapter(input_buffer_adapter&&) = default;
  111. input_buffer_adapter& operator=(input_buffer_adapter&&) = delete;
  112. std::char_traits<char>::int_type get_character() noexcept
  113. {
  114. if (JSON_HEDLEY_LIKELY(cursor < limit))
  115. {
  116. assert(cursor != nullptr and limit != nullptr);
  117. return std::char_traits<char>::to_int_type(*(cursor++));
  118. }
  119. return std::char_traits<char>::eof();
  120. }
  121. private:
  122. /// pointer to the current character
  123. const char* cursor;
  124. /// pointer past the last character
  125. const char* const limit;
  126. };
  /*!
  Helper that transcodes a wide string to UTF-8, one code point per call.

  Primary template: every element of the string is treated as one UTF-32 code
  point. The partial specialization for T == 2 further below treats the
  elements as UTF-16 code units instead.

  @tparam WideStringType  string type offering size() and operator[]
  @tparam T               size in bytes of one string element
  */
  template<typename WideStringType, size_t T>
  struct wide_string_input_helper
  {
      /*!
      UTF-32: encode the element at @a current_wchar as UTF-8.

      @param[in]     str                the wide string to read from
      @param[in,out] current_wchar      index of the next unread element;
                                        advanced past the consumed element
      @param[out]    utf8_bytes         receives the produced values, starting
                                        at index 0
      @param[out]    utf8_bytes_index   always reset to 0
      @param[out]    utf8_bytes_filled  number of valid entries written
                                        (1 to 4)

      At the end of the string a single std::char_traits<char>::eof() entry
      is produced and @a current_wchar is left unchanged.
      */
      static void fill_buffer(const WideStringType& str,
                              size_t& current_wchar,
                              std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                              size_t& utf8_bytes_index,
                              size_t& utf8_bytes_filled)
      {
          utf8_bytes_index = 0;

          if (current_wchar >= str.size())
          {
              utf8_bytes[0] = std::char_traits<char>::eof();
              utf8_bytes_filled = 1;
          }
          else
          {
              // get the current character
              const auto wc = static_cast<unsigned int>(str[current_wchar++]);

              // UTF-32 to UTF-8 encoding
              if (wc < 0x80)
              {
                  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
                  utf8_bytes_filled = 1;
              }
              else if (wc <= 0x7FF)
              {
                  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u) & 0x1Fu));
                  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
                  utf8_bytes_filled = 2;
              }
              else if (wc <= 0xFFFF)
              {
                  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u) & 0x0Fu));
                  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
                  utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
                  utf8_bytes_filled = 3;
              }
              else if (wc <= 0x10FFFF)
              {
                  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | ((wc >> 18u) & 0x07u));
                  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 12u) & 0x3Fu));
                  utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
                  utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
                  utf8_bytes_filled = 4;
              }
              else
              {
                  // unknown character (beyond U+10FFFF): the raw value is
                  // passed through unencoded as a single entry
                  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
                  utf8_bytes_filled = 1;
              }
          }
      }
  };

  /*!
  Specialization for strings whose elements are two bytes wide: the elements
  are treated as UTF-16 code units.
  */
  template<typename WideStringType>
  struct wide_string_input_helper<WideStringType, 2>
  {
      /*!
      UTF-16: encode the code point starting at @a current_wchar as UTF-8.

      Parameters are used exactly as in the primary template. A code unit in
      the surrogate range 0xD800..0xDFFF is combined with the element that
      follows it (consuming two elements); the code does not verify that the
      two units form a well-formed high/low pair. A surrogate that is the last
      element of the string is passed through unencoded as a single entry, and
      the following call reports end of input.
      */
      static void fill_buffer(const WideStringType& str,
                              size_t& current_wchar,
                              std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                              size_t& utf8_bytes_index,
                              size_t& utf8_bytes_filled)
      {
          utf8_bytes_index = 0;

          if (current_wchar >= str.size())
          {
              utf8_bytes[0] = std::char_traits<char>::eof();
              utf8_bytes_filled = 1;
          }
          else
          {
              // get the current character
              const auto wc = static_cast<unsigned int>(str[current_wchar++]);

              // UTF-16 to UTF-8 encoding
              if (wc < 0x80)
              {
                  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
                  utf8_bytes_filled = 1;
              }
              else if (wc <= 0x7FF)
              {
                  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u)));
                  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
                  utf8_bytes_filled = 2;
              }
              else if (0xD800 > wc or wc >= 0xE000)
              {
                  utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u)));
                  utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
                  utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
                  utf8_bytes_filled = 3;
              }
              else
              {
                  // wc is a surrogate code unit
                  if (current_wchar < str.size())
                  {
                      const auto wc2 = static_cast<unsigned int>(str[current_wchar++]);
                      const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
                      utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
                      utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
                      utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
                      utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
                      utf8_bytes_filled = 4;
                  }
                  else
                  {
                      // unknown character: a surrogate with nothing after it.
                      // current_wchar already equals str.size() here and must
                      // not be advanced again, otherwise the next call would
                      // skip the end-of-input check and index past the string.
                      utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
                      utf8_bytes_filled = 1;
                  }
              }
          }
      }
  };
  245. template<typename WideStringType>
  246. class wide_string_input_adapter
  247. {
  248. public:
  249. explicit wide_string_input_adapter(const WideStringType& w) noexcept
  250. : str(w)
  251. {}
  252. std::char_traits<char>::int_type get_character() noexcept
  253. {
  254. // check if buffer needs to be filled
  255. if (utf8_bytes_index == utf8_bytes_filled)
  256. {
  257. fill_buffer<sizeof(typename WideStringType::value_type)>();
  258. assert(utf8_bytes_filled > 0);
  259. assert(utf8_bytes_index == 0);
  260. }
  261. // use buffer
  262. assert(utf8_bytes_filled > 0);
  263. assert(utf8_bytes_index < utf8_bytes_filled);
  264. return utf8_bytes[utf8_bytes_index++];
  265. }
  266. private:
  267. template<size_t T>
  268. void fill_buffer()
  269. {
  270. wide_string_input_helper<WideStringType, T>::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
  271. }
  272. /// the wstring to process
  273. const WideStringType& str;
  274. /// index of the current wchar in str
  275. std::size_t current_wchar = 0;
  276. /// a buffer for UTF-8 bytes
  277. std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
  278. /// index to the utf8_codes array for the next valid byte
  279. std::size_t utf8_bytes_index = 0;
  280. /// number of valid bytes in the utf8_codes array
  281. std::size_t utf8_bytes_filled = 0;
  282. };
  283. inline file_input_adapter input_adapter(std::FILE* file)
  284. {
  285. return file_input_adapter(file);
  286. }
  287. inline input_stream_adapter input_adapter(std::istream& stream)
  288. {
  289. return input_stream_adapter(stream);
  290. }
  291. inline input_stream_adapter input_adapter(std::istream&& stream)
  292. {
  293. return input_stream_adapter(stream);
  294. }
  295. template<typename CharT, typename SizeT,
  296. typename std::enable_if<
  297. std::is_pointer<CharT>::value and
  298. std::is_integral<typename std::remove_pointer<CharT>::type>::value and
  299. not std::is_same<SizeT, bool>::value and
  300. sizeof(typename std::remove_pointer<CharT>::type) == 1,
  301. int>::type = 0>
  302. input_buffer_adapter input_adapter(CharT b, SizeT l)
  303. {
  304. return input_buffer_adapter(reinterpret_cast<const char*>(b), l);
  305. }
  306. template<typename CharT,
  307. typename std::enable_if<
  308. std::is_pointer<CharT>::value and
  309. std::is_integral<typename std::remove_pointer<CharT>::type>::value and
  310. sizeof(typename std::remove_pointer<CharT>::type) == 1,
  311. int>::type = 0>
  312. input_buffer_adapter input_adapter(CharT b)
  313. {
  314. return input_adapter(reinterpret_cast<const char*>(b),
  315. std::strlen(reinterpret_cast<const char*>(b)));
  316. }
  317. template<class IteratorType,
  318. typename std::enable_if<
  319. std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
  320. int>::type = 0>
  321. input_buffer_adapter input_adapter(IteratorType first, IteratorType last)
  322. {
  323. #ifndef NDEBUG
  324. // assertion to check that the iterator range is indeed contiguous,
  325. // see https://stackoverflow.com/a/35008842/266378 for more discussion
  326. const auto is_contiguous = std::accumulate(
  327. first, last, std::pair<bool, int>(true, 0),
  328. [&first](std::pair<bool, int> res, decltype(*first) val)
  329. {
  330. res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
  331. return res;
  332. }).first;
  333. assert(is_contiguous);
  334. #endif
  335. // assertion to check that each element is 1 byte long
  336. static_assert(
  337. sizeof(typename iterator_traits<IteratorType>::value_type) == 1,
  338. "each element in the iterator range must have the size of 1 byte");
  339. const auto len = static_cast<size_t>(std::distance(first, last));
  340. if (JSON_HEDLEY_LIKELY(len > 0))
  341. {
  342. // there is at least one element: use the address of first
  343. return input_buffer_adapter(reinterpret_cast<const char*>(&(*first)), len);
  344. }
  345. else
  346. {
  347. // the address of first cannot be used: use nullptr
  348. return input_buffer_adapter(nullptr, len);
  349. }
  350. }
  351. inline wide_string_input_adapter<std::wstring> input_adapter(const std::wstring& ws)
  352. {
  353. return wide_string_input_adapter<std::wstring>(ws);
  354. }
  355. inline wide_string_input_adapter<std::u16string> input_adapter(const std::u16string& ws)
  356. {
  357. return wide_string_input_adapter<std::u16string>(ws);
  358. }
  359. inline wide_string_input_adapter<std::u32string> input_adapter(const std::u32string& ws)
  360. {
  361. return wide_string_input_adapter<std::u32string>(ws);
  362. }
  363. template<class ContiguousContainer, typename
  364. std::enable_if<not std::is_pointer<ContiguousContainer>::value and
  365. std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
  366. int>::type = 0>
  367. input_buffer_adapter input_adapter(const ContiguousContainer& c)
  368. {
  369. return input_adapter(std::begin(c), std::end(c));
  370. }
  371. template<class T, std::size_t N>
  372. input_buffer_adapter input_adapter(T (&array)[N])
  373. {
  374. return input_adapter(std::begin(array), std::end(array));
  375. }
  376. // This class only handles inputs of input_buffer_adapter type.
  377. // It's required so that expressions like {ptr, len} can be implicitely casted
  378. // to the correct adapter.
  379. class span_input_adapter
  380. {
  381. public:
  382. template<typename CharT,
  383. typename std::enable_if<
  384. std::is_pointer<CharT>::value and
  385. std::is_integral<typename std::remove_pointer<CharT>::type>::value and
  386. sizeof(typename std::remove_pointer<CharT>::type) == 1,
  387. int>::type = 0>
  388. span_input_adapter(CharT b, std::size_t l)
  389. : ia(reinterpret_cast<const char*>(b), l) {}
  390. template<typename CharT,
  391. typename std::enable_if<
  392. std::is_pointer<CharT>::value and
  393. std::is_integral<typename std::remove_pointer<CharT>::type>::value and
  394. sizeof(typename std::remove_pointer<CharT>::type) == 1,
  395. int>::type = 0>
  396. span_input_adapter(CharT b)
  397. : span_input_adapter(reinterpret_cast<const char*>(b),
  398. std::strlen(reinterpret_cast<const char*>(b))) {}
  399. template<class IteratorType,
  400. typename std::enable_if<
  401. std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
  402. int>::type = 0>
  403. span_input_adapter(IteratorType first, IteratorType last)
  404. : ia(input_adapter(first, last)) {}
  405. template<class T, std::size_t N>
  406. span_input_adapter(T (&array)[N])
  407. : span_input_adapter(std::begin(array), std::end(array)) {}
  408. /// input adapter for contiguous container
  409. template<class ContiguousContainer, typename
  410. std::enable_if<not std::is_pointer<ContiguousContainer>::value and
  411. std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
  412. int>::type = 0>
  413. span_input_adapter(const ContiguousContainer& c)
  414. : span_input_adapter(std::begin(c), std::end(c)) {}
  415. input_buffer_adapter&& get()
  416. {
  417. return std::move(ia);
  418. }
  419. private:
  420. input_buffer_adapter ia;
  421. };
  422. } // namespace detail
  423. } // namespace nlohmann