123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480 |
- #pragma once
- #include <array> // array
- #include <cassert> // assert
- #include <cstddef> // size_t
- #include <cstdio> //FILE *
- #include <cstring> // strlen
- #include <istream> // istream
- #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
- #include <memory> // shared_ptr, make_shared, addressof
- #include <numeric> // accumulate
- #include <string> // string, char_traits
- #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
- #include <utility> // pair, declval
- #include <nlohmann/detail/iterators/iterator_traits.hpp>
- #include <nlohmann/detail/macro_scope.hpp>
- namespace nlohmann
- {
- namespace detail
- {
/// enumeration of the input formats that can be selected
enum class input_format_t
{
    json,
    cbor,
    msgpack,
    ubjson,
    bson
};
- ////////////////////
- // input adapters //
- ////////////////////
- /*!
- Input adapter for stdio file access. This adapter read only 1 byte and do not use any
- buffer. This adapter is a very low level adapter.
- */
- class file_input_adapter
- {
- public:
- JSON_HEDLEY_NON_NULL(2)
- explicit file_input_adapter(std::FILE* f) noexcept
- : m_file(f)
- {}
- // make class move-only
- file_input_adapter(const file_input_adapter&) = delete;
- file_input_adapter(file_input_adapter&&) = default;
- file_input_adapter& operator=(const file_input_adapter&) = delete;
- file_input_adapter& operator=(file_input_adapter&&) = delete;
- std::char_traits<char>::int_type get_character() noexcept
- {
- return std::fgetc(m_file);
- }
- private:
- /// the file pointer to read from
- std::FILE* m_file;
- };
/*!
Input adapter for a std::istream. Characters are pulled directly from the
stream's std::streambuf (captured once at construction, so changing the
underlying std::streambuf in mid-input is not supported). Because the
std::istream interface is bypassed, the adapter sets eofbit on the stream
itself when the buffer is exhausted. On destruction every stream state flag
except eofbit is cleared, so the stream can be used for subsequent standard
std::istream operations on the characters that follow the consumed input.

The adapter does not own the stream; the stream must outlive the adapter.
Byte order mark handling is not performed by this class.
*/
class input_stream_adapter
{
  public:
    ~input_stream_adapter()
    {
        // clear stream flags; we use underlying streambuf I/O, do not
        // maintain ifstream flags, except eof
        // (a moved-from adapter holds no stream and must not touch it)
        if (is != nullptr)
        {
            is->clear(is->rdstate() & std::ios::eofbit);
        }
    }

    /// attach to stream @a i and to the streambuf it currently uses
    explicit input_stream_adapter(std::istream& i)
        : is(&i), sb(i.rdbuf())
    {}

    // not copyable or assignable because of the non-owning pointer members
    input_stream_adapter(const input_stream_adapter&) = delete;
    input_stream_adapter& operator=(const input_stream_adapter&) = delete;
    input_stream_adapter& operator=(input_stream_adapter&&) = delete;

    /// take over the stream of @a rhs; @a rhs is left detached so that its
    /// destructor does not reset the stream flags a second time
    input_stream_adapter(input_stream_adapter&& rhs) noexcept
        : is(rhs.is), sb(rhs.sb)
    {
        rhs.is = nullptr;
        rhs.sb = nullptr;
    }

    // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
    // ensure that std::char_traits<char>::eof() and the character 0xFF do not
    // end up as the same value, eg. 0xFFFFFFFF.
    /// @return the next character, or std::char_traits<char>::eof() when the
    ///         stream buffer is exhausted
    std::char_traits<char>::int_type get_character()
    {
        const auto res = sb->sbumpc();
        // set eof manually, as we don't use the istream interface.
        if (res == std::char_traits<char>::eof())
        {
            is->clear(is->rdstate() | std::ios::eofbit);
        }
        return res;
    }

  private:
    /// the associated input stream (nullptr after being moved from)
    std::istream* is = nullptr;
    /// the stream buffer of the associated stream (nullptr after being moved from)
    std::streambuf* sb = nullptr;
};
- /// input adapter for buffer input
- class input_buffer_adapter
- {
- public:
- input_buffer_adapter(const char* b, const std::size_t l) noexcept
- : cursor(b), limit(b == nullptr ? nullptr : (b + l))
- {}
- // delete because of pointer members
- input_buffer_adapter(const input_buffer_adapter&) = delete;
- input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
- input_buffer_adapter(input_buffer_adapter&&) = default;
- input_buffer_adapter& operator=(input_buffer_adapter&&) = delete;
- std::char_traits<char>::int_type get_character() noexcept
- {
- if (JSON_HEDLEY_LIKELY(cursor < limit))
- {
- assert(cursor != nullptr and limit != nullptr);
- return std::char_traits<char>::to_int_type(*(cursor++));
- }
- return std::char_traits<char>::eof();
- }
- private:
- /// pointer to the current character
- const char* cursor;
- /// pointer past the last character
- const char* const limit;
- };
/*!
Helper converting the next element(s) of a wide string into UTF-8 bytes.

Primary template: the elements of @a str are interpreted as UTF-32 code
points. (A specialization for element size 2 handling UTF-16 follows.)

@tparam WideStringType  string type providing size() and operator[]
@tparam T               size in bytes of one string element
*/
template<typename WideStringType, std::size_t T>
struct wide_string_input_helper
{
    /*!
    Encode the code point at index @a current_wchar as UTF-8.

    @param[in]     str                the string to read from
    @param[in,out] current_wchar      index of the next element to read;
                                      advanced past the consumed element
    @param[out]    utf8_bytes         receives the encoded bytes; at the end
                                      of the string, the first entry is set to
                                      std::char_traits<char>::eof()
    @param[out]    utf8_bytes_index   always reset to 0
    @param[out]    utf8_bytes_filled  number of valid entries in @a utf8_bytes
                                      (always at least 1)
    */
    static void fill_buffer(const WideStringType& str,
                            std::size_t& current_wchar,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        using int_type = std::char_traits<char>::int_type;

        utf8_bytes_index = 0;

        // end of input (">=" so that an index beyond the end can never lead
        // to an out-of-bounds read)
        if (current_wchar >= str.size())
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = static_cast<unsigned int>(str[current_wchar++]);

        // UTF-32 to UTF-8 encoding
        if (wc < 0x80)
        {
            utf8_bytes[0] = static_cast<int_type>(wc);
            utf8_bytes_filled = 1;
        }
        else if (wc <= 0x7FF)
        {
            utf8_bytes[0] = static_cast<int_type>(0xC0u | ((wc >> 6u) & 0x1Fu));
            utf8_bytes[1] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 2;
        }
        else if (wc <= 0xFFFF)
        {
            utf8_bytes[0] = static_cast<int_type>(0xE0u | ((wc >> 12u) & 0x0Fu));
            utf8_bytes[1] = static_cast<int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 3;
        }
        else if (wc <= 0x10FFFF)
        {
            utf8_bytes[0] = static_cast<int_type>(0xF0u | ((wc >> 18u) & 0x07u));
            utf8_bytes[1] = static_cast<int_type>(0x80u | ((wc >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 4;
        }
        else
        {
            // unknown character: pass the raw value on as a single entry
            utf8_bytes[0] = static_cast<int_type>(wc);
            utf8_bytes_filled = 1;
        }
    }
};

/*!
Specialization for strings whose elements are 2 bytes wide: the elements are
interpreted as UTF-16 code units. The parameters of fill_buffer have the same
meaning as in the primary template.
*/
template<typename WideStringType>
struct wide_string_input_helper<WideStringType, 2>
{
    static void fill_buffer(const WideStringType& str,
                            std::size_t& current_wchar,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        using int_type = std::char_traits<char>::int_type;

        utf8_bytes_index = 0;

        // end of input (">=" so that an index beyond the end can never lead
        // to an out-of-bounds read)
        if (current_wchar >= str.size())
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = static_cast<unsigned int>(str[current_wchar++]);

        // UTF-16 to UTF-8 encoding
        if (wc < 0x80)
        {
            utf8_bytes[0] = static_cast<int_type>(wc);
            utf8_bytes_filled = 1;
        }
        else if (wc <= 0x7FF)
        {
            utf8_bytes[0] = static_cast<int_type>(0xC0u | ((wc >> 6u)));
            utf8_bytes[1] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 2;
        }
        else if (0xD800 > wc or wc >= 0xE000)
        {
            // code unit outside the surrogate range: three-byte sequence
            utf8_bytes[0] = static_cast<int_type>(0xE0u | ((wc >> 12u)));
            utf8_bytes[1] = static_cast<int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 3;
        }
        else if (current_wchar < str.size())
        {
            // surrogate range: combine with the following code unit
            const auto wc2 = static_cast<unsigned int>(str[current_wchar++]);
            const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
            utf8_bytes[0] = static_cast<int_type>(0xF0u | (charcode >> 18u));
            utf8_bytes[1] = static_cast<int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<int_type>(0x80u | (charcode & 0x3Fu));
            utf8_bytes_filled = 4;
        }
        else
        {
            // unknown character: a surrogate without a partner at the very
            // end of the string. The element has already been consumed above,
            // so current_wchar must NOT be advanced again - otherwise it would
            // move past str.size() and the next call would read out of bounds
            // instead of reporting the end of the input.
            utf8_bytes[0] = static_cast<int_type>(wc);
            utf8_bytes_filled = 1;
        }
    }
};
- template<typename WideStringType>
- class wide_string_input_adapter
- {
- public:
- explicit wide_string_input_adapter(const WideStringType& w) noexcept
- : str(w)
- {}
- std::char_traits<char>::int_type get_character() noexcept
- {
- // check if buffer needs to be filled
- if (utf8_bytes_index == utf8_bytes_filled)
- {
- fill_buffer<sizeof(typename WideStringType::value_type)>();
- assert(utf8_bytes_filled > 0);
- assert(utf8_bytes_index == 0);
- }
- // use buffer
- assert(utf8_bytes_filled > 0);
- assert(utf8_bytes_index < utf8_bytes_filled);
- return utf8_bytes[utf8_bytes_index++];
- }
- private:
- template<size_t T>
- void fill_buffer()
- {
- wide_string_input_helper<WideStringType, T>::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
- }
- /// the wstring to process
- const WideStringType& str;
- /// index of the current wchar in str
- std::size_t current_wchar = 0;
- /// a buffer for UTF-8 bytes
- std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
- /// index to the utf8_codes array for the next valid byte
- std::size_t utf8_bytes_index = 0;
- /// number of valid bytes in the utf8_codes array
- std::size_t utf8_bytes_filled = 0;
- };
- inline file_input_adapter input_adapter(std::FILE* file)
- {
- return file_input_adapter(file);
- }
- inline input_stream_adapter input_adapter(std::istream& stream)
- {
- return input_stream_adapter(stream);
- }
- inline input_stream_adapter input_adapter(std::istream&& stream)
- {
- return input_stream_adapter(stream);
- }
- template<typename CharT, typename SizeT,
- typename std::enable_if<
- std::is_pointer<CharT>::value and
- std::is_integral<typename std::remove_pointer<CharT>::type>::value and
- not std::is_same<SizeT, bool>::value and
- sizeof(typename std::remove_pointer<CharT>::type) == 1,
- int>::type = 0>
- input_buffer_adapter input_adapter(CharT b, SizeT l)
- {
- return input_buffer_adapter(reinterpret_cast<const char*>(b), l);
- }
- template<typename CharT,
- typename std::enable_if<
- std::is_pointer<CharT>::value and
- std::is_integral<typename std::remove_pointer<CharT>::type>::value and
- sizeof(typename std::remove_pointer<CharT>::type) == 1,
- int>::type = 0>
- input_buffer_adapter input_adapter(CharT b)
- {
- return input_adapter(reinterpret_cast<const char*>(b),
- std::strlen(reinterpret_cast<const char*>(b)));
- }
- template<class IteratorType,
- typename std::enable_if<
- std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
- int>::type = 0>
- input_buffer_adapter input_adapter(IteratorType first, IteratorType last)
- {
- #ifndef NDEBUG
- // assertion to check that the iterator range is indeed contiguous,
- // see https://stackoverflow.com/a/35008842/266378 for more discussion
- const auto is_contiguous = std::accumulate(
- first, last, std::pair<bool, int>(true, 0),
- [&first](std::pair<bool, int> res, decltype(*first) val)
- {
- res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
- return res;
- }).first;
- assert(is_contiguous);
- #endif
- // assertion to check that each element is 1 byte long
- static_assert(
- sizeof(typename iterator_traits<IteratorType>::value_type) == 1,
- "each element in the iterator range must have the size of 1 byte");
- const auto len = static_cast<size_t>(std::distance(first, last));
- if (JSON_HEDLEY_LIKELY(len > 0))
- {
- // there is at least one element: use the address of first
- return input_buffer_adapter(reinterpret_cast<const char*>(&(*first)), len);
- }
- else
- {
- // the address of first cannot be used: use nullptr
- return input_buffer_adapter(nullptr, len);
- }
- }
- inline wide_string_input_adapter<std::wstring> input_adapter(const std::wstring& ws)
- {
- return wide_string_input_adapter<std::wstring>(ws);
- }
- inline wide_string_input_adapter<std::u16string> input_adapter(const std::u16string& ws)
- {
- return wide_string_input_adapter<std::u16string>(ws);
- }
- inline wide_string_input_adapter<std::u32string> input_adapter(const std::u32string& ws)
- {
- return wide_string_input_adapter<std::u32string>(ws);
- }
- template<class ContiguousContainer, typename
- std::enable_if<not std::is_pointer<ContiguousContainer>::value and
- std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
- int>::type = 0>
- input_buffer_adapter input_adapter(const ContiguousContainer& c)
- {
- return input_adapter(std::begin(c), std::end(c));
- }
- template<class T, std::size_t N>
- input_buffer_adapter input_adapter(T (&array)[N])
- {
- return input_adapter(std::begin(array), std::end(array));
- }
- // This class only handles inputs of input_buffer_adapter type.
- // It's required so that expressions like {ptr, len} can be implicitely casted
- // to the correct adapter.
- class span_input_adapter
- {
- public:
- template<typename CharT,
- typename std::enable_if<
- std::is_pointer<CharT>::value and
- std::is_integral<typename std::remove_pointer<CharT>::type>::value and
- sizeof(typename std::remove_pointer<CharT>::type) == 1,
- int>::type = 0>
- span_input_adapter(CharT b, std::size_t l)
- : ia(reinterpret_cast<const char*>(b), l) {}
- template<typename CharT,
- typename std::enable_if<
- std::is_pointer<CharT>::value and
- std::is_integral<typename std::remove_pointer<CharT>::type>::value and
- sizeof(typename std::remove_pointer<CharT>::type) == 1,
- int>::type = 0>
- span_input_adapter(CharT b)
- : span_input_adapter(reinterpret_cast<const char*>(b),
- std::strlen(reinterpret_cast<const char*>(b))) {}
- template<class IteratorType,
- typename std::enable_if<
- std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
- int>::type = 0>
- span_input_adapter(IteratorType first, IteratorType last)
- : ia(input_adapter(first, last)) {}
- template<class T, std::size_t N>
- span_input_adapter(T (&array)[N])
- : span_input_adapter(std::begin(array), std::end(array)) {}
- /// input adapter for contiguous container
- template<class ContiguousContainer, typename
- std::enable_if<not std::is_pointer<ContiguousContainer>::value and
- std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
- int>::type = 0>
- span_input_adapter(const ContiguousContainer& c)
- : span_input_adapter(std::begin(c), std::end(c)) {}
- input_buffer_adapter&& get()
- {
- return std::move(ia);
- }
- private:
- input_buffer_adapter ia;
- };
- } // namespace detail
- } // namespace nlohmann
|