| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442 |
- #pragma once
- #include <array> // array
- #include <cassert> // assert
- #include <cstddef> // size_t
- #include <cstdio> //FILE *
- #include <cstring> // strlen
- #include <istream> // istream
- #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
- #include <memory> // shared_ptr, make_shared, addressof
- #include <numeric> // accumulate
- #include <string> // string, char_traits
- #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
- #include <utility> // pair, declval
- #include <nlohmann/detail/iterators/iterator_traits.hpp>
- #include <nlohmann/detail/macro_scope.hpp>
- namespace nlohmann
- {
- namespace detail
- {
/// enumeration of the supported input formats
enum class input_format_t
{
    json,
    cbor,
    msgpack,
    ubjson,
    bson
};
- ////////////////////
- // input adapters //
- ////////////////////
/*!
@brief abstract input adapter interface

Implementations turn some input source (a std::istream, a buffer, or another
input type) into a sequence of std::char_traits<char>::int_type values that is
pulled one value at a time through get_character().

Every returned value is either a valid char value expressed as a non-negative
number (typically the value of an unsigned char) or the end-of-input marker
std::char_traits<char>::eof(). That marker lies outside the range of valid
char values; it is typically -1, but may be any value that is not a valid char
value.
*/
struct input_adapter_protocol
{
    /// virtual destructor: adapters are destroyed through this interface
    virtual ~input_adapter_protocol() = default;

    /// get a character [0,255] or std::char_traits<char>::eof().
    virtual std::char_traits<char>::int_type get_character() = 0;
};

/// shared-ownership handle to an adapter; a type to simplify interfaces
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
- /*!
- Input adapter for stdio file access. This adapter read only 1 byte and do not use any
- buffer. This adapter is a very low level adapter.
- */
- class file_input_adapter : public input_adapter_protocol
- {
- public:
- JSON_HEDLEY_NON_NULL(2)
- explicit file_input_adapter(std::FILE* f) noexcept
- : m_file(f)
- {}
- // make class move-only
- file_input_adapter(const file_input_adapter&) = delete;
- file_input_adapter(file_input_adapter&&) = default;
- file_input_adapter& operator=(const file_input_adapter&) = delete;
- file_input_adapter& operator=(file_input_adapter&&) = default;
- ~file_input_adapter() override = default;
- std::char_traits<char>::int_type get_character() noexcept override
- {
- return std::fgetc(m_file);
- }
- private:
- /// the file pointer to read from
- std::FILE* m_file;
- };
- /*!
- Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
- beginning of input. Does not support changing the underlying std::streambuf
- in mid-input. Maintains underlying std::istream and std::streambuf to support
- subsequent use of standard std::istream operations to process any input
- characters following those used in parsing the JSON input. Clears the
- std::istream flags; any input errors (e.g., EOF) will be detected by the first
- subsequent call for input from the std::istream.
- */
- class input_stream_adapter : public input_adapter_protocol
- {
- public:
- ~input_stream_adapter() override
- {
- // clear stream flags; we use underlying streambuf I/O, do not
- // maintain ifstream flags, except eof
- is.clear(is.rdstate() & std::ios::eofbit);
- }
- explicit input_stream_adapter(std::istream& i)
- : is(i), sb(*i.rdbuf())
- {}
- // delete because of pointer members
- input_stream_adapter(const input_stream_adapter&) = delete;
- input_stream_adapter& operator=(input_stream_adapter&) = delete;
- input_stream_adapter(input_stream_adapter&&) = delete;
- input_stream_adapter& operator=(input_stream_adapter&&) = delete;
- // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
- // ensure that std::char_traits<char>::eof() and the character 0xFF do not
- // end up as the same value, eg. 0xFFFFFFFF.
- std::char_traits<char>::int_type get_character() override
- {
- auto res = sb.sbumpc();
- // set eof manually, as we don't use the istream interface.
- if (res == EOF)
- {
- is.clear(is.rdstate() | std::ios::eofbit);
- }
- return res;
- }
- private:
- /// the associated input stream
- std::istream& is;
- std::streambuf& sb;
- };
- /// input adapter for buffer input
- class input_buffer_adapter : public input_adapter_protocol
- {
- public:
- input_buffer_adapter(const char* b, const std::size_t l) noexcept
- : cursor(b), limit(b == nullptr ? nullptr : (b + l))
- {}
- // delete because of pointer members
- input_buffer_adapter(const input_buffer_adapter&) = delete;
- input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
- input_buffer_adapter(input_buffer_adapter&&) = delete;
- input_buffer_adapter& operator=(input_buffer_adapter&&) = delete;
- ~input_buffer_adapter() override = default;
- std::char_traits<char>::int_type get_character() noexcept override
- {
- if (JSON_HEDLEY_LIKELY(cursor < limit))
- {
- assert(cursor != nullptr and limit != nullptr);
- return std::char_traits<char>::to_int_type(*(cursor++));
- }
- return std::char_traits<char>::eof();
- }
- private:
- /// pointer to the current character
- const char* cursor;
- /// pointer past the last character
- const char* const limit;
- };
/*!
Helper that converts the next element of a wide string into UTF-8 bytes. This
primary template treats every element as one UTF-32 code point.

fill_buffer resets @a utf8_bytes_index to 0, writes between one and four values
into @a utf8_bytes, stores their count in @a utf8_bytes_filled and advances
@a current_wchar past the consumed element. At the end of @a str a single
std::char_traits<char>::eof() is stored instead.
*/
template<typename WideStringType, size_t T>
struct wide_string_input_helper
{
    // UTF-32
    static void fill_buffer(const WideStringType& str,
                            size_t& current_wchar,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            size_t& utf8_bytes_index,
                            size_t& utf8_bytes_filled)
    {
        using int_type = std::char_traits<char>::int_type;

        utf8_bytes_index = 0;

        // input exhausted: hand out the end-of-input marker
        if (current_wchar == str.size())
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // fetch the current code point and step past it
        const auto code_point = static_cast<unsigned int>(str[current_wchar]);
        ++current_wchar;

        // continuation byte 10xxxxxx carrying the six bits found at `shift`
        const auto continuation = [code_point](const unsigned int shift) -> int_type
        {
            return static_cast<int_type>(0x80u | ((code_point >> shift) & 0x3Fu));
        };

        // UTF-32 to UTF-8 encoding
        if (code_point < 0x80)
        {
            // one byte: plain ASCII
            utf8_bytes[0] = static_cast<int_type>(code_point);
            utf8_bytes_filled = 1;
            return;
        }

        if (code_point <= 0x7FF)
        {
            // two bytes: 110xxxxx 10xxxxxx
            utf8_bytes[0] = static_cast<int_type>(0xC0u | ((code_point >> 6u) & 0x1Fu));
            utf8_bytes[1] = continuation(0u);
            utf8_bytes_filled = 2;
            return;
        }

        if (code_point <= 0xFFFF)
        {
            // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            utf8_bytes[0] = static_cast<int_type>(0xE0u | ((code_point >> 12u) & 0x0Fu));
            utf8_bytes[1] = continuation(6u);
            utf8_bytes[2] = continuation(0u);
            utf8_bytes_filled = 3;
            return;
        }

        if (code_point <= 0x10FFFF)
        {
            // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            utf8_bytes[0] = static_cast<int_type>(0xF0u | ((code_point >> 18u) & 0x07u));
            utf8_bytes[1] = continuation(12u);
            utf8_bytes[2] = continuation(6u);
            utf8_bytes[3] = continuation(0u);
            utf8_bytes_filled = 4;
            return;
        }

        // unknown character: pass the raw value through as a single entry
        utf8_bytes[0] = static_cast<int_type>(code_point);
        utf8_bytes_filled = 1;
    }
};
- template<typename WideStringType>
- struct wide_string_input_helper<WideStringType, 2>
- {
- // UTF-16
- static void fill_buffer(const WideStringType& str,
- size_t& current_wchar,
- std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
- size_t& utf8_bytes_index,
- size_t& utf8_bytes_filled)
- {
- utf8_bytes_index = 0;
- if (current_wchar == str.size())
- {
- utf8_bytes[0] = std::char_traits<char>::eof();
- utf8_bytes_filled = 1;
- }
- else
- {
- // get the current character
- const auto wc = static_cast<unsigned int>(str[current_wchar++]);
- // UTF-16 to UTF-8 encoding
- if (wc < 0x80)
- {
- utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
- utf8_bytes_filled = 1;
- }
- else if (wc <= 0x7FF)
- {
- utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u)));
- utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
- utf8_bytes_filled = 2;
- }
- else if (0xD800 > wc or wc >= 0xE000)
- {
- utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u)));
- utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
- utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
- utf8_bytes_filled = 3;
- }
- else
- {
- if (current_wchar < str.size())
- {
- const auto wc2 = static_cast<unsigned int>(str[current_wchar++]);
- const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
- utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
- utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
- utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
- utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
- utf8_bytes_filled = 4;
- }
- else
- {
- // unknown character
- ++current_wchar;
- utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
- utf8_bytes_filled = 1;
- }
- }
- }
- }
- };
- template<typename WideStringType>
- class wide_string_input_adapter : public input_adapter_protocol
- {
- public:
- explicit wide_string_input_adapter(const WideStringType& w) noexcept
- : str(w)
- {}
- std::char_traits<char>::int_type get_character() noexcept override
- {
- // check if buffer needs to be filled
- if (utf8_bytes_index == utf8_bytes_filled)
- {
- fill_buffer<sizeof(typename WideStringType::value_type)>();
- assert(utf8_bytes_filled > 0);
- assert(utf8_bytes_index == 0);
- }
- // use buffer
- assert(utf8_bytes_filled > 0);
- assert(utf8_bytes_index < utf8_bytes_filled);
- return utf8_bytes[utf8_bytes_index++];
- }
- private:
- template<size_t T>
- void fill_buffer()
- {
- wide_string_input_helper<WideStringType, T>::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
- }
- /// the wstring to process
- const WideStringType& str;
- /// index of the current wchar in str
- std::size_t current_wchar = 0;
- /// a buffer for UTF-8 bytes
- std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
- /// index to the utf8_codes array for the next valid byte
- std::size_t utf8_bytes_index = 0;
- /// number of valid bytes in the utf8_codes array
- std::size_t utf8_bytes_filled = 0;
- };
- class input_adapter
- {
- public:
- // native support
- JSON_HEDLEY_NON_NULL(2)
- input_adapter(std::FILE* file)
- : ia(std::make_shared<file_input_adapter>(file)) {}
- /// input adapter for input stream
- input_adapter(std::istream& i)
- : ia(std::make_shared<input_stream_adapter>(i)) {}
- /// input adapter for input stream
- input_adapter(std::istream&& i)
- : ia(std::make_shared<input_stream_adapter>(i)) {}
- input_adapter(const std::wstring& ws)
- : ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {}
- input_adapter(const std::u16string& ws)
- : ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {}
- input_adapter(const std::u32string& ws)
- : ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {}
- /// input adapter for buffer
- template<typename CharT,
- typename std::enable_if<
- std::is_pointer<CharT>::value and
- std::is_integral<typename std::remove_pointer<CharT>::type>::value and
- sizeof(typename std::remove_pointer<CharT>::type) == 1,
- int>::type = 0>
- input_adapter(CharT b, std::size_t l)
- : ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {}
- // derived support
- /// input adapter for string literal
- template<typename CharT,
- typename std::enable_if<
- std::is_pointer<CharT>::value and
- std::is_integral<typename std::remove_pointer<CharT>::type>::value and
- sizeof(typename std::remove_pointer<CharT>::type) == 1,
- int>::type = 0>
- input_adapter(CharT b)
- : input_adapter(reinterpret_cast<const char*>(b),
- std::strlen(reinterpret_cast<const char*>(b))) {}
- /// input adapter for iterator range with contiguous storage
- template<class IteratorType,
- typename std::enable_if<
- std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
- int>::type = 0>
- input_adapter(IteratorType first, IteratorType last)
- {
- #ifndef NDEBUG
- // assertion to check that the iterator range is indeed contiguous,
- // see https://stackoverflow.com/a/35008842/266378 for more discussion
- const auto is_contiguous = std::accumulate(
- first, last, std::pair<bool, int>(true, 0),
- [&first](std::pair<bool, int> res, decltype(*first) val)
- {
- res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
- return res;
- }).first;
- assert(is_contiguous);
- #endif
- // assertion to check that each element is 1 byte long
- static_assert(
- sizeof(typename iterator_traits<IteratorType>::value_type) == 1,
- "each element in the iterator range must have the size of 1 byte");
- const auto len = static_cast<size_t>(std::distance(first, last));
- if (JSON_HEDLEY_LIKELY(len > 0))
- {
- // there is at least one element: use the address of first
- ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len);
- }
- else
- {
- // the address of first cannot be used: use nullptr
- ia = std::make_shared<input_buffer_adapter>(nullptr, len);
- }
- }
- /// input adapter for array
- template<class T, std::size_t N>
- input_adapter(T (&array)[N])
- : input_adapter(std::begin(array), std::end(array)) {}
- /// input adapter for contiguous container
- template<class ContiguousContainer, typename
- std::enable_if<not std::is_pointer<ContiguousContainer>::value and
- std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
- int>::type = 0>
- input_adapter(const ContiguousContainer& c)
- : input_adapter(std::begin(c), std::end(c)) {}
- operator input_adapter_t()
- {
- return ia;
- }
- private:
- /// the actual adapter
- input_adapter_t ia = nullptr;
- };
- } // namespace detail
- } // namespace nlohmann
|