Horizon
parser.hpp
1 #pragma once
2 
3 #include <cassert> // assert
4 #include <cmath> // isfinite
5 #include <cstdint> // uint8_t
6 #include <functional> // function
7 #include <string> // string
8 #include <utility> // move
9 #include <vector> // vector
10 
11 #include <nlohmann/detail/exceptions.hpp>
12 #include <nlohmann/detail/input/input_adapters.hpp>
13 #include <nlohmann/detail/input/json_sax.hpp>
14 #include <nlohmann/detail/input/lexer.hpp>
15 #include <nlohmann/detail/macro_scope.hpp>
16 #include <nlohmann/detail/meta/is_sax.hpp>
17 #include <nlohmann/detail/value_t.hpp>
18 
19 namespace nlohmann
20 {
21 namespace detail
22 {
24 // parser //
26 
32 template<typename BasicJsonType>
33 class parser
34 {
35  using number_integer_t = typename BasicJsonType::number_integer_t;
36  using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
37  using number_float_t = typename BasicJsonType::number_float_t;
38  using string_t = typename BasicJsonType::string_t;
40  using token_type = typename lexer_t::token_type;
41 
42  public:
/// events reported to the parser callback function
enum class parse_event_t : std::uint8_t
{
    /// the parser read `{` and started to process a JSON object
    object_start,
    /// the parser read `}` and finished processing a JSON object
    object_end,
    /// the parser read `[` and started to process a JSON array
    array_start,
    /// the parser read `]` and finished processing a JSON array
    array_end,
    /// the parser read a key of a value in an object
    key,
    /// the parser finished reading a JSON value
    value
};
58 
59  using parser_callback_t =
60  std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
61 
63  explicit parser(detail::input_adapter_t&& adapter,
64  const parser_callback_t cb = nullptr,
65  const bool allow_exceptions_ = true)
66  : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_)
67  {
68  // read first token
69  get_token();
70  }
71 
82  void parse(const bool strict, BasicJsonType& result)
83  {
84  if (callback)
85  {
86  json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
87  sax_parse_internal(&sdp);
88  result.assert_invariant();
89 
90  // in strict mode, input must be completely read
91  if (strict and (get_token() != token_type::end_of_input))
92  {
93  sdp.parse_error(m_lexer.get_position(),
94  m_lexer.get_token_string(),
95  parse_error::create(101, m_lexer.get_position(),
96  exception_message(token_type::end_of_input, "value")));
97  }
98 
99  // in case of an error, return discarded value
100  if (sdp.is_errored())
101  {
102  result = value_t::discarded;
103  return;
104  }
105 
106  // set top-level value to null if it was discarded by the callback
107  // function
108  if (result.is_discarded())
109  {
110  result = nullptr;
111  }
112  }
113  else
114  {
115  json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
116  sax_parse_internal(&sdp);
117  result.assert_invariant();
118 
119  // in strict mode, input must be completely read
120  if (strict and (get_token() != token_type::end_of_input))
121  {
122  sdp.parse_error(m_lexer.get_position(),
123  m_lexer.get_token_string(),
124  parse_error::create(101, m_lexer.get_position(),
125  exception_message(token_type::end_of_input, "value")));
126  }
127 
128  // in case of an error, return discarded value
129  if (sdp.is_errored())
130  {
131  result = value_t::discarded;
132  return;
133  }
134  }
135  }
136 
143  bool accept(const bool strict = true)
144  {
146  return sax_parse(&sax_acceptor, strict);
147  }
148 
149  template <typename SAX>
150  JSON_HEDLEY_NON_NULL(2)
151  bool sax_parse(SAX* sax, const bool strict = true)
152  {
154  const bool result = sax_parse_internal(sax);
155 
156  // strict mode: next byte must be EOF
157  if (result and strict and (get_token() != token_type::end_of_input))
158  {
159  return sax->parse_error(m_lexer.get_position(),
160  m_lexer.get_token_string(),
161  parse_error::create(101, m_lexer.get_position(),
162  exception_message(token_type::end_of_input, "value")));
163  }
164 
165  return result;
166  }
167 
168  private:
169  template <typename SAX>
170  JSON_HEDLEY_NON_NULL(2)
171  bool sax_parse_internal(SAX* sax)
172  {
173  // stack to remember the hierarchy of structured values we are parsing
174  // true = array; false = object
175  std::vector<bool> states;
176  // value to avoid a goto (see comment where set to true)
177  bool skip_to_state_evaluation = false;
178 
179  while (true)
180  {
181  if (not skip_to_state_evaluation)
182  {
183  // invariant: get_token() was called before each iteration
184  switch (last_token)
185  {
186  case token_type::begin_object:
187  {
188  if (JSON_HEDLEY_UNLIKELY(not sax->start_object(std::size_t(-1))))
189  {
190  return false;
191  }
192 
193  // closing } -> we are done
194  if (get_token() == token_type::end_object)
195  {
196  if (JSON_HEDLEY_UNLIKELY(not sax->end_object()))
197  {
198  return false;
199  }
200  break;
201  }
202 
203  // parse key
204  if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
205  {
206  return sax->parse_error(m_lexer.get_position(),
207  m_lexer.get_token_string(),
208  parse_error::create(101, m_lexer.get_position(),
209  exception_message(token_type::value_string, "object key")));
210  }
211  if (JSON_HEDLEY_UNLIKELY(not sax->key(m_lexer.get_string())))
212  {
213  return false;
214  }
215 
216  // parse separator (:)
217  if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
218  {
219  return sax->parse_error(m_lexer.get_position(),
220  m_lexer.get_token_string(),
221  parse_error::create(101, m_lexer.get_position(),
222  exception_message(token_type::name_separator, "object separator")));
223  }
224 
225  // remember we are now inside an object
226  states.push_back(false);
227 
228  // parse values
229  get_token();
230  continue;
231  }
232 
233  case token_type::begin_array:
234  {
235  if (JSON_HEDLEY_UNLIKELY(not sax->start_array(std::size_t(-1))))
236  {
237  return false;
238  }
239 
240  // closing ] -> we are done
241  if (get_token() == token_type::end_array)
242  {
243  if (JSON_HEDLEY_UNLIKELY(not sax->end_array()))
244  {
245  return false;
246  }
247  break;
248  }
249 
250  // remember we are now inside an array
251  states.push_back(true);
252 
253  // parse values (no need to call get_token)
254  continue;
255  }
256 
257  case token_type::value_float:
258  {
259  const auto res = m_lexer.get_number_float();
260 
261  if (JSON_HEDLEY_UNLIKELY(not std::isfinite(res)))
262  {
263  return sax->parse_error(m_lexer.get_position(),
264  m_lexer.get_token_string(),
265  out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'"));
266  }
267 
268  if (JSON_HEDLEY_UNLIKELY(not sax->number_float(res, m_lexer.get_string())))
269  {
270  return false;
271  }
272 
273  break;
274  }
275 
276  case token_type::literal_false:
277  {
278  if (JSON_HEDLEY_UNLIKELY(not sax->boolean(false)))
279  {
280  return false;
281  }
282  break;
283  }
284 
285  case token_type::literal_null:
286  {
287  if (JSON_HEDLEY_UNLIKELY(not sax->null()))
288  {
289  return false;
290  }
291  break;
292  }
293 
294  case token_type::literal_true:
295  {
296  if (JSON_HEDLEY_UNLIKELY(not sax->boolean(true)))
297  {
298  return false;
299  }
300  break;
301  }
302 
303  case token_type::value_integer:
304  {
305  if (JSON_HEDLEY_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer())))
306  {
307  return false;
308  }
309  break;
310  }
311 
312  case token_type::value_string:
313  {
314  if (JSON_HEDLEY_UNLIKELY(not sax->string(m_lexer.get_string())))
315  {
316  return false;
317  }
318  break;
319  }
320 
321  case token_type::value_unsigned:
322  {
323  if (JSON_HEDLEY_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned())))
324  {
325  return false;
326  }
327  break;
328  }
329 
330  case token_type::parse_error:
331  {
332  // using "uninitialized" to avoid "expected" message
333  return sax->parse_error(m_lexer.get_position(),
334  m_lexer.get_token_string(),
335  parse_error::create(101, m_lexer.get_position(),
336  exception_message(token_type::uninitialized, "value")));
337  }
338 
339  default: // the last token was unexpected
340  {
341  return sax->parse_error(m_lexer.get_position(),
342  m_lexer.get_token_string(),
343  parse_error::create(101, m_lexer.get_position(),
344  exception_message(token_type::literal_or_value, "value")));
345  }
346  }
347  }
348  else
349  {
350  skip_to_state_evaluation = false;
351  }
352 
353  // we reached this line after we successfully parsed a value
354  if (states.empty())
355  {
356  // empty stack: we reached the end of the hierarchy: done
357  return true;
358  }
359 
360  if (states.back()) // array
361  {
362  // comma -> next value
363  if (get_token() == token_type::value_separator)
364  {
365  // parse a new value
366  get_token();
367  continue;
368  }
369 
370  // closing ]
371  if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
372  {
373  if (JSON_HEDLEY_UNLIKELY(not sax->end_array()))
374  {
375  return false;
376  }
377 
378  // We are done with this array. Before we can parse a
379  // new value, we need to evaluate the new state first.
380  // By setting skip_to_state_evaluation to false, we
381  // are effectively jumping to the beginning of this if.
382  assert(not states.empty());
383  states.pop_back();
384  skip_to_state_evaluation = true;
385  continue;
386  }
387 
388  return sax->parse_error(m_lexer.get_position(),
389  m_lexer.get_token_string(),
390  parse_error::create(101, m_lexer.get_position(),
391  exception_message(token_type::end_array, "array")));
392  }
393  else // object
394  {
395  // comma -> next value
396  if (get_token() == token_type::value_separator)
397  {
398  // parse key
399  if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
400  {
401  return sax->parse_error(m_lexer.get_position(),
402  m_lexer.get_token_string(),
403  parse_error::create(101, m_lexer.get_position(),
404  exception_message(token_type::value_string, "object key")));
405  }
406 
407  if (JSON_HEDLEY_UNLIKELY(not sax->key(m_lexer.get_string())))
408  {
409  return false;
410  }
411 
412  // parse separator (:)
413  if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
414  {
415  return sax->parse_error(m_lexer.get_position(),
416  m_lexer.get_token_string(),
417  parse_error::create(101, m_lexer.get_position(),
418  exception_message(token_type::name_separator, "object separator")));
419  }
420 
421  // parse values
422  get_token();
423  continue;
424  }
425 
426  // closing }
427  if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
428  {
429  if (JSON_HEDLEY_UNLIKELY(not sax->end_object()))
430  {
431  return false;
432  }
433 
434  // We are done with this object. Before we can parse a
435  // new value, we need to evaluate the new state first.
436  // By setting skip_to_state_evaluation to false, we
437  // are effectively jumping to the beginning of this if.
438  assert(not states.empty());
439  states.pop_back();
440  skip_to_state_evaluation = true;
441  continue;
442  }
443 
444  return sax->parse_error(m_lexer.get_position(),
445  m_lexer.get_token_string(),
446  parse_error::create(101, m_lexer.get_position(),
447  exception_message(token_type::end_object, "object")));
448  }
449  }
450  }
451 
453  token_type get_token()
454  {
455  return last_token = m_lexer.scan();
456  }
457 
458  std::string exception_message(const token_type expected, const std::string& context)
459  {
460  std::string error_msg = "syntax error ";
461 
462  if (not context.empty())
463  {
464  error_msg += "while parsing " + context + " ";
465  }
466 
467  error_msg += "- ";
468 
469  if (last_token == token_type::parse_error)
470  {
471  error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
472  m_lexer.get_token_string() + "'";
473  }
474  else
475  {
476  error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
477  }
478 
479  if (expected != token_type::uninitialized)
480  {
481  error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
482  }
483 
484  return error_msg;
485  }
486 
487  private:
489  const parser_callback_t callback = nullptr;
491  token_type last_token = token_type::uninitialized;
493  lexer_t m_lexer;
495  const bool allow_exceptions = true;
496 };
497 } // namespace detail
498 } // namespace nlohmann
Definition: json_sax.hpp:632
SAX implementation to create a JSON value from SAX events.
Definition: json_sax.hpp:146
lexical analysis
Definition: lexer.hpp:32
token_type
token types for the parser
Definition: lexer.hpp:41
std::string get_token_string() const
return the last read token (for errors only).
Definition: lexer.hpp:1358
string_t & get_string()
return current string value (implicitly resets the token; useful only once)
Definition: lexer.hpp:1340
constexpr number_unsigned_t get_number_unsigned() const noexcept
return unsigned integer value
Definition: lexer.hpp:1328
constexpr JSON_HEDLEY_RETURNS_NON_NULL const char * get_error_message() const noexcept
return syntax error message
Definition: lexer.hpp:1383
constexpr position_t get_position() const noexcept
return position of last read token
Definition: lexer.hpp:1350
constexpr number_float_t get_number_float() const noexcept
return floating-point value
Definition: lexer.hpp:1334
JSON_HEDLEY_RETURNS_NON_NULL static JSON_HEDLEY_CONST const char * token_type_name(const token_type t) noexcept
return name of values of type token_type (only used for errors)
Definition: lexer.hpp:64
constexpr number_integer_t get_number_integer() const noexcept
return integer value
Definition: lexer.hpp:1322
static parse_error create(int id_, const position_t &pos, const std::string &what_arg)
create a parse error exception
Definition: exceptions.hpp:129
syntax analysis
Definition: parser.hpp:34
void parse(const bool strict, BasicJsonType &result)
public parser interface
Definition: parser.hpp:82
parser(detail::input_adapter_t &&adapter, const parser_callback_t cb=nullptr, const bool allow_exceptions_=true)
a parser reading from an input adapter
Definition: parser.hpp:63
bool accept(const bool strict=true)
public accept interface
Definition: parser.hpp:143
parse_event_t
Definition: parser.hpp:44
@ value
the parser finished reading a JSON value
@ key
the parser read a key of a value in an object
@ array_end
the parser read ] and finished processing a JSON array
@ array_start
the parser read [ and started to process a JSON array
@ object_start
the parser read { and started to process a JSON object
@ object_end
the parser read } and finished processing a JSON object
zip_uint8_t uint8_t
zip_uint8_t typedef.
Definition: zip.hpp:78
@ discarded
discarded by the parser callback function
@ strict
throw a type_error exception in case of invalid UTF-8
std::shared_ptr< input_adapter_protocol > input_adapter_t
a type to simplify interfaces
Definition: input_adapters.hpp:49
namespace for Niels Lohmann
Definition: adl_serializer.hpp:9