tree.hpp (60220B)
1 #ifndef _C4_YML_TREE_HPP_ 2 #define _C4_YML_TREE_HPP_ 3 4 5 #include "c4/error.hpp" 6 #include "c4/types.hpp" 7 #ifndef _C4_YML_COMMON_HPP_ 8 #include "c4/yml/common.hpp" 9 #endif 10 11 #include <c4/charconv.hpp> 12 #include <cmath> 13 #include <limits> 14 15 16 C4_SUPPRESS_WARNING_MSVC_PUSH 17 C4_SUPPRESS_WARNING_MSVC(4251) // needs to have dll-interface to be used by clients of struct 18 C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value' 19 C4_SUPPRESS_WARNING_GCC_CLANG_PUSH 20 C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast") 21 C4_SUPPRESS_WARNING_GCC("-Wtype-limits") 22 23 24 namespace c4 { 25 namespace yml { 26 27 struct NodeScalar; 28 struct NodeInit; 29 struct NodeData; 30 class NodeRef; 31 class ConstNodeRef; 32 class Tree; 33 34 35 /** encode a floating point value to a string. */ 36 template<class T> 37 size_t to_chars_float(substr buf, T val) 38 { 39 C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wfloat-equal"); 40 static_assert(std::is_floating_point<T>::value, "must be floating point"); 41 if(C4_UNLIKELY(std::isnan(val))) 42 return to_chars(buf, csubstr(".nan")); 43 else if(C4_UNLIKELY(val == std::numeric_limits<T>::infinity())) 44 return to_chars(buf, csubstr(".inf")); 45 else if(C4_UNLIKELY(val == -std::numeric_limits<T>::infinity())) 46 return to_chars(buf, csubstr("-.inf")); 47 return to_chars(buf, val); 48 C4_SUPPRESS_WARNING_GCC_CLANG_POP 49 } 50 51 52 /** decode a floating point from string. Accepts special values: .nan, 53 * .inf, -.inf */ 54 template<class T> 55 bool from_chars_float(csubstr buf, T *C4_RESTRICT val) 56 { 57 static_assert(std::is_floating_point<T>::value, "must be floating point"); 58 if(C4_LIKELY(from_chars(buf, val))) 59 { 60 return true; 61 } 62 else if(C4_UNLIKELY(buf == ".nan" || buf == ".NaN" || buf == ".NAN")) 63 { 64 *val = std::numeric_limits<T>::quiet_NaN(); 65 return true; 66 } 67 else if(C4_UNLIKELY(buf == ".inf" || buf == ".Inf" || buf == ".INF")) 68 { 69 *val = std::numeric_limits<T>::infinity(); 70 return true; 71 } 72 else if(C4_UNLIKELY(buf == "-.inf" || buf == "-.Inf" || buf == "-.INF")) 73 { 74 *val = -std::numeric_limits<T>::infinity(); 75 return true; 76 } 77 else 78 { 79 return false; 80 } 81 } 82 83 84 //----------------------------------------------------------------------------- 85 //----------------------------------------------------------------------------- 86 //----------------------------------------------------------------------------- 87 88 /** the integral type necessary to cover all the bits marking node tags */ 89 using tag_bits = uint16_t; 90 91 /** a bit mask for marking tags for types */ 92 typedef enum : tag_bits { 93 // container types 94 TAG_NONE = 0, 95 TAG_MAP = 1, /**< !!map Unordered set of key: value pairs without duplicates. @see https://yaml.org/type/map.html */ 96 TAG_OMAP = 2, /**< !!omap Ordered sequence of key: value pairs without duplicates. @see https://yaml.org/type/omap.html */ 97 TAG_PAIRS = 3, /**< !!pairs Ordered sequence of key: value pairs allowing duplicates. @see https://yaml.org/type/pairs.html */ 98 TAG_SET = 4, /**< !!set Unordered set of non-equal values. @see https://yaml.org/type/set.html */ 99 TAG_SEQ = 5, /**< !!seq Sequence of arbitrary values. @see https://yaml.org/type/seq.html */ 100 // scalar types 101 TAG_BINARY = 6, /**< !!binary A sequence of zero or more octets (8 bit values). @see https://yaml.org/type/binary.html */ 102 TAG_BOOL = 7, /**< !!bool Mathematical Booleans. @see https://yaml.org/type/bool.html */ 103 TAG_FLOAT = 8, /**< !!float Floating-point approximation to real numbers. https://yaml.org/type/float.html */ 104 TAG_INT = 9, /**< !!float Mathematical integers. https://yaml.org/type/int.html */ 105 TAG_MERGE = 10, /**< !!merge Specify one or more mapping to be merged with the current one. https://yaml.org/type/merge.html */ 106 TAG_NULL = 11, /**< !!null Devoid of value. https://yaml.org/type/null.html */ 107 TAG_STR = 12, /**< !!str A sequence of zero or more Unicode characters. https://yaml.org/type/str.html */ 108 TAG_TIMESTAMP = 13, /**< !!timestamp A point in time https://yaml.org/type/timestamp.html */ 109 TAG_VALUE = 14, /**< !!value Specify the default value of a mapping https://yaml.org/type/value.html */ 110 TAG_YAML = 15, /**< !!yaml Specify the default value of a mapping https://yaml.org/type/yaml.html */ 111 } YamlTag_e; 112 113 YamlTag_e to_tag(csubstr tag); 114 csubstr from_tag(YamlTag_e tag); 115 csubstr from_tag_long(YamlTag_e tag); 116 csubstr normalize_tag(csubstr tag); 117 csubstr normalize_tag_long(csubstr tag); 118 119 struct TagDirective 120 { 121 /** Eg `!e!` in `%TAG !e! tag:example.com,2000:app/` */ 122 csubstr handle; 123 /** Eg `tag:example.com,2000:app/` in `%TAG !e! tag:example.com,2000:app/` */ 124 csubstr prefix; 125 /** The next node to which this tag directive applies */ 126 size_t next_node_id; 127 }; 128 129 #ifndef RYML_MAX_TAG_DIRECTIVES 130 /** the maximum number of tag directives in a Tree */ 131 #define RYML_MAX_TAG_DIRECTIVES 4 132 #endif 133 134 135 136 //----------------------------------------------------------------------------- 137 //----------------------------------------------------------------------------- 138 //----------------------------------------------------------------------------- 139 140 141 /** the integral type necessary to cover all the bits marking node types */ 142 using type_bits = uint64_t; 143 144 145 /** a bit mask for marking node types */ 146 typedef enum : type_bits { 147 // a convenience define, undefined below 148 #define c4bit(v) (type_bits(1) << v) 149 NOTYPE = 0, ///< no node type is set 150 VAL = c4bit(0), ///< a leaf node, has a (possibly empty) value 151 KEY = c4bit(1), ///< is member of a map, must have non-empty key 152 MAP = c4bit(2), ///< a map: a parent of keyvals 153 SEQ = c4bit(3), ///< a seq: a parent of vals 154 DOC = c4bit(4), ///< a document 155 STREAM = c4bit(5)|SEQ, ///< a stream: a seq of docs 156 KEYREF = c4bit(6), ///< a *reference: the key references an &anchor 157 VALREF = c4bit(7), ///< a *reference: the val references an &anchor 158 KEYANCH = c4bit(8), ///< the key has an &anchor 159 VALANCH = c4bit(9), ///< the val has an &anchor 160 KEYTAG = c4bit(10), ///< the key has an explicit tag/type 161 VALTAG = c4bit(11), ///< the val has an explicit tag/type 162 _TYMASK = c4bit(12)-1, // all the bits up to here 163 VALQUO = c4bit(12), ///< the val is quoted by '', "", > or | 164 KEYQUO = c4bit(13), ///< the key is quoted by '', "", > or | 165 KEYVAL = KEY|VAL, 166 KEYSEQ = KEY|SEQ, 167 KEYMAP = KEY|MAP, 168 DOCMAP = DOC|MAP, 169 DOCSEQ = DOC|SEQ, 170 DOCVAL = DOC|VAL, 171 _KEYMASK = KEY | KEYQUO | KEYANCH | KEYREF | KEYTAG, 172 _VALMASK = VAL | VALQUO | VALANCH | VALREF | VALTAG, 173 // these flags are from a work in progress and should be used with care 174 _WIP_STYLE_FLOW_SL = c4bit(14), ///< mark container with single-line flow format (seqs as '[val1,val2], maps as '{key: val, key2: val2}') 175 _WIP_STYLE_FLOW_ML = c4bit(15), ///< mark container with multi-line flow format (seqs as '[val1,\nval2], maps as '{key: val,\nkey2: val2}') 176 _WIP_STYLE_BLOCK = c4bit(16), ///< mark container with block format (seqs as '- val\n', maps as 'key: val') 177 _WIP_KEY_LITERAL = c4bit(17), ///< mark key scalar as multiline, block literal | 178 _WIP_VAL_LITERAL = c4bit(18), ///< mark val scalar as multiline, block literal | 179 _WIP_KEY_FOLDED = c4bit(19), ///< mark key scalar as multiline, block folded > 180 _WIP_VAL_FOLDED = c4bit(20), ///< mark val scalar as multiline, block folded > 181 _WIP_KEY_SQUO = c4bit(21), ///< mark key scalar as single quoted 182 _WIP_VAL_SQUO = c4bit(22), ///< mark val scalar as single quoted 183 _WIP_KEY_DQUO = c4bit(23), ///< mark key scalar as double quoted 184 _WIP_VAL_DQUO = c4bit(24), ///< mark val scalar as double quoted 185 _WIP_KEY_PLAIN = c4bit(25), ///< mark key scalar as plain scalar (unquoted, even when multiline) 186 _WIP_VAL_PLAIN = c4bit(26), ///< mark val scalar as plain scalar (unquoted, even when multiline) 187 _WIP_KEY_STYLE = _WIP_KEY_LITERAL|_WIP_KEY_FOLDED|_WIP_KEY_SQUO|_WIP_KEY_DQUO|_WIP_KEY_PLAIN, 188 _WIP_VAL_STYLE = _WIP_VAL_LITERAL|_WIP_VAL_FOLDED|_WIP_VAL_SQUO|_WIP_VAL_DQUO|_WIP_VAL_PLAIN, 189 _WIP_KEY_FT_NL = c4bit(27), ///< features: mark key scalar as having \n in its contents 190 _WIP_VAL_FT_NL = c4bit(28), ///< features: mark val scalar as having \n in its contents 191 _WIP_KEY_FT_SQ = c4bit(29), ///< features: mark key scalar as having single quotes in its contents 192 _WIP_VAL_FT_SQ = c4bit(30), ///< features: mark val scalar as having single quotes in its contents 193 _WIP_KEY_FT_DQ = c4bit(31), ///< features: mark key scalar as having double quotes in its contents 194 _WIP_VAL_FT_DQ = c4bit(32), ///< features: mark val scalar as having double quotes in its contents 195 #undef c4bit 196 } NodeType_e; 197 198 199 //----------------------------------------------------------------------------- 200 //----------------------------------------------------------------------------- 201 //----------------------------------------------------------------------------- 202 203 /** wraps a NodeType_e element with some syntactic sugar and predicates */ 204 struct NodeType 205 { 206 public: 207 208 NodeType_e type; 209 210 public: 211 212 C4_ALWAYS_INLINE NodeType() : type(NOTYPE) {} 213 C4_ALWAYS_INLINE NodeType(NodeType_e t) : type(t) {} 214 C4_ALWAYS_INLINE NodeType(type_bits t) : type((NodeType_e)t) {} 215 216 C4_ALWAYS_INLINE const char *type_str() const { return type_str(type); } 217 static const char* type_str(NodeType_e t); 218 219 C4_ALWAYS_INLINE void set(NodeType_e t) { type = t; } 220 C4_ALWAYS_INLINE void set(type_bits t) { type = (NodeType_e)t; } 221 222 C4_ALWAYS_INLINE void add(NodeType_e t) { type = (NodeType_e)(type|t); } 223 C4_ALWAYS_INLINE void add(type_bits t) { type = (NodeType_e)(type|t); } 224 225 C4_ALWAYS_INLINE void rem(NodeType_e t) { type = (NodeType_e)(type & ~t); } 226 C4_ALWAYS_INLINE void rem(type_bits t) { type = (NodeType_e)(type & ~t); } 227 228 C4_ALWAYS_INLINE void clear() { type = NOTYPE; } 229 230 public: 231 232 C4_ALWAYS_INLINE operator NodeType_e & C4_RESTRICT () { return type; } 233 C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const { return type; } 234 235 C4_ALWAYS_INLINE bool operator== (NodeType_e t) const { return type == t; } 236 C4_ALWAYS_INLINE bool operator!= (NodeType_e t) const { return type != t; } 237 238 public: 239 240 #if defined(__clang__) 241 # pragma clang diagnostic push 242 # pragma clang diagnostic ignored "-Wnull-dereference" 243 #elif defined(__GNUC__) 244 # pragma GCC diagnostic push 245 # if __GNUC__ >= 6 246 # pragma GCC diagnostic ignored "-Wnull-dereference" 247 # endif 248 #endif 249 250 C4_ALWAYS_INLINE bool is_notype() const { return type == NOTYPE; } 251 C4_ALWAYS_INLINE bool is_stream() const { return ((type & STREAM) == STREAM) != 0; } 252 C4_ALWAYS_INLINE bool is_doc() const { return (type & DOC) != 0; } 253 C4_ALWAYS_INLINE bool is_container() const { return (type & (MAP|SEQ|STREAM)) != 0; } 254 C4_ALWAYS_INLINE bool is_map() const { return (type & MAP) != 0; } 255 C4_ALWAYS_INLINE bool is_seq() const { return (type & SEQ) != 0; } 256 C4_ALWAYS_INLINE bool has_key() const { return (type & KEY) != 0; } 257 C4_ALWAYS_INLINE bool has_val() const { return (type & VAL) != 0; } 258 C4_ALWAYS_INLINE bool is_val() const { return (type & KEYVAL) == VAL; } 259 C4_ALWAYS_INLINE bool is_keyval() const { return (type & KEYVAL) == KEYVAL; } 260 C4_ALWAYS_INLINE bool has_key_tag() const { return (type & (KEY|KEYTAG)) == (KEY|KEYTAG); } 261 C4_ALWAYS_INLINE bool has_val_tag() const { return ((type & VALTAG) && (type & (VAL|MAP|SEQ))); } 262 C4_ALWAYS_INLINE bool has_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); } 263 C4_ALWAYS_INLINE bool is_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); } 264 C4_ALWAYS_INLINE bool has_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; } 265 C4_ALWAYS_INLINE bool is_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; } 266 C4_ALWAYS_INLINE bool has_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; } 267 C4_ALWAYS_INLINE bool is_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; } 268 C4_ALWAYS_INLINE bool is_key_ref() const { return (type & KEYREF) != 0; } 269 C4_ALWAYS_INLINE bool is_val_ref() const { return (type & VALREF) != 0; } 270 C4_ALWAYS_INLINE bool is_ref() const { return (type & (KEYREF|VALREF)) != 0; } 271 C4_ALWAYS_INLINE bool is_anchor_or_ref() const { return (type & (KEYANCH|VALANCH|KEYREF|VALREF)) != 0; } 272 C4_ALWAYS_INLINE bool is_key_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO); } 273 C4_ALWAYS_INLINE bool is_val_quoted() const { return (type & (VAL|VALQUO)) == (VAL|VALQUO); } 274 C4_ALWAYS_INLINE bool is_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO) || (type & (VAL|VALQUO)) == (VAL|VALQUO); } 275 276 // these predicates are a work in progress and subject to change. Don't use yet. 277 C4_ALWAYS_INLINE bool default_block() const { return (type & (_WIP_STYLE_BLOCK|_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) == 0; } 278 C4_ALWAYS_INLINE bool marked_block() const { return (type & (_WIP_STYLE_BLOCK)) != 0; } 279 C4_ALWAYS_INLINE bool marked_flow_sl() const { return (type & (_WIP_STYLE_FLOW_SL)) != 0; } 280 C4_ALWAYS_INLINE bool marked_flow_ml() const { return (type & (_WIP_STYLE_FLOW_ML)) != 0; } 281 C4_ALWAYS_INLINE bool marked_flow() const { return (type & (_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) != 0; } 282 C4_ALWAYS_INLINE bool key_marked_literal() const { return (type & (_WIP_KEY_LITERAL)) != 0; } 283 C4_ALWAYS_INLINE bool val_marked_literal() const { return (type & (_WIP_VAL_LITERAL)) != 0; } 284 C4_ALWAYS_INLINE bool key_marked_folded() const { return (type & (_WIP_KEY_FOLDED)) != 0; } 285 C4_ALWAYS_INLINE bool val_marked_folded() const { return (type & (_WIP_VAL_FOLDED)) != 0; } 286 C4_ALWAYS_INLINE bool key_marked_squo() const { return (type & (_WIP_KEY_SQUO)) != 0; } 287 C4_ALWAYS_INLINE bool val_marked_squo() const { return (type & (_WIP_VAL_SQUO)) != 0; } 288 C4_ALWAYS_INLINE bool key_marked_dquo() const { return (type & (_WIP_KEY_DQUO)) != 0; } 289 C4_ALWAYS_INLINE bool val_marked_dquo() const { return (type & (_WIP_VAL_DQUO)) != 0; } 290 C4_ALWAYS_INLINE bool key_marked_plain() const { return (type & (_WIP_KEY_PLAIN)) != 0; } 291 C4_ALWAYS_INLINE bool val_marked_plain() const { return (type & (_WIP_VAL_PLAIN)) != 0; } 292 293 #if defined(__clang__) 294 # pragma clang diagnostic pop 295 #elif defined(__GNUC__) 296 # pragma GCC diagnostic pop 297 #endif 298 299 }; 300 301 302 //----------------------------------------------------------------------------- 303 //----------------------------------------------------------------------------- 304 //----------------------------------------------------------------------------- 305 306 /** a node scalar is a csubstr, which may be tagged and anchored. */ 307 struct NodeScalar 308 { 309 csubstr tag; 310 csubstr scalar; 311 csubstr anchor; 312 313 public: 314 315 /// initialize as an empty scalar 316 inline NodeScalar() noexcept : tag(), scalar(), anchor() {} 317 318 /// initialize as an untagged scalar 319 template<size_t N> 320 inline NodeScalar(const char (&s)[N]) noexcept : tag(), scalar(s), anchor() {} 321 inline NodeScalar(csubstr s ) noexcept : tag(), scalar(s), anchor() {} 322 323 /// initialize as a tagged scalar 324 template<size_t N, size_t M> 325 inline NodeScalar(const char (&t)[N], const char (&s)[N]) noexcept : tag(t), scalar(s), anchor() {} 326 inline NodeScalar(csubstr t , csubstr s ) noexcept : tag(t), scalar(s), anchor() {} 327 328 public: 329 330 ~NodeScalar() noexcept = default; 331 NodeScalar(NodeScalar &&) noexcept = default; 332 NodeScalar(NodeScalar const&) noexcept = default; 333 NodeScalar& operator= (NodeScalar &&) noexcept = default; 334 NodeScalar& operator= (NodeScalar const&) noexcept = default; 335 336 public: 337 338 bool empty() const noexcept { return tag.empty() && scalar.empty() && anchor.empty(); } 339 340 void clear() noexcept { tag.clear(); scalar.clear(); anchor.clear(); } 341 342 void set_ref_maybe_replacing_scalar(csubstr ref, bool has_scalar) noexcept 343 { 344 csubstr trimmed = ref.begins_with('*') ? ref.sub(1) : ref; 345 anchor = trimmed; 346 if((!has_scalar) || !scalar.ends_with(trimmed)) 347 scalar = ref; 348 } 349 }; 350 C4_MUST_BE_TRIVIAL_COPY(NodeScalar); 351 352 353 //----------------------------------------------------------------------------- 354 //----------------------------------------------------------------------------- 355 //----------------------------------------------------------------------------- 356 357 /** convenience class to initialize nodes */ 358 struct NodeInit 359 { 360 361 NodeType type; 362 NodeScalar key; 363 NodeScalar val; 364 365 public: 366 367 /// initialize as an empty node 368 NodeInit() : type(NOTYPE), key(), val() {} 369 /// initialize as a typed node 370 NodeInit(NodeType_e t) : type(t), key(), val() {} 371 /// initialize as a sequence member 372 NodeInit(NodeScalar const& v) : type(VAL), key(), val(v) { _add_flags(); } 373 /// initialize as a mapping member 374 NodeInit( NodeScalar const& k, NodeScalar const& v) : type(KEYVAL), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } 375 /// initialize as a mapping member with explicit type 376 NodeInit(NodeType_e t, NodeScalar const& k, NodeScalar const& v) : type(t ), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } 377 /// initialize as a mapping member with explicit type (eg SEQ or MAP) 378 NodeInit(NodeType_e t, NodeScalar const& k ) : type(t ), key(k.tag, k.scalar), val( ) { _add_flags(KEY); } 379 380 public: 381 382 void clear() 383 { 384 type.clear(); 385 key.clear(); 386 val.clear(); 387 } 388 389 void _add_flags(type_bits more_flags=0) 390 { 391 type = (type|more_flags); 392 if( ! key.tag.empty()) 393 type = (type|KEYTAG); 394 if( ! val.tag.empty()) 395 type = (type|VALTAG); 396 if( ! key.anchor.empty()) 397 type = (type|KEYANCH); 398 if( ! val.anchor.empty()) 399 type = (type|VALANCH); 400 } 401 402 bool _check() const 403 { 404 // key cannot be empty 405 RYML_ASSERT(key.scalar.empty() == ((type & KEY) == 0)); 406 // key tag cannot be empty 407 RYML_ASSERT(key.tag.empty() == ((type & KEYTAG) == 0)); 408 // val may be empty even though VAL is set. But when VAL is not set, val must be empty 409 RYML_ASSERT(((type & VAL) != 0) || val.scalar.empty()); 410 // val tag cannot be empty 411 RYML_ASSERT(val.tag.empty() == ((type & VALTAG) == 0)); 412 return true; 413 } 414 }; 415 416 417 //----------------------------------------------------------------------------- 418 //----------------------------------------------------------------------------- 419 //----------------------------------------------------------------------------- 420 421 /** contains the data for each YAML node. */ 422 struct NodeData 423 { 424 NodeType m_type; 425 426 NodeScalar m_key; 427 NodeScalar m_val; 428 429 size_t m_parent; 430 size_t m_first_child; 431 size_t m_last_child; 432 size_t m_next_sibling; 433 size_t m_prev_sibling; 434 }; 435 C4_MUST_BE_TRIVIAL_COPY(NodeData); 436 437 438 //----------------------------------------------------------------------------- 439 //----------------------------------------------------------------------------- 440 //----------------------------------------------------------------------------- 441 442 class RYML_EXPORT Tree 443 { 444 public: 445 446 /** @name construction and assignment */ 447 /** @{ */ 448 449 Tree() : Tree(get_callbacks()) {} 450 Tree(Callbacks const& cb); 451 Tree(size_t node_capacity, size_t arena_capacity=0) : Tree(node_capacity, arena_capacity, get_callbacks()) {} 452 Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb); 453 454 ~Tree(); 455 456 Tree(Tree const& that) noexcept; 457 Tree(Tree && that) noexcept; 458 459 Tree& operator= (Tree const& that) noexcept; 460 Tree& operator= (Tree && that) noexcept; 461 462 /** @} */ 463 464 public: 465 466 /** @name memory and sizing */ 467 /** @{ */ 468 469 void reserve(size_t node_capacity); 470 471 /** clear the tree and zero every node 472 * @note does NOT clear the arena 473 * @see clear_arena() */ 474 void clear(); 475 inline void clear_arena() { m_arena_pos = 0; } 476 477 inline bool empty() const { return m_size == 0; } 478 479 inline size_t size() const { return m_size; } 480 inline size_t capacity() const { return m_cap; } 481 inline size_t slack() const { RYML_ASSERT(m_cap >= m_size); return m_cap - m_size; } 482 483 Callbacks const& callbacks() const { return m_callbacks; } 484 void callbacks(Callbacks const& cb) { m_callbacks = cb; } 485 486 /** @} */ 487 488 public: 489 490 /** @name node getters */ 491 /** @{ */ 492 493 //! get the index of a node belonging to this tree. 494 //! @p n can be nullptr, in which case a 495 size_t id(NodeData const* n) const 496 { 497 if( ! n) 498 { 499 return NONE; 500 } 501 RYML_ASSERT(n >= m_buf && n < m_buf + m_cap); 502 return static_cast<size_t>(n - m_buf); 503 } 504 505 //! get a pointer to a node's NodeData. 506 //! i can be NONE, in which case a nullptr is returned 507 inline NodeData *get(size_t i) 508 { 509 if(i == NONE) 510 return nullptr; 511 RYML_ASSERT(i >= 0 && i < m_cap); 512 return m_buf + i; 513 } 514 //! get a pointer to a node's NodeData. 515 //! i can be NONE, in which case a nullptr is returned. 516 inline NodeData const *get(size_t i) const 517 { 518 if(i == NONE) 519 return nullptr; 520 RYML_ASSERT(i >= 0 && i < m_cap); 521 return m_buf + i; 522 } 523 524 //! An if-less form of get() that demands a valid node index. 525 //! This function is implementation only; use at your own risk. 526 inline NodeData * _p(size_t i) { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; } 527 //! An if-less form of get() that demands a valid node index. 528 //! This function is implementation only; use at your own risk. 529 inline NodeData const * _p(size_t i) const { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; } 530 531 //! Get the id of the root node 532 size_t root_id() { if(m_cap == 0) { reserve(16); } RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; } 533 //! Get the id of the root node 534 size_t root_id() const { RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; } 535 536 //! Get a NodeRef of a node by id 537 NodeRef ref(size_t id); 538 //! Get a NodeRef of a node by id 539 ConstNodeRef ref(size_t id) const; 540 //! Get a NodeRef of a node by id 541 ConstNodeRef cref(size_t id); 542 //! Get a NodeRef of a node by id 543 ConstNodeRef cref(size_t id) const; 544 545 //! Get the root as a NodeRef 546 NodeRef rootref(); 547 //! Get the root as a NodeRef 548 ConstNodeRef rootref() const; 549 //! Get the root as a NodeRef 550 ConstNodeRef crootref(); 551 //! Get the root as a NodeRef 552 ConstNodeRef crootref() const; 553 554 //! find a root child by name, return it as a NodeRef 555 //! @note requires the root to be a map. 556 NodeRef operator[] (csubstr key); 557 //! find a root child by name, return it as a NodeRef 558 //! @note requires the root to be a map. 559 ConstNodeRef operator[] (csubstr key) const; 560 561 //! find a root child by index: return the root node's @p i-th child as a NodeRef 562 //! @note @i is NOT the node id, but the child's position 563 NodeRef operator[] (size_t i); 564 //! find a root child by index: return the root node's @p i-th child as a NodeRef 565 //! @note @i is NOT the node id, but the child's position 566 ConstNodeRef operator[] (size_t i) const; 567 568 //! get the i-th document of the stream 569 //! @note @i is NOT the node id, but the doc position within the stream 570 NodeRef docref(size_t i); 571 //! get the i-th document of the stream 572 //! @note @i is NOT the node id, but the doc position within the stream 573 ConstNodeRef docref(size_t i) const; 574 575 /** @} */ 576 577 public: 578 579 /** @name node property getters */ 580 /** @{ */ 581 582 NodeType type(size_t node) const { return _p(node)->m_type; } 583 const char* type_str(size_t node) const { return NodeType::type_str(_p(node)->m_type); } 584 585 csubstr const& key (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key.scalar; } 586 csubstr const& key_tag (size_t node) const { RYML_ASSERT(has_key_tag(node)); return _p(node)->m_key.tag; } 587 csubstr const& key_ref (size_t node) const { RYML_ASSERT(is_key_ref(node) && ! has_key_anchor(node)); return _p(node)->m_key.anchor; } 588 csubstr const& key_anchor(size_t node) const { RYML_ASSERT( ! is_key_ref(node) && has_key_anchor(node)); return _p(node)->m_key.anchor; } 589 NodeScalar const& keysc (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key; } 590 591 csubstr const& val (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val.scalar; } 592 csubstr const& val_tag (size_t node) const { RYML_ASSERT(has_val_tag(node)); return _p(node)->m_val.tag; } 593 csubstr const& val_ref (size_t node) const { RYML_ASSERT(is_val_ref(node) && ! has_val_anchor(node)); return _p(node)->m_val.anchor; } 594 csubstr const& val_anchor(size_t node) const { RYML_ASSERT( ! is_val_ref(node) && has_val_anchor(node)); return _p(node)->m_val.anchor; } 595 NodeScalar const& valsc (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val; } 596 597 /** @} */ 598 599 public: 600 601 /** @name node predicates */ 602 /** @{ */ 603 604 C4_ALWAYS_INLINE bool is_stream(size_t node) const { return _p(node)->m_type.is_stream(); } 605 C4_ALWAYS_INLINE bool is_doc(size_t node) const { return _p(node)->m_type.is_doc(); } 606 C4_ALWAYS_INLINE bool is_container(size_t node) const { return _p(node)->m_type.is_container(); } 607 C4_ALWAYS_INLINE bool is_map(size_t node) const { return _p(node)->m_type.is_map(); } 608 C4_ALWAYS_INLINE bool is_seq(size_t node) const { return _p(node)->m_type.is_seq(); } 609 C4_ALWAYS_INLINE bool has_key(size_t node) const { return _p(node)->m_type.has_key(); } 610 C4_ALWAYS_INLINE bool has_val(size_t node) const { return _p(node)->m_type.has_val(); } 611 C4_ALWAYS_INLINE bool is_val(size_t node) const { return _p(node)->m_type.is_val(); } 612 C4_ALWAYS_INLINE bool is_keyval(size_t node) const { return _p(node)->m_type.is_keyval(); } 613 C4_ALWAYS_INLINE bool has_key_tag(size_t node) const { return _p(node)->m_type.has_key_tag(); } 614 C4_ALWAYS_INLINE bool has_val_tag(size_t node) const { return _p(node)->m_type.has_val_tag(); } 615 C4_ALWAYS_INLINE bool has_key_anchor(size_t node) const { return _p(node)->m_type.has_key_anchor(); } 616 C4_ALWAYS_INLINE bool is_key_anchor(size_t node) const { return _p(node)->m_type.is_key_anchor(); } 617 C4_ALWAYS_INLINE bool has_val_anchor(size_t node) const { return _p(node)->m_type.has_val_anchor(); } 618 C4_ALWAYS_INLINE bool is_val_anchor(size_t node) const { return _p(node)->m_type.is_val_anchor(); } 619 C4_ALWAYS_INLINE bool has_anchor(size_t node) const { return _p(node)->m_type.has_anchor(); } 620 C4_ALWAYS_INLINE bool is_anchor(size_t node) const { return _p(node)->m_type.is_anchor(); } 621 C4_ALWAYS_INLINE bool is_key_ref(size_t node) const { return _p(node)->m_type.is_key_ref(); } 622 C4_ALWAYS_INLINE bool is_val_ref(size_t node) const { return _p(node)->m_type.is_val_ref(); } 623 C4_ALWAYS_INLINE bool is_ref(size_t node) const { return _p(node)->m_type.is_ref(); } 624 C4_ALWAYS_INLINE bool is_anchor_or_ref(size_t node) const { return _p(node)->m_type.is_anchor_or_ref(); } 625 C4_ALWAYS_INLINE bool is_key_quoted(size_t node) const { return _p(node)->m_type.is_key_quoted(); } 626 C4_ALWAYS_INLINE bool is_val_quoted(size_t node) const { return _p(node)->m_type.is_val_quoted(); } 627 C4_ALWAYS_INLINE bool is_quoted(size_t node) const { return _p(node)->m_type.is_quoted(); } 628 629 C4_ALWAYS_INLINE bool parent_is_seq(size_t node) const { RYML_ASSERT(has_parent(node)); return is_seq(_p(node)->m_parent); } 630 C4_ALWAYS_INLINE bool parent_is_map(size_t node) const { RYML_ASSERT(has_parent(node)); return is_map(_p(node)->m_parent); } 631 632 /** true when key and val are empty, and has no children */ 633 C4_ALWAYS_INLINE bool empty(size_t node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! (_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); } 634 /** true when the node has an anchor named a */ 635 C4_ALWAYS_INLINE bool has_anchor(size_t node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; } 636 637 C4_ALWAYS_INLINE bool key_is_null(size_t node) const { RYML_ASSERT(has_key(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_key_quoted() && _is_null(n->m_key.scalar); } 638 C4_ALWAYS_INLINE bool val_is_null(size_t node) const { RYML_ASSERT(has_val(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_val_quoted() && _is_null(n->m_val.scalar); } 639 static bool _is_null(csubstr s) noexcept 640 { 641 return s.str == nullptr || 642 s == "~" || 643 s == "null" || 644 s == "Null" || 645 s == "NULL"; 646 } 647 648 /** @} */ 649 650 public: 651 652 /** @name hierarchy predicates */ 653 /** @{ */ 654 655 bool is_root(size_t node) const { RYML_ASSERT(_p(node)->m_parent != NONE || node == 0); return _p(node)->m_parent == NONE; } 656 657 bool has_parent(size_t node) const { return _p(node)->m_parent != NONE; } 658 659 /** true if @p node has a child with id @p ch */ 660 bool has_child(size_t node, size_t ch) const { return _p(ch)->m_parent == node; } 661 /** true if @p node has a child with key @p key */ 662 bool has_child(size_t node, csubstr key) const { return find_child(node, key) != npos; } 663 /** true if @p node has any children key */ 664 bool has_children(size_t node) const { return _p(node)->m_first_child != NONE; } 665 666 /** true if @p node has a sibling with id @p sib */ 667 bool has_sibling(size_t node, size_t sib) const { return _p(node)->m_parent == _p(sib)->m_parent; } 668 /** true if one of the node's siblings has the given key */ 669 bool has_sibling(size_t node, csubstr key) const { return find_sibling(node, key) != npos; } 670 /** true if node is not a single child */ 671 bool has_other_siblings(size_t node) const 672 { 673 NodeData const *n = _p(node); 674 if(C4_LIKELY(n->m_parent != NONE)) 675 { 676 n = _p(n->m_parent); 677 return n->m_first_child != n->m_last_child; 678 } 679 return false; 680 } 681 682 RYML_DEPRECATED("use has_other_siblings()") bool has_siblings(size_t /*node*/) const { return true; } 683 684 /** @} */ 685 686 public: 687 688 /** @name hierarchy getters */ 689 /** @{ */ 690 691 size_t parent(size_t node) const { return _p(node)->m_parent; } 692 693 size_t prev_sibling(size_t node) const { return _p(node)->m_prev_sibling; } 694 size_t next_sibling(size_t node) const { return _p(node)->m_next_sibling; } 695 696 /** O(#num_children) */ 697 size_t num_children(size_t node) const; 698 size_t child_pos(size_t node, size_t ch) const; 699 size_t first_child(size_t node) const { return _p(node)->m_first_child; } 700 size_t last_child(size_t node) const { return _p(node)->m_last_child; } 701 size_t child(size_t node, size_t pos) const; 702 size_t find_child(size_t node, csubstr const& key) const; 703 704 /** O(#num_siblings) */ 705 /** counts with this */ 706 size_t num_siblings(size_t node) const { return is_root(node) ? 1 : num_children(_p(node)->m_parent); } 707 /** does not count with this */ 708 size_t num_other_siblings(size_t node) const { size_t ns = num_siblings(node); RYML_ASSERT(ns > 0); return ns-1; } 709 size_t sibling_pos(size_t node, size_t sib) const { RYML_ASSERT( ! is_root(node) || node == root_id()); return child_pos(_p(node)->m_parent, sib); } 710 size_t first_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_first_child; } 711 size_t last_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_last_child; } 712 size_t sibling(size_t node, size_t pos) const { return child(_p(node)->m_parent, pos); } 713 size_t find_sibling(size_t node, csubstr const& key) const { return find_child(_p(node)->m_parent, key); } 714 715 size_t doc(size_t i) const { size_t rid = root_id(); RYML_ASSERT(is_stream(rid)); return child(rid, i); } //!< gets the @p i document node index. requires that the root node is a stream. 716 717 /** @} */ 718 719 public: 720 721 /** @name node modifiers */ 722 /** @{ */ 723 724 void to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags=0); 725 void to_map(size_t node, csubstr key, type_bits more_flags=0); 726 void to_seq(size_t node, csubstr key, type_bits more_flags=0); 727 void to_val(size_t node, csubstr val, type_bits more_flags=0); 728 void to_map(size_t node, type_bits more_flags=0); 729 void to_seq(size_t node, type_bits more_flags=0); 730 void to_doc(size_t node, type_bits more_flags=0); 731 void to_stream(size_t node, type_bits more_flags=0); 732 733 void set_key(size_t node, csubstr key) { RYML_ASSERT(has_key(node)); _p(node)->m_key.scalar = key; } 734 void set_val(size_t node, csubstr val) { RYML_ASSERT(has_val(node)); _p(node)->m_val.scalar = val; } 735 736 void set_key_tag(size_t node, csubstr tag) { RYML_ASSERT(has_key(node)); _p(node)->m_key.tag = tag; _add_flags(node, KEYTAG); } 737 void set_val_tag(size_t node, csubstr tag) { RYML_ASSERT(has_val(node) || is_container(node)); _p(node)->m_val.tag = tag; _add_flags(node, VALTAG); } 738 739 void set_key_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_key_ref(node)); _p(node)->m_key.anchor = anchor.triml('&'); _add_flags(node, KEYANCH); } 740 void set_val_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_val_ref(node)); _p(node)->m_val.anchor = anchor.triml('&'); _add_flags(node, VALANCH); } 741 void set_key_ref (size_t node, csubstr ref ) { RYML_ASSERT( ! has_key_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_key.set_ref_maybe_replacing_scalar(ref, n->m_type.has_key()); _add_flags(node, KEY|KEYREF); } 742 void set_val_ref (size_t node, csubstr ref ) { RYML_ASSERT( ! has_val_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_val.set_ref_maybe_replacing_scalar(ref, n->m_type.has_val()); _add_flags(node, VAL|VALREF); } 743 744 void rem_key_anchor(size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYANCH); } 745 void rem_val_anchor(size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALANCH); } 746 void rem_key_ref (size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYREF); } 747 void rem_val_ref (size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALREF); } 748 void rem_anchor_ref(size_t node) { _p(node)->m_key.anchor.clear(); _p(node)->m_val.anchor.clear(); _rem_flags(node, KEYANCH|VALANCH|KEYREF|VALREF); } 749 750 /** @} */ 751 752 public: 753 754 /** @name tree modifiers */ 755 /** @{ */ 756 757 /** reorder the tree in memory so that all the nodes are stored 758 * in a linear sequence when visited in depth-first order. 759 * This will invalidate existing ids, since the node id is its 760 * position in the node array. */ 761 void reorder(); 762 763 /** Resolve references (aliases <- anchors) in the tree. 764 * 765 * Dereferencing is opt-in; after parsing, Tree::resolve() 766 * has to be called explicitly for obtaining resolved references in the 767 * tree. This method will resolve all references and substitute the 768 * anchored values in place of the reference. 769 * 770 * This method first does a full traversal of the tree to gather all 771 * anchors and references in a separate collection, then it goes through 772 * that collection to locate the names, which it does by obeying the YAML 773 * standard diktat that "an alias node refers to the most recent node in 774 * the serialization having the specified anchor" 775 * 776 * So, depending on the number of anchor/alias nodes, this is a 777 * potentially expensive operation, with a best-case linear complexity 778 * (from the initial traversal). This potential cost is the reason for 779 * requiring an explicit call. 780 */ 781 void resolve(); 782 783 /** @} */ 784 785 public: 786 787 /** @name tag directives */ 788 /** @{ */ 789 790 void resolve_tags(); 791 792 size_t num_tag_directives() const; 793 size_t add_tag_directive(TagDirective const& td); 794 void clear_tag_directives(); 795 796 size_t resolve_tag(substr output, csubstr tag, size_t node_id) const; 797 csubstr resolve_tag_sub(substr output, csubstr tag, size_t node_id) const 798 { 799 size_t needed = resolve_tag(output, tag, node_id); 800 return needed <= output.len ? output.first(needed) : output; 801 } 802 803 using tag_directive_const_iterator = TagDirective const*; 804 tag_directive_const_iterator begin_tag_directives() const { return m_tag_directives; } 805 tag_directive_const_iterator end_tag_directives() const { return m_tag_directives + num_tag_directives(); } 806 807 struct TagDirectiveProxy 808 { 809 tag_directive_const_iterator b, e; 810 tag_directive_const_iterator begin() const { return b; } 811 tag_directive_const_iterator end() const { return e; } 812 }; 813 814 TagDirectiveProxy tag_directives() const { return TagDirectiveProxy{begin_tag_directives(), end_tag_directives()}; } 815 816 /** @} */ 817 818 public: 819 820 /** @name modifying hierarchy */ 821 /** @{ */ 822 823 /** create and insert a new child of @p parent. insert after the (to-be) 824 * sibling @p after, which must be a child of @p parent. To insert as the 825 * first child, set after to NONE */ 826 C4_ALWAYS_INLINE size_t insert_child(size_t parent, size_t after) 827 { 828 RYML_ASSERT(parent != NONE); 829 RYML_ASSERT(is_container(parent) || is_root(parent)); 830 RYML_ASSERT(after == NONE || (_p(after)->m_parent == parent)); 831 size_t child = _claim(); 832 _set_hierarchy(child, parent, after); 833 return child; 834 } 835 /** create and insert a node as the first child of @p parent */ 836 C4_ALWAYS_INLINE size_t prepend_child(size_t parent) { return insert_child(parent, NONE); } 837 /** create and insert a node as the last child of @p parent */ 838 C4_ALWAYS_INLINE size_t append_child(size_t parent) { return insert_child(parent, _p(parent)->m_last_child); } 839 840 public: 841 842 #if defined(__clang__) 843 # pragma clang diagnostic push 844 # pragma clang diagnostic ignored "-Wnull-dereference" 845 #elif defined(__GNUC__) 846 # pragma GCC diagnostic push 847 # if __GNUC__ >= 6 848 # pragma GCC diagnostic ignored "-Wnull-dereference" 849 # endif 850 #endif 851 852 //! create and insert a new sibling of n. insert after "after" 853 C4_ALWAYS_INLINE size_t insert_sibling(size_t node, size_t after) 854 { 855 return insert_child(_p(node)->m_parent, after); 856 } 857 /** create and insert a node as the first node of @p parent */ 858 C4_ALWAYS_INLINE size_t prepend_sibling(size_t node) { return prepend_child(_p(node)->m_parent); } 859 C4_ALWAYS_INLINE size_t append_sibling(size_t node) { return append_child(_p(node)->m_parent); } 860 861 public: 862 863 /** remove an entire branch at once: ie remove the children and the node itself */ 864 inline void remove(size_t node) 865 { 866 remove_children(node); 867 _release(node); 868 } 869 870 /** remove all the node's children, but keep the node itself */ 871 void remove_children(size_t node); 872 873 /** change the @p type of the node to one of MAP, SEQ or VAL. @p 874 * type must have one and only one of MAP,SEQ,VAL; @p type may 875 * possibly have KEY, but if it does, then the @p node must also 876 * have KEY. Changing to the same type is a no-op. Otherwise, 877 * changing to a different type will initialize the node with an 878 * empty value of the desired type: changing to VAL will 879 * initialize with a null scalar (~), changing to MAP will 880 * initialize with an empty map ({}), and changing to SEQ will 881 * initialize with an empty seq ([]). */ 882 bool change_type(size_t node, NodeType type); 883 884 bool change_type(size_t node, type_bits type) 885 { 886 return change_type(node, (NodeType)type); 887 } 888 889 #if defined(__clang__) 890 # pragma clang diagnostic pop 891 #elif defined(__GNUC__) 892 # pragma GCC diagnostic pop 893 #endif 894 895 public: 896 897 /** change the node's position in the parent */ 898 void move(size_t node, size_t after); 899 900 /** change the node's parent and position */ 901 void move(size_t node, size_t new_parent, size_t after); 902 903 /** change the node's parent and position to a different tree 904 * @return the index of the new node in the destination tree */ 905 size_t move(Tree * src, size_t node, size_t new_parent, size_t after); 906 907 /** ensure the first node is a stream. Eg, change this tree 908 * 909 * DOCMAP 910 * MAP 911 * KEYVAL 912 * KEYVAL 913 * SEQ 914 * VAL 915 * 916 * to 917 * 918 * STREAM 919 * DOCMAP 920 * MAP 921 * KEYVAL 922 * KEYVAL 923 * SEQ 924 * VAL 925 * 926 * If the root is already a stream, this is a no-op. 927 */ 928 void set_root_as_stream(); 929 930 public: 931 932 /** recursively duplicate a node from this tree into a new parent, 933 * placing it after one of its children 934 * @return the index of the copy */ 935 size_t duplicate(size_t node, size_t new_parent, size_t after); 936 /** recursively duplicate a node from a different tree into a new parent, 937 * placing it after one of its children 938 * @return the index of the copy */ 939 size_t duplicate(Tree const* src, size_t node, size_t new_parent, size_t after); 940 941 /** recursively duplicate the node's children (but not the node) 942 * @return the index of the last duplicated child */ 943 size_t duplicate_children(size_t node, size_t parent, size_t after); 944 /** recursively duplicate the node's children (but not the node), where 945 * the node is from a different tree 946 * @return the index of the last duplicated child */ 947 size_t duplicate_children(Tree const* src, size_t node, size_t parent, size_t after); 948 949 void duplicate_contents(size_t node, size_t where); 950 void duplicate_contents(Tree const* src, size_t node, size_t where); 951 952 /** duplicate the node's children (but not the node) in a new parent, but 953 * omit repetitions where a duplicated node has the same key (in maps) or 954 * value (in seqs). If one of the duplicated children has the same key 955 * (in maps) or value (in seqs) as one of the parent's children, the one 956 * that is placed closest to the end will prevail. */ 957 size_t duplicate_children_no_rep(size_t node, size_t parent, size_t after); 958 size_t duplicate_children_no_rep(Tree const* src, size_t node, size_t parent, size_t after); 959 960 public: 961 962 void merge_with(Tree const* src, size_t src_node=NONE, size_t dst_root=NONE); 963 964 /** @} */ 965 966 public: 967 968 /** @name internal string arena */ 969 /** @{ */ 970 971 /** get the current size of the tree's internal arena */ 972 RYML_DEPRECATED("use arena_size() instead") size_t arena_pos() const { return m_arena_pos; } 973 /** get the current size of the tree's internal arena */ 974 inline size_t arena_size() const { return m_arena_pos; } 975 /** get the current capacity of the tree's internal arena */ 976 inline size_t arena_capacity() const { return m_arena.len; } 977 /** get the current slack of the tree's internal arena */ 978 inline size_t arena_slack() const { RYML_ASSERT(m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; } 979 980 /** get the current arena */ 981 substr arena() const { return m_arena.first(m_arena_pos); } 982 983 /** return true if the given substring is part of the tree's string arena */ 984 bool in_arena(csubstr s) const 985 { 986 return m_arena.is_super(s); 987 } 988 989 /** serialize the given floating-point variable to the tree's 990 * arena, growing it as needed to accomodate the serialization. 991 * 992 * @note Growing the arena may cause relocation of the entire 993 * existing arena, and thus change the contents of individual 994 * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this 995 * cost, ensure that the arena is reserved to an appropriate size 996 * using .reserve_arena() 997 * 998 * @see alloc_arena() */ 999 template<class T> 1000 typename std::enable_if<std::is_floating_point<T>::value, csubstr>::type 1001 to_arena(T const& C4_RESTRICT a) 1002 { 1003 substr rem(m_arena.sub(m_arena_pos)); 1004 size_t num = to_chars_float(rem, a); 1005 if(num > rem.len) 1006 { 1007 rem = _grow_arena(num); 1008 num = to_chars_float(rem, a); 1009 RYML_ASSERT(num <= rem.len); 1010 } 1011 rem = _request_span(num); 1012 return rem; 1013 } 1014 1015 /** serialize the given non-floating-point variable to the tree's 1016 * arena, growing it as needed to accomodate the serialization. 1017 * 1018 * @note Growing the arena may cause relocation of the entire 1019 * existing arena, and thus change the contents of individual 1020 * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this 1021 * cost, ensure that the arena is reserved to an appropriate size 1022 * using .reserve_arena() 1023 * 1024 * @see alloc_arena() */ 1025 template<class T> 1026 typename std::enable_if<!std::is_floating_point<T>::value, csubstr>::type 1027 to_arena(T const& C4_RESTRICT a) 1028 { 1029 substr rem(m_arena.sub(m_arena_pos)); 1030 size_t num = to_chars(rem, a); 1031 if(num > rem.len) 1032 { 1033 rem = _grow_arena(num); 1034 num = to_chars(rem, a); 1035 RYML_ASSERT(num <= rem.len); 1036 } 1037 rem = _request_span(num); 1038 return rem; 1039 } 1040 1041 /** serialize the given csubstr to the tree's arena, growing the 1042 * arena as needed to accomodate the serialization. 1043 * 1044 * @note Growing the arena may cause relocation of the entire 1045 * existing arena, and thus change the contents of individual 1046 * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this 1047 * cost, ensure that the arena is reserved to an appropriate size 1048 * using .reserve_arena() 1049 * 1050 * @see alloc_arena() */ 1051 csubstr to_arena(csubstr a) 1052 { 1053 if(a.len > 0) 1054 { 1055 substr rem(m_arena.sub(m_arena_pos)); 1056 size_t num = to_chars(rem, a); 1057 if(num > rem.len) 1058 { 1059 rem = _grow_arena(num); 1060 num = to_chars(rem, a); 1061 RYML_ASSERT(num <= rem.len); 1062 } 1063 return _request_span(num); 1064 } 1065 else 1066 { 1067 if(a.str == nullptr) 1068 { 1069 return csubstr{}; 1070 } 1071 else if(m_arena.str == nullptr) 1072 { 1073 // Arena is empty and we want to store a non-null 1074 // zero-length string. 1075 // Even though the string has zero length, we need 1076 // some "memory" to store a non-nullptr string 1077 _grow_arena(1); 1078 } 1079 return _request_span(0); 1080 } 1081 } 1082 C4_ALWAYS_INLINE csubstr to_arena(const char *s) 1083 { 1084 return to_arena(to_csubstr(s)); 1085 } 1086 C4_ALWAYS_INLINE csubstr to_arena(std::nullptr_t) 1087 { 1088 return csubstr{}; 1089 } 1090 1091 /** copy the given substr to the tree's arena, growing it by the 1092 * required size 1093 * 1094 * @note Growing the arena may cause relocation of the entire 1095 * existing arena, and thus change the contents of individual 1096 * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this 1097 * cost, ensure that the arena is reserved to an appropriate size 1098 * using .reserve_arena() 1099 * 1100 * @see alloc_arena() */ 1101 substr copy_to_arena(csubstr s) 1102 { 1103 substr cp = alloc_arena(s.len); 1104 RYML_ASSERT(cp.len == s.len); 1105 RYML_ASSERT(!s.overlaps(cp)); 1106 #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10) 1107 C4_SUPPRESS_WARNING_GCC_PUSH 1108 C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow=") // no need for terminating \0 1109 C4_SUPPRESS_WARNING_GCC( "-Wrestrict") // there's an assert to ensure no violation of restrict behavior 1110 #endif 1111 if(s.len) 1112 memcpy(cp.str, s.str, s.len); 1113 #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10) 1114 C4_SUPPRESS_WARNING_GCC_POP 1115 #endif 1116 return cp; 1117 } 1118 1119 /** grow the tree's string arena by the given size and return a substr 1120 * of the added portion 1121 * 1122 * @note Growing the arena may cause relocation of the entire 1123 * existing arena, and thus change the contents of individual 1124 * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this 1125 * cost, ensure that the arena is reserved to an appropriate size 1126 * using .reserve_arena(). 1127 * 1128 * @see reserve_arena() */ 1129 substr alloc_arena(size_t sz) 1130 { 1131 if(sz > arena_slack()) 1132 _grow_arena(sz - arena_slack()); 1133 substr s = _request_span(sz); 1134 return s; 1135 } 1136 1137 /** ensure the tree's internal string arena is at least the given capacity 1138 * @note This operation has a potential complexity of O(numNodes)+O(arenasize). 1139 * Growing the arena may cause relocation of the entire 1140 * existing arena, and thus change the contents of individual nodes. */ 1141 void reserve_arena(size_t arena_cap) 1142 { 1143 if(arena_cap > m_arena.len) 1144 { 1145 substr buf; 1146 buf.str = (char*) m_callbacks.m_allocate(arena_cap, m_arena.str, m_callbacks.m_user_data); 1147 buf.len = arena_cap; 1148 if(m_arena.str) 1149 { 1150 RYML_ASSERT(m_arena.len >= 0); 1151 _relocate(buf); // does a memcpy and changes nodes using the arena 1152 m_callbacks.m_free(m_arena.str, m_arena.len, m_callbacks.m_user_data); 1153 } 1154 m_arena = buf; 1155 } 1156 } 1157 1158 /** @} */ 1159 1160 private: 1161 1162 substr _grow_arena(size_t more) 1163 { 1164 size_t cap = m_arena.len + more; 1165 cap = cap < 2 * m_arena.len ? 2 * m_arena.len : cap; 1166 cap = cap < 64 ? 64 : cap; 1167 reserve_arena(cap); 1168 return m_arena.sub(m_arena_pos); 1169 } 1170 1171 substr _request_span(size_t sz) 1172 { 1173 substr s; 1174 s = m_arena.sub(m_arena_pos, sz); 1175 m_arena_pos += sz; 1176 return s; 1177 } 1178 1179 substr _relocated(csubstr s, substr next_arena) const 1180 { 1181 RYML_ASSERT(m_arena.is_super(s)); 1182 RYML_ASSERT(m_arena.sub(0, m_arena_pos).is_super(s)); 1183 auto pos = (s.str - m_arena.str); 1184 substr r(next_arena.str + pos, s.len); 1185 RYML_ASSERT(r.str - next_arena.str == pos); 1186 RYML_ASSERT(next_arena.sub(0, m_arena_pos).is_super(r)); 1187 return r; 1188 } 1189 1190 public: 1191 1192 /** @name lookup */ 1193 /** @{ */ 1194 1195 struct lookup_result 1196 { 1197 size_t target; 1198 size_t closest; 1199 size_t path_pos; 1200 csubstr path; 1201 1202 inline operator bool() const { return target != NONE; } 1203 1204 lookup_result() : target(NONE), closest(NONE), path_pos(0), path() {} 1205 lookup_result(csubstr path_, size_t start) : target(NONE), closest(start), path_pos(0), path(path_) {} 1206 1207 /** get the part ot the input path that was resolved */ 1208 csubstr resolved() const; 1209 /** get the part ot the input path that was unresolved */ 1210 csubstr unresolved() const; 1211 }; 1212 1213 /** for example foo.bar[0].baz */ 1214 lookup_result lookup_path(csubstr path, size_t start=NONE) const; 1215 1216 /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify 1217 * the tree so that the corresponding lookup_path() would return the 1218 * default value. 1219 * @see lookup_path() */ 1220 size_t lookup_path_or_modify(csubstr default_value, csubstr path, size_t start=NONE); 1221 1222 /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify 1223 * the tree so that the corresponding lookup_path() would return the 1224 * branch @p src_node (from the tree @p src). 1225 * @see lookup_path() */ 1226 size_t lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start=NONE); 1227 1228 /** @} */ 1229 1230 private: 1231 1232 struct _lookup_path_token 1233 { 1234 csubstr value; 1235 NodeType type; 1236 _lookup_path_token() : value(), type() {} 1237 _lookup_path_token(csubstr v, NodeType t) : value(v), type(t) {} 1238 inline operator bool() const { return type != NOTYPE; } 1239 bool is_index() const { return value.begins_with('[') && value.ends_with(']'); } 1240 }; 1241 1242 size_t _lookup_path_or_create(csubstr path, size_t start); 1243 1244 void _lookup_path (lookup_result *r) const; 1245 void _lookup_path_modify(lookup_result *r); 1246 1247 size_t _next_node (lookup_result *r, _lookup_path_token *parent) const; 1248 size_t _next_node_modify(lookup_result *r, _lookup_path_token *parent); 1249 1250 void _advance(lookup_result *r, size_t more) const; 1251 1252 _lookup_path_token _next_token(lookup_result *r, _lookup_path_token const& parent) const; 1253 1254 private: 1255 1256 void _clear(); 1257 void _free(); 1258 void _copy(Tree const& that); 1259 void _move(Tree & that); 1260 1261 void _relocate(substr next_arena); 1262 1263 public: 1264 1265 #if ! RYML_USE_ASSERT 1266 C4_ALWAYS_INLINE void _check_next_flags(size_t, type_bits) {} 1267 #else 1268 void _check_next_flags(size_t node, type_bits f) 1269 { 1270 auto n = _p(node); 1271 type_bits o = n->m_type; // old 1272 C4_UNUSED(o); 1273 if(f & MAP) 1274 { 1275 RYML_ASSERT_MSG((f & SEQ) == 0, "cannot mark simultaneously as map and seq"); 1276 RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as map and val"); 1277 RYML_ASSERT_MSG((o & SEQ) == 0, "cannot turn a seq into a map; clear first"); 1278 RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a map; clear first"); 1279 } 1280 else if(f & SEQ) 1281 { 1282 RYML_ASSERT_MSG((f & MAP) == 0, "cannot mark simultaneously as seq and map"); 1283 RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as seq and val"); 1284 RYML_ASSERT_MSG((o & MAP) == 0, "cannot turn a map into a seq; clear first"); 1285 RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a seq; clear first"); 1286 } 1287 if(f & KEY) 1288 { 1289 RYML_ASSERT(!is_root(node)); 1290 auto pid = parent(node); C4_UNUSED(pid); 1291 RYML_ASSERT(is_map(pid)); 1292 } 1293 if((f & VAL) && !is_root(node)) 1294 { 1295 auto pid = parent(node); C4_UNUSED(pid); 1296 RYML_ASSERT(is_map(pid) || is_seq(pid)); 1297 } 1298 } 1299 #endif 1300 1301 inline void _set_flags(size_t node, NodeType_e f) { _check_next_flags(node, f); _p(node)->m_type = f; } 1302 inline void _set_flags(size_t node, type_bits f) { _check_next_flags(node, f); _p(node)->m_type = f; } 1303 1304 inline void _add_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = f | d->m_type; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } 1305 inline void _add_flags(size_t node, type_bits f) { NodeData *d = _p(node); f |= d->m_type; _check_next_flags(node, f); d->m_type = f; } 1306 1307 inline void _rem_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = d->m_type & ~f; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } 1308 inline void _rem_flags(size_t node, type_bits f) { NodeData *d = _p(node); f = d->m_type & ~f; _check_next_flags(node, f); d->m_type = f; } 1309 1310 void _set_key(size_t node, csubstr key, type_bits more_flags=0) 1311 { 1312 _p(node)->m_key.scalar = key; 1313 _add_flags(node, KEY|more_flags); 1314 } 1315 void _set_key(size_t node, NodeScalar const& key, type_bits more_flags=0) 1316 { 1317 _p(node)->m_key = key; 1318 _add_flags(node, KEY|more_flags); 1319 } 1320 1321 void _set_val(size_t node, csubstr val, type_bits more_flags=0) 1322 { 1323 RYML_ASSERT(num_children(node) == 0); 1324 RYML_ASSERT(!is_seq(node) && !is_map(node)); 1325 _p(node)->m_val.scalar = val; 1326 _add_flags(node, VAL|more_flags); 1327 } 1328 void _set_val(size_t node, NodeScalar const& val, type_bits more_flags=0) 1329 { 1330 RYML_ASSERT(num_children(node) == 0); 1331 RYML_ASSERT( ! is_container(node)); 1332 _p(node)->m_val = val; 1333 _add_flags(node, VAL|more_flags); 1334 } 1335 1336 void _set(size_t node, NodeInit const& i) 1337 { 1338 RYML_ASSERT(i._check()); 1339 NodeData *n = _p(node); 1340 RYML_ASSERT(n->m_key.scalar.empty() || i.key.scalar.empty() || i.key.scalar == n->m_key.scalar); 1341 _add_flags(node, i.type); 1342 if(n->m_key.scalar.empty()) 1343 { 1344 if( ! i.key.scalar.empty()) 1345 { 1346 _set_key(node, i.key.scalar); 1347 } 1348 } 1349 n->m_key.tag = i.key.tag; 1350 n->m_val = i.val; 1351 } 1352 1353 void _set_parent_as_container_if_needed(size_t in) 1354 { 1355 NodeData const* n = _p(in); 1356 size_t ip = parent(in); 1357 if(ip != NONE) 1358 { 1359 if( ! (is_seq(ip) || is_map(ip))) 1360 { 1361 if((in == first_child(ip)) && (in == last_child(ip))) 1362 { 1363 if( ! n->m_key.empty() || has_key(in)) 1364 { 1365 _add_flags(ip, MAP); 1366 } 1367 else 1368 { 1369 _add_flags(ip, SEQ); 1370 } 1371 } 1372 } 1373 } 1374 } 1375 1376 void _seq2map(size_t node) 1377 { 1378 RYML_ASSERT(is_seq(node)); 1379 for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) 1380 { 1381 NodeData *C4_RESTRICT ch = _p(i); 1382 if(ch->m_type.is_keyval()) 1383 continue; 1384 ch->m_type.add(KEY); 1385 ch->m_key = ch->m_val; 1386 } 1387 auto *C4_RESTRICT n = _p(node); 1388 n->m_type.rem(SEQ); 1389 n->m_type.add(MAP); 1390 } 1391 1392 size_t _do_reorder(size_t *node, size_t count); 1393 1394 void _swap(size_t n_, size_t m_); 1395 void _swap_props(size_t n_, size_t m_); 1396 void _swap_hierarchy(size_t n_, size_t m_); 1397 void _copy_hierarchy(size_t dst_, size_t src_); 1398 1399 inline void _copy_props(size_t dst_, size_t src_) 1400 { 1401 _copy_props(dst_, this, src_); 1402 } 1403 1404 inline void _copy_props_wo_key(size_t dst_, size_t src_) 1405 { 1406 _copy_props_wo_key(dst_, this, src_); 1407 } 1408 1409 void _copy_props(size_t dst_, Tree const* that_tree, size_t src_) 1410 { 1411 auto & C4_RESTRICT dst = *_p(dst_); 1412 auto const& C4_RESTRICT src = *that_tree->_p(src_); 1413 dst.m_type = src.m_type; 1414 dst.m_key = src.m_key; 1415 dst.m_val = src.m_val; 1416 } 1417 1418 void _copy_props_wo_key(size_t dst_, Tree const* that_tree, size_t src_) 1419 { 1420 auto & C4_RESTRICT dst = *_p(dst_); 1421 auto const& C4_RESTRICT src = *that_tree->_p(src_); 1422 dst.m_type = (src.m_type & ~_KEYMASK) | (dst.m_type & _KEYMASK); 1423 dst.m_val = src.m_val; 1424 } 1425 1426 inline void _clear_type(size_t node) 1427 { 1428 _p(node)->m_type = NOTYPE; 1429 } 1430 1431 inline void _clear(size_t node) 1432 { 1433 auto *C4_RESTRICT n = _p(node); 1434 n->m_type = NOTYPE; 1435 n->m_key.clear(); 1436 n->m_val.clear(); 1437 n->m_parent = NONE; 1438 n->m_first_child = NONE; 1439 n->m_last_child = NONE; 1440 } 1441 1442 inline void _clear_key(size_t node) 1443 { 1444 _p(node)->m_key.clear(); 1445 _rem_flags(node, KEY); 1446 } 1447 1448 inline void _clear_val(size_t node) 1449 { 1450 _p(node)->m_val.clear(); 1451 _rem_flags(node, VAL); 1452 } 1453 1454 private: 1455 1456 void _clear_range(size_t first, size_t num); 1457 1458 size_t _claim(); 1459 void _claim_root(); 1460 void _release(size_t node); 1461 void _free_list_add(size_t node); 1462 void _free_list_rem(size_t node); 1463 1464 void _set_hierarchy(size_t node, size_t parent, size_t after_sibling); 1465 void _rem_hierarchy(size_t node); 1466 1467 public: 1468 1469 // members are exposed, but you should NOT access them directly 1470 1471 NodeData * m_buf; 1472 size_t m_cap; 1473 1474 size_t m_size; 1475 1476 size_t m_free_head; 1477 size_t m_free_tail; 1478 1479 substr m_arena; 1480 size_t m_arena_pos; 1481 1482 Callbacks m_callbacks; 1483 1484 TagDirective m_tag_directives[RYML_MAX_TAG_DIRECTIVES]; 1485 1486 }; 1487 1488 } // namespace yml 1489 } // namespace c4 1490 1491 1492 C4_SUPPRESS_WARNING_MSVC_POP 1493 C4_SUPPRESS_WARNING_GCC_CLANG_POP 1494 1495 1496 #endif /* _C4_YML_TREE_HPP_ */