From fd5116034b6b501b23916b3f99aca6174e6f86fe Mon Sep 17 00:00:00 2001 From: Sandu Liviu Catalin Date: Wed, 22 Feb 2017 18:23:42 +0200 Subject: [PATCH] Update the PUGIXML library to the latest version. --- config/common/pugiconfig.hpp | 6 +- external/PUGIXML/pugixml.cpp | 886 ++++++++++++++++++----------------- include/pugixml.hpp | 30 +- 3 files changed, 485 insertions(+), 437 deletions(-) diff --git a/config/common/pugiconfig.hpp b/config/common/pugiconfig.hpp index fd08e02e..6d962cfe 100644 --- a/config/common/pugiconfig.hpp +++ b/config/common/pugiconfig.hpp @@ -1,7 +1,7 @@ /** - * pugixml parser - version 1.7 + * pugixml parser - version 1.8 * -------------------------------------------------------- - * Copyright (C) 2006-2016, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2017, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ * * This library is distributed under the MIT License. See notice at the end @@ -49,7 +49,7 @@ #endif /** - * Copyright (c) 2006-2016 Arseny Kapoulkine + * Copyright (c) 2006-2017 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/external/PUGIXML/pugixml.cpp b/external/PUGIXML/pugixml.cpp index d2f0d6b9..7368184b 100644 --- a/external/PUGIXML/pugixml.cpp +++ b/external/PUGIXML/pugixml.cpp @@ -1,7 +1,7 @@ /** - * pugixml parser - version 1.7 + * pugixml parser - version 1.8 * -------------------------------------------------------- - * Copyright (C) 2006-2016, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2017, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ * * This library is distributed under the MIT License. See notice at the end @@ -29,9 +29,6 @@ #ifndef PUGIXML_NO_XPATH # include # include -# ifdef PUGIXML_NO_EXCEPTIONS -# include -# endif #endif #ifndef PUGIXML_NO_STL @@ -47,10 +44,8 @@ # pragma warning(push) # pragma warning(disable: 4127) // conditional expression is constant # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) -# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable # pragma warning(disable: 4702) // unreachable code # pragma warning(disable: 4996) // this function or variable may be unsafe -# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged #endif #ifdef __INTEL_COMPILER @@ -424,7 +419,8 @@ PUGI__NS_BEGIN #define PUGI__GETPAGE_IMPL(header) (header).get_page() #else #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast(object) - reinterpret_cast(page)) << 8) | (flags)) - #define PUGI__GETPAGE_IMPL(header) const_cast(reinterpret_cast(reinterpret_cast(&header) - (header >> 8))) + // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings + #define PUGI__GETPAGE_IMPL(header) static_cast(const_cast(static_cast(reinterpret_cast(&header) - (header >> 8)))) #endif #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) @@ -1119,9 +1115,6 @@ PUGI__NS_BEGIN { xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) { - #ifdef PUGIXML_COMPACT - _hash = &hash; - #endif } const char_t* buffer; @@ -2567,7 +2560,7 @@ PUGI__NS_BEGIN #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } #define PUGI__OPTSET(OPT) ( optmsk & (OPT) ) - #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } + #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } #define PUGI__POPNODE() { cursor = cursor->parent; } #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } #define PUGI__SCANWHILE(X) { while (X) ++s; } @@ -2891,20 +2884,14 @@ PUGI__NS_BEGIN struct xml_parser { - xml_allocator alloc; - xml_allocator* alloc_state; + xml_allocator* alloc; char_t* error_offset; xml_parse_status error_status; - xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok) + xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok) { } - ~xml_parser() - { - *alloc_state = alloc; - } - // DOCTYPE consists of nested sections of the following possible types: // , , "...", '...' // @@ -3263,7 +3250,7 @@ PUGI__NS_BEGIN if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #... { - xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute. + xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute. if (!a) PUGI__THROW_ERROR(status_out_of_memory, s); a->name = s; // Save the offset. @@ -3355,18 +3342,20 @@ PUGI__NS_BEGIN { ++s; + mark = s; + char_t* name = cursor->name; - if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s); + if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark); while (PUGI__IS_CHARTYPE(*s, ct_symbol)) { - if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s); + if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark); } if (*name) { if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s); - else PUGI__THROW_ERROR(status_end_element_mismatch, s); + else PUGI__THROW_ERROR(status_end_element_mismatch, mark); } PUGI__POPNODE(); // Pop. @@ -4087,12 +4076,23 @@ PUGI__NS_BEGIN { if (!node->first_child) { - if ((flags & format_raw) == 0) - writer.write(' '); + if (flags & format_no_empty_element_tags) + { + writer.write('>', '<', '/'); + writer.write_string(name); + writer.write('>'); - writer.write('/', '>'); + return false; + } + else + { + if ((flags & format_raw) == 0) + writer.write(' '); - return false; + writer.write('/', '>'); + + return false; + } } else { @@ -4516,7 +4516,14 @@ PUGI__NS_BEGIN } if (negative) + { + // Workaround for crayc++ CC-3059: Expected no overflow in routine. + #ifdef _CRAYC + return (overflow || result > minneg) ? ~minneg + 1 : ~result + 1; + #else return (overflow || result > minneg) ? 0 - minneg : 0 - result; + #endif + } else return (overflow || result > maxpos) ? maxpos : result; } @@ -4570,27 +4577,10 @@ PUGI__NS_BEGIN } #endif - template struct make_unsigned; - - template <> struct make_unsigned { typedef unsigned int type; }; - template <> struct make_unsigned { typedef unsigned int type; }; - template <> struct make_unsigned { typedef unsigned long type; }; - template <> struct make_unsigned { typedef unsigned long type; }; - -#ifdef PUGIXML_HAS_LONG_LONG - template <> struct make_unsigned { typedef unsigned long long type; }; - template <> struct make_unsigned { typedef unsigned long long type; }; -#endif - - template - PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, T value) + template PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) { - typedef typename make_unsigned::type U; - - bool negative = value < 0; - char_t* result = end - 1; - U rest = negative ? 0 - U(value) : U(value); + U rest = negative ? 0 - value : value; do { @@ -4624,12 +4614,12 @@ PUGI__NS_BEGIN #endif } - template - PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, Integer value) + template + PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative) { char_t buf[64]; char_t* end = buf + sizeof(buf) / sizeof(buf[0]); - char_t* begin = integer_to_string(buf, end, value); + char_t* begin = integer_to_string(buf, end, value, negative); return strcpy_insitu(dest, header, header_mask, begin, end - begin); } @@ -4653,7 +4643,7 @@ PUGI__NS_BEGIN } template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value) + PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value) { return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5); } @@ -5283,28 +5273,28 @@ namespace pugi { if (!_attr) return false; - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); } PUGI__FN bool xml_attribute::set_value(unsigned int rhs) { if (!_attr) return false; - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); } PUGI__FN bool xml_attribute::set_value(long rhs) { if (!_attr) return false; - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); } PUGI__FN bool xml_attribute::set_value(unsigned long rhs) { if (!_attr) return false; - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); } PUGI__FN bool xml_attribute::set_value(double rhs) @@ -5325,7 +5315,7 @@ namespace pugi { if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); } #ifdef PUGIXML_HAS_LONG_LONG @@ -5333,14 +5323,14 @@ namespace pugi { if (!_attr) return false; - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); } PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) { if (!_attr) return false; - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); } #endif @@ -6123,7 +6113,7 @@ namespace pugi if (j != _root) result[--offset] = delimiter; - if (j->name && *j->name) + if (j->name) { size_t length = impl::strlength(j->name); @@ -6142,7 +6132,7 @@ namespace pugi { xml_node found = *this; // Current search context. - if (!_root || !path_ || !path_[0]) return found; + if (!_root || !path_[0]) return found; if (path_[0] == delimiter) { @@ -6296,6 +6286,7 @@ namespace pugi return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1; default: + assert(false && "Invalid node type"); return -1; } } @@ -6438,28 +6429,28 @@ namespace pugi { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; } PUGI__FN bool xml_text::set(unsigned int rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; } PUGI__FN bool xml_text::set(long rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; } PUGI__FN bool xml_text::set(unsigned long rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; } PUGI__FN bool xml_text::set(float rhs) @@ -6480,7 +6471,7 @@ namespace pugi { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; } #ifdef PUGIXML_HAS_LONG_LONG @@ -6488,14 +6479,14 @@ namespace pugi { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; } PUGI__FN bool xml_text::set(unsigned long long rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; } #endif @@ -6812,18 +6803,18 @@ namespace pugi PUGI__FN xml_document::xml_document(): _buffer(0) { - create(); + _create(); } PUGI__FN xml_document::~xml_document() { - destroy(); + _destroy(); } PUGI__FN void xml_document::reset() { - destroy(); - create(); + _destroy(); + _create(); } PUGI__FN void xml_document::reset(const xml_document& proto) @@ -6834,7 +6825,7 @@ namespace pugi append_copy(cur); } - PUGI__FN void xml_document::create() + PUGI__FN void xml_document::_create() { assert(!_root); @@ -6867,11 +6858,16 @@ namespace pugi // setup sentinel page page->allocator = static_cast(_root); + // setup hash table pointer in allocator + #ifdef PUGIXML_COMPACT + page->allocator->_hash = &static_cast(_root)->hash; + #endif + // verify the document allocation assert(reinterpret_cast(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); } - PUGI__FN void xml_document::destroy() + PUGI__FN void xml_document::_destroy() { assert(_root); @@ -7221,134 +7217,76 @@ PUGI__NS_BEGIN return write + 1; } - template void copy_backwards(I begin, I end, I target) + template void insertion_sort(T* begin, T* end, const Pred& pred) { - while (begin != end) *--target = *--end; - } + if (begin == end) + return; - template void insertion_sort(I begin, I end, const Pred& pred, T*) - { - assert(begin != end); - - for (I it = begin + 1; it != end; ++it) + for (T* it = begin + 1; it != end; ++it) { T val = *it; + T* hole = it; - if (pred(val, *begin)) + // move hole backwards + while (hole > begin && pred(val, *(hole - 1))) { - // move to front - copy_backwards(begin, it, it + 1); - *begin = val; + *hole = *(hole - 1); + hole--; } + + // fill hole with element + *hole = val; + } + } + + template I median3(I first, I middle, I last, const Pred& pred) + { + if (pred(*middle, *first)) swap(middle, first); + if (pred(*last, *middle)) swap(last, middle); + if (pred(*middle, *first)) swap(middle, first); + + return middle; + } + + template void partition(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend) + { + // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups) + T* eq = begin; + T* lt = begin; + T* gt = end; + + while (lt < gt) + { + if (pred(*lt, pivot)) + lt++; + else if (*lt == pivot) + swap(*eq++, *lt++); else - { - I hole = it; - - // move hole backwards - while (pred(val, *(hole - 1))) - { - *hole = *(hole - 1); - hole--; - } - - // fill hole with element - *hole = val; - } + swap(*lt, *--gt); } - } - // std variant for elements with == - template void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend) - { - I eqbeg = middle, eqend = middle + 1; + // we now have just 4 groups: = < >; move equal elements to the middle + T* eqbeg = gt; - // expand equal range - while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg; - while (eqend != end && *eqend == *eqbeg) ++eqend; + for (T* it = begin; it != eq; ++it) + swap(*it, *--eqbeg); - // process outer elements - I ltend = eqbeg, gtbeg = eqend; - - for (;;) - { - // find the element from the right side that belongs to the left one - for (; gtbeg != end; ++gtbeg) - if (!pred(*eqbeg, *gtbeg)) - { - if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++); - else break; - } - - // find the element from the left side that belongs to the right one - for (; ltend != begin; --ltend) - if (!pred(*(ltend - 1), *eqbeg)) - { - if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg); - else break; - } - - // scanned all elements - if (gtbeg == end && ltend == begin) - { - *out_eqbeg = eqbeg; - *out_eqend = eqend; - return; - } - - // make room for elements by moving equal area - if (gtbeg == end) - { - if (--ltend != --eqbeg) swap(*ltend, *eqbeg); - swap(*eqbeg, *--eqend); - } - else if (ltend == begin) - { - if (eqend != gtbeg) swap(*eqbeg, *eqend); - ++eqend; - swap(*gtbeg++, *eqbeg++); - } - else swap(*gtbeg++, *--ltend); - } - } - - template void median3(I first, I middle, I last, const Pred& pred) - { - if (pred(*middle, *first)) swap(*middle, *first); - if (pred(*last, *middle)) swap(*last, *middle); - if (pred(*middle, *first)) swap(*middle, *first); - } - - template void median(I first, I middle, I last, const Pred& pred) - { - if (last - first <= 40) - { - // median of three for small chunks - median3(first, middle, last, pred); - } - else - { - // median of nine - size_t step = (last - first + 1) / 8; - - median3(first, first + step, first + 2 * step, pred); - median3(middle - step, middle, middle + step, pred); - median3(last - 2 * step, last - step, last, pred); - median3(first + step, middle, last - step, pred); - } + *out_eqbeg = eqbeg; + *out_eqend = gt; } template void sort(I begin, I end, const Pred& pred) { // sort large chunks - while (end - begin > 32) + while (end - begin > 16) { // find median element I middle = begin + (end - begin) / 2; - median(begin, middle, end - 1, pred); + I median = median3(begin, middle, end - 1, pred); // partition in three chunks (< = >) I eqbeg, eqend; - partition(begin, middle, end, pred, &eqbeg, &eqend); + partition(begin, end, *median, pred, &eqbeg, &eqend); // loop on larger half if (eqbeg - begin > end - eqend) @@ -7364,7 +7302,7 @@ PUGI__NS_BEGIN } // insertion sort small chunk - if (begin != end) insertion_sort(begin, end, pred, &*begin); + insertion_sort(begin, end, pred); } PUGI__NS_END @@ -7392,24 +7330,17 @@ PUGI__NS_BEGIN }; }; - class xpath_allocator + struct xpath_allocator { xpath_memory_block* _root; size_t _root_size; + bool* _error; - public: - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf* error_handler; - #endif - - xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) + xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) { - #ifdef PUGIXML_NO_EXCEPTIONS - error_handler = 0; - #endif } - void* allocate_nothrow(size_t size) + void* allocate(size_t size) { // round size up to block alignment boundary size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); @@ -7430,7 +7361,11 @@ PUGI__NS_BEGIN size_t block_size = block_capacity + offsetof(xpath_memory_block, data); xpath_memory_block* block = static_cast(xml_memory::allocate(block_size)); - if (!block) return 0; + if (!block) + { + if (_error) *_error = true; + return 0; + } block->next = _root; block->capacity = block_capacity; @@ -7442,23 +7377,6 @@ PUGI__NS_BEGIN } } - void* allocate(size_t size) - { - void* result = allocate_nothrow(size); - - if (!result) - { - #ifdef PUGIXML_NO_EXCEPTIONS - assert(error_handler); - longjmp(*error_handler, 1); - #else - throw std::bad_alloc(); - #endif - } - - return result; - } - void* reallocate(void* ptr, size_t old_size, size_t new_size) { // round size up to block alignment boundary @@ -7468,33 +7386,35 @@ PUGI__NS_BEGIN // we can only reallocate the last object assert(ptr == 0 || static_cast(ptr) + old_size == &_root->data[0] + _root_size); - // adjust root size so that we have not allocated the object at all - bool only_object = (_root_size == old_size); + // try to reallocate the object inplace + if (ptr && _root_size - old_size + new_size <= _root->capacity) + { + _root_size = _root_size - old_size + new_size; + return ptr; + } - if (ptr) _root_size -= old_size; - - // allocate a new version (this will obviously reuse the memory if possible) + // allocate a new block void* result = allocate(new_size); - assert(result); + if (!result) return 0; // we have a new block - if (result != ptr && ptr) + if (ptr) { - // copy old data + // copy old data (we only support growing) assert(new_size >= old_size); memcpy(result, ptr, old_size); // free the previous page if it had no other objects - if (only_object) - { - assert(_root->data == result); - assert(_root->next); + assert(_root->data == result); + assert(_root->next); + if (_root->next->data == ptr) + { + // deallocate the whole page, unless it was the first one xpath_memory_block* next = _root->next->next; if (next) { - // deallocate the whole page, unless it was the first one xml_memory::deallocate(_root->next); _root->next = next; } @@ -7566,22 +7486,15 @@ PUGI__NS_BEGIN xpath_allocator result; xpath_allocator temp; xpath_stack stack; + bool oom; - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf error_handler; - #endif - - xpath_stack_data(): result(blocks + 0), temp(blocks + 1) + xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false) { blocks[0].next = blocks[1].next = 0; blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); stack.result = &result; stack.temp = &temp; - - #ifdef PUGIXML_NO_EXCEPTIONS - result.error_handler = temp.error_handler = &error_handler; - #endif } ~xpath_stack_data() @@ -7603,7 +7516,7 @@ PUGI__NS_BEGIN static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) { char_t* result = static_cast(alloc->allocate((length + 1) * sizeof(char_t))); - assert(result); + if (!result) return 0; memcpy(result, string, length * sizeof(char_t)); result[length] = 0; @@ -7632,9 +7545,13 @@ PUGI__NS_BEGIN { assert(begin <= end); - size_t length = static_cast(end - begin); + if (begin == end) + return xpath_string(); - return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length); + size_t length = static_cast(end - begin); + const char_t* data = duplicate_string(begin, length, alloc); + + return data ? xpath_string(data, true, length) : xpath_string(); } xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) @@ -7660,7 +7577,7 @@ PUGI__NS_BEGIN // allocate new buffer char_t* result = static_cast(alloc->reallocate(_uses_heap ? const_cast(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); - assert(result); + if (!result) return; // append first string to the new buffer in case there was no reallocation if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); @@ -7692,8 +7609,11 @@ PUGI__NS_BEGIN if (!_uses_heap) { size_t length_ = strlength(_buffer); + const char_t* data_ = duplicate_string(_buffer, length_, alloc); - _buffer = duplicate_string(_buffer, length_, alloc); + if (!data_) return 0; + + _buffer = data_; _uses_heap = true; _length_heap = length_; } @@ -8117,7 +8037,7 @@ PUGI__NS_BEGIN // allocate a buffer of suitable length for the number size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; char_t* result = static_cast(alloc->allocate(sizeof(char_t) * result_size)); - assert(result); + if (!result) return xpath_string(); // make the number! char_t* s = result; @@ -8401,12 +8321,10 @@ PUGI__NS_BEGIN if (!table[i]) table[i] = static_cast(i); - void* result = alloc->allocate_nothrow(sizeof(table)); + void* result = alloc->allocate(sizeof(table)); + if (!result) return 0; - if (result) - { - memcpy(result, table, sizeof(table)); - } + memcpy(result, table, sizeof(table)); return static_cast(result); } @@ -8749,7 +8667,7 @@ PUGI__NS_BEGIN { // reallocate the old array or allocate a new one xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); - assert(data); + if (!data) return; // finalize _begin = data; @@ -8801,7 +8719,7 @@ PUGI__NS_BEGIN // reallocate the old array or allocate a new one xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); - assert(data); + if (!data) return; // finalize _begin = data; @@ -10387,7 +10305,7 @@ PUGI__NS_BEGIN if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) { buffer = static_cast(stack.temp->allocate(count * sizeof(xpath_string))); - assert(buffer); + if (!buffer) return xpath_string(); } // evaluate all strings to temporary stack @@ -10405,7 +10323,7 @@ PUGI__NS_BEGIN // create final string char_t* result = static_cast(stack.result->allocate((length + 1) * sizeof(char_t))); - assert(result); + if (!result) return xpath_string(); char_t* ri = result; @@ -10572,6 +10490,8 @@ PUGI__NS_BEGIN xpath_string s = string_value(c.n, stack.result); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = normalize_space(begin); return xpath_string::from_heap_preallocated(begin, end); @@ -10582,6 +10502,8 @@ PUGI__NS_BEGIN xpath_string s = _left->eval_string(c, stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = normalize_space(begin); return xpath_string::from_heap_preallocated(begin, end); @@ -10598,6 +10520,8 @@ PUGI__NS_BEGIN xpath_string to = _right->_next->eval_string(c, swapped_stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); return xpath_string::from_heap_preallocated(begin, end); @@ -10608,6 +10532,8 @@ PUGI__NS_BEGIN xpath_string s = _left->eval_string(c, stack); char_t* begin = s.data(stack.result); + if (!begin) return xpath_string(); + char_t* end = translate_table(begin, _data.table); return xpath_string::from_heap_preallocated(begin, end); @@ -10781,9 +10707,14 @@ PUGI__NS_BEGIN void optimize(xpath_allocator* alloc) { - if (_left) _left->optimize(alloc); - if (_right) _right->optimize(alloc); - if (_next) _next->optimize(alloc); + if (_left) + _left->optimize(alloc); + + if (_right) + _right->optimize(alloc); + + if (_next) + _next->optimize(alloc); optimize_self(alloc); } @@ -10912,65 +10843,77 @@ PUGI__NS_BEGIN char_t _scratch[32]; - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf _error_handler; - #endif - - void throw_error(const char* message) + xpath_ast_node* error(const char* message) { _result->error = message; _result->offset = _lexer.current_pos() - _query; - #ifdef PUGIXML_NO_EXCEPTIONS - longjmp(_error_handler, 1); - #else - throw xpath_exception(*_result); - #endif + return 0; } - void throw_error_oom() + xpath_ast_node* error_oom() { - #ifdef PUGIXML_NO_EXCEPTIONS - throw_error("Out of memory"); - #else - throw std::bad_alloc(); - #endif + assert(_alloc->_error); + *_alloc->_error = true; + + return 0; } void* alloc_node() { - void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); + return _alloc->allocate(sizeof(xpath_ast_node)); + } - if (!result) throw_error_oom(); + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; + } - return result; + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0; + } + + xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test) + { + void* memory = alloc_node(); + return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0; } const char_t* alloc_string(const xpath_lexer_string& value) { - if (value.begin) - { - size_t length = static_cast(value.end - value.begin); + if (!value.begin) + return PUGIXML_TEXT(""); - char_t* c = static_cast(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); - if (!c) throw_error_oom(); - assert(c); // workaround for clang static analysis + size_t length = static_cast(value.end - value.begin); - memcpy(c, value.begin, length * sizeof(char_t)); - c[length] = 0; + char_t* c = static_cast(_alloc->allocate((length + 1) * sizeof(char_t))); + if (!c) return 0; - return c; - } - else return 0; - } + memcpy(c, value.begin, length * sizeof(char_t)); + c[length] = 0; - xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) - { - assert(argc <= 1); - - if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - - return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]); + return c; } xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) @@ -10979,101 +10922,110 @@ PUGI__NS_BEGIN { case 'b': if (name == PUGIXML_TEXT("boolean") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); + return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]); break; case 'c': if (name == PUGIXML_TEXT("count") && argc == 1) { - if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); + if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(ast_func_count, xpath_type_number, args[0]); } else if (name == PUGIXML_TEXT("contains") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); + return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); else if (name == PUGIXML_TEXT("concat") && argc >= 2) - return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("ceiling") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); + return alloc_node(ast_func_ceiling, xpath_type_number, args[0]); break; case 'f': if (name == PUGIXML_TEXT("false") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean); + return alloc_node(ast_func_false, xpath_type_boolean); else if (name == PUGIXML_TEXT("floor") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); + return alloc_node(ast_func_floor, xpath_type_number, args[0]); break; case 'i': if (name == PUGIXML_TEXT("id") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); + return alloc_node(ast_func_id, xpath_type_node_set, args[0]); break; case 'l': if (name == PUGIXML_TEXT("last") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number); + return alloc_node(ast_func_last, xpath_type_number); else if (name == PUGIXML_TEXT("lang") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); + return alloc_node(ast_func_lang, xpath_type_boolean, args[0]); else if (name == PUGIXML_TEXT("local-name") && argc <= 1) - return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); + { + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); + } break; case 'n': if (name == PUGIXML_TEXT("name") && argc <= 1) - return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); + { + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); + } else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) - return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); + { + if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); + } else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); + return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("not") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); + return alloc_node(ast_func_not, xpath_type_boolean, args[0]); else if (name == PUGIXML_TEXT("number") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); + return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); break; case 'p': if (name == PUGIXML_TEXT("position") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number); + return alloc_node(ast_func_position, xpath_type_number); break; case 'r': if (name == PUGIXML_TEXT("round") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); + return alloc_node(ast_func_round, xpath_type_number, args[0]); break; case 's': if (name == PUGIXML_TEXT("string") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); + return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); else if (name == PUGIXML_TEXT("string-length") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); + return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); else if (name == PUGIXML_TEXT("starts-with") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); + return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); else if (name == PUGIXML_TEXT("substring-before") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("substring-after") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) - return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); + return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("sum") && argc == 1) { - if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); + if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); + return alloc_node(ast_func_sum, xpath_type_number, args[0]); } break; case 't': if (name == PUGIXML_TEXT("translate") && argc == 3) - return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); + return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]); else if (name == PUGIXML_TEXT("true") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean); + return alloc_node(ast_func_true, xpath_type_boolean); break; @@ -11081,9 +11033,7 @@ PUGI__NS_BEGIN break; } - throw_error("Unrecognized function or wrong parameter count"); - - return 0; + return error("Unrecognized function or wrong parameter count"); } axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) @@ -11199,18 +11149,18 @@ PUGI__NS_BEGIN xpath_lexer_string name = _lexer.contents(); if (!_variables) - throw_error("Unknown variable: variable set is not provided"); + return error("Unknown variable: variable set is not provided"); xpath_variable* var = 0; if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) - throw_error_oom(); + return error_oom(); if (!var) - throw_error("Unknown variable: variable set does not contain the given name"); + return error("Unknown variable: variable set does not contain the given name"); _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var); + return alloc_node(ast_variable, var->type(), var); } case lex_open_brace: @@ -11218,9 +11168,10 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* n = parse_expression(); + if (!n) return 0; if (_lexer.current() != lex_close_brace) - throw_error("Unmatched braces"); + return error("Expected ')' to match an opening '('"); _lexer.next(); @@ -11230,11 +11181,11 @@ PUGI__NS_BEGIN case lex_quoted_string: { const char_t* value = alloc_string(_lexer.contents()); + if (!value) return 0; - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); _lexer.next(); - return n; + return alloc_node(ast_string_constant, xpath_type_string, value); } case lex_number: @@ -11242,12 +11193,11 @@ PUGI__NS_BEGIN double value = 0; if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) - throw_error_oom(); + return error_oom(); - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); _lexer.next(); - return n; + return alloc_node(ast_number_constant, xpath_type_number, value); } case lex_string: @@ -11261,19 +11211,20 @@ PUGI__NS_BEGIN xpath_ast_node* last_arg = 0; if (_lexer.current() != lex_open_brace) - throw_error("Unrecognized function call"); + return error("Unrecognized function call"); _lexer.next(); - if (_lexer.current() != lex_close_brace) - args[argc++] = parse_expression(); - while (_lexer.current() != lex_close_brace) { - if (_lexer.current() != lex_comma) - throw_error("No comma between function arguments"); - _lexer.next(); + if (argc > 0) + { + if (_lexer.current() != lex_comma) + return error("No comma between function arguments"); + _lexer.next(); + } xpath_ast_node* n = parse_expression(); + if (!n) return 0; if (argc < 2) args[argc] = n; else last_arg->set_next(n); @@ -11288,9 +11239,7 @@ PUGI__NS_BEGIN } default: - throw_error("Unrecognizable primary expression"); - - return 0; + return error("Unrecognizable primary expression"); } } @@ -11300,19 +11249,23 @@ PUGI__NS_BEGIN xpath_ast_node* parse_filter_expression() { xpath_ast_node* n = parse_primary_expression(); + if (!n) return 0; while (_lexer.current() == lex_open_square_brace) { _lexer.next(); + if (n->rettype() != xpath_type_node_set) + return error("Predicate has to be applied to node set"); + xpath_ast_node* expr = parse_expression(); + if (!expr) return 0; - if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set"); - - n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); + n = alloc_node(ast_filter, n, expr, predicate_default); + if (!n) return 0; if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); + return error("Expected ']' to match an opening '['"); _lexer.next(); } @@ -11328,7 +11281,7 @@ PUGI__NS_BEGIN xpath_ast_node* parse_step(xpath_ast_node* set) { if (set && set->rettype() != xpath_type_node_set) - throw_error("Step has to be applied to node set"); + return error("Step has to be applied to node set"); bool axis_specified = false; axis_t axis = axis_child; // implied child axis @@ -11344,13 +11297,19 @@ PUGI__NS_BEGIN { _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0); + if (_lexer.current() == lex_open_square_brace) + return error("Predicates are not allowed after an abbreviated step"); + + return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0); } else if (_lexer.current() == lex_double_dot) { _lexer.next(); - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0); + if (_lexer.current() == lex_open_square_brace) + return error("Predicates are not allowed after an abbreviated step"); + + return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0); } nodetest_t nt_type = nodetest_none; @@ -11366,11 +11325,13 @@ PUGI__NS_BEGIN if (_lexer.current() == lex_double_colon) { // parse axis name - if (axis_specified) throw_error("Two axis specifiers in one step"); + if (axis_specified) + return error("Two axis specifiers in one step"); axis = parse_axis_name(nt_name, axis_specified); - if (!axis_specified) throw_error("Unknown axis"); + if (!axis_specified) + return error("Unknown axis"); // read actual node test _lexer.next(); @@ -11386,7 +11347,10 @@ PUGI__NS_BEGIN nt_name = _lexer.contents(); _lexer.next(); } - else throw_error("Unrecognized node test"); + else + { + return error("Unrecognized node test"); + } } if (nt_type == nodetest_none) @@ -11402,26 +11366,28 @@ PUGI__NS_BEGIN nt_type = parse_node_test_type(nt_name); - if (nt_type == nodetest_none) throw_error("Unrecognized node type"); + if (nt_type == nodetest_none) + return error("Unrecognized node type"); nt_name = xpath_lexer_string(); } else if (nt_name == PUGIXML_TEXT("processing-instruction")) { if (_lexer.current() != lex_quoted_string) - throw_error("Only literals are allowed as arguments to processing-instruction()"); + return error("Only literals are allowed as arguments to processing-instruction()"); nt_type = nodetest_pi; nt_name = _lexer.contents(); _lexer.next(); if (_lexer.current() != lex_close_brace) - throw_error("Unmatched brace near processing-instruction()"); + return error("Unmatched brace near processing-instruction()"); _lexer.next(); } else - throw_error("Unmatched brace near node type test"); - + { + return error("Unmatched brace near node type test"); + } } // QName or NCName:* else @@ -11432,7 +11398,10 @@ PUGI__NS_BEGIN nt_type = nodetest_all_in_namespace; } - else nt_type = nodetest_name; + else + { + nt_type = nodetest_name; + } } } } @@ -11441,9 +11410,16 @@ PUGI__NS_BEGIN nt_type = nodetest_all; _lexer.next(); } - else throw_error("Unrecognized node test"); + else + { + return error("Unrecognized node test"); + } - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name)); + const char_t* nt_name_copy = alloc_string(nt_name); + if (!nt_name_copy) return 0; + + xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy); + if (!n) return 0; xpath_ast_node* last = 0; @@ -11452,11 +11428,13 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* expr = parse_expression(); + if (!expr) return 0; - xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); + xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default); + if (!pred) return 0; if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); + return error("Expected ']' to match an opening '['"); _lexer.next(); if (last) last->set_next(pred); @@ -11472,6 +11450,7 @@ PUGI__NS_BEGIN xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) { xpath_ast_node* n = parse_step(set); + if (!n) return 0; while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { @@ -11479,9 +11458,13 @@ PUGI__NS_BEGIN _lexer.next(); if (l == lex_double_slash) - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + { + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; + } n = parse_step(n); + if (!n) return 0; } return n; @@ -11495,7 +11478,8 @@ PUGI__NS_BEGIN { _lexer.next(); - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); + xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); + if (!n) return 0; // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path lexeme_t l = _lexer.current(); @@ -11509,8 +11493,11 @@ PUGI__NS_BEGIN { _lexer.next(); - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); + if (!n) return 0; + + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; return parse_relative_location_path(n); } @@ -11533,7 +11520,6 @@ PUGI__NS_BEGIN // PrimaryExpr begins with '$' in case of it being a variable reference, // '(' in case of it being an expression, string literal, number constant or // function call. - if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || _lexer.current() == lex_string) @@ -11545,13 +11531,16 @@ PUGI__NS_BEGIN while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; - if (*state != '(') return parse_location_path(); + if (*state != '(') + return parse_location_path(); // This looks like a function call; however this still can be a node-test. Check it. - if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path(); + if (parse_node_test_type(_lexer.contents()) != nodetest_none) + return parse_location_path(); } xpath_ast_node* n = parse_filter_expression(); + if (!n) return 0; if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { @@ -11560,9 +11549,11 @@ PUGI__NS_BEGIN if (l == lex_double_slash) { - if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set"); + if (n->rettype() != xpath_type_node_set) + return error("Step has to be applied to node set"); - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); + if (!n) return 0; } // select from location path @@ -11576,12 +11567,15 @@ PUGI__NS_BEGIN _lexer.next(); // precedence 7+ - only parses union expressions - xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7); + xpath_ast_node* n = parse_expression(7); + if (!n) return 0; - return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); + return alloc_node(ast_op_negate, xpath_type_number, n); } else + { return parse_location_path(); + } } struct binary_op_t @@ -11659,20 +11653,23 @@ PUGI__NS_BEGIN _lexer.next(); xpath_ast_node* rhs = parse_path_or_unary_expression(); + if (!rhs) return 0; binary_op_t nextop = binary_op_t::parse(_lexer); while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) { rhs = parse_expression_rec(rhs, nextop.precedence); + if (!rhs) return 0; nextop = binary_op_t::parse(_lexer); } if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) - throw_error("Union operator has to be applied to node sets"); + return error("Union operator has to be applied to node sets"); - lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); + lhs = alloc_node(op.asttype, op.rettype, lhs, rhs); + if (!lhs) return 0; op = binary_op_t::parse(_lexer); } @@ -11698,9 +11695,12 @@ PUGI__NS_BEGIN // | MultiplicativeExpr '*' UnaryExpr // | MultiplicativeExpr 'div' UnaryExpr // | MultiplicativeExpr 'mod' UnaryExpr - xpath_ast_node* parse_expression() + xpath_ast_node* parse_expression(int limit = 0) { - return parse_expression_rec(parse_path_or_unary_expression(), 0); + xpath_ast_node* n = parse_path_or_unary_expression(); + if (!n) return 0; + + return parse_expression_rec(n, limit); } xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) @@ -11709,28 +11709,21 @@ PUGI__NS_BEGIN xpath_ast_node* parse() { - xpath_ast_node* result = parse_expression(); + xpath_ast_node* n = parse_expression(); + if (!n) return 0; + // check if there are unparsed tokens left if (_lexer.current() != lex_eof) - { - // there are still unparsed tokens left, error - throw_error("Incorrect query"); - } + return error("Incorrect query"); - return result; + return n; } static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) { xpath_parser parser(query, variables, alloc, result); - #ifdef PUGIXML_NO_EXCEPTIONS - int error = setjmp(parser._error_handler); - - return (error == 0) ? parser.parse() : 0; - #else return parser.parse(); - #endif } }; @@ -11753,7 +11746,7 @@ PUGI__NS_BEGIN xml_memory::deallocate(impl); } - xpath_query_impl(): root(0), alloc(&block) + xpath_query_impl(): root(0), alloc(&block, &oom), oom(false) { block.next = 0; block.capacity = sizeof(block.data); @@ -11762,21 +11755,9 @@ PUGI__NS_BEGIN xpath_ast_node* root; xpath_allocator alloc; xpath_memory_block block; + bool oom; }; - PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd) - { - if (!impl) return xpath_string(); - - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_string(); - #endif - - xpath_context c(n, 1, 1); - - return impl->root->eval_string(c, sd.stack); - } - PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) { if (!impl) return 0; @@ -11923,7 +11904,7 @@ namespace pugi } } -#if __cplusplus >= 201103 +#ifdef PUGIXML_HAS_MOVE PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) { _type = rhs._type; @@ -11966,7 +11947,7 @@ namespace pugi return *this; } -#if __cplusplus >= 201103 +#ifdef PUGIXML_HAS_MOVE PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage) { _move(rhs); @@ -12166,7 +12147,7 @@ namespace pugi return *this; } -#if __cplusplus >= 201103 +#ifdef PUGIXML_HAS_MOVE PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) { for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) @@ -12347,6 +12328,15 @@ namespace pugi _impl = impl.release(); _result.error = 0; } + else + { + #ifdef PUGIXML_NO_EXCEPTIONS + if (qimpl->oom) _result.error = "Out of memory"; + #else + if (qimpl->oom) throw std::bad_alloc(); + throw xpath_exception(_result); + #endif + } } } @@ -12360,7 +12350,7 @@ namespace pugi impl::xpath_query_impl::destroy(static_cast(_impl)); } -#if __cplusplus >= 201103 +#ifdef PUGIXML_HAS_MOVE PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) { _impl = rhs._impl; @@ -12399,11 +12389,18 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return false; - #endif + bool r = static_cast(_impl)->root->eval_boolean(c, sd.stack); - return static_cast(_impl)->root->eval_boolean(c, sd.stack); + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return false; + #else + throw std::bad_alloc(); + #endif + } + + return r; } PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const @@ -12413,19 +12410,38 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return impl::gen_nan(); - #endif + double r = static_cast(_impl)->root->eval_number(c, sd.stack); - return static_cast(_impl)->root->eval_number(c, sd.stack); + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return impl::gen_nan(); + #else + throw std::bad_alloc(); + #endif + } + + return r; } #ifndef PUGIXML_NO_STL PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const { + if (!_impl) return string_t(); + + impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - impl::xpath_string r = impl::evaluate_string_impl(static_cast(_impl), n, sd); + impl::xpath_string r = static_cast(_impl)->root->eval_string(c, sd.stack); + + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return string_t(); + #else + throw std::bad_alloc(); + #endif + } return string_t(r.c_str(), r.length()); } @@ -12433,9 +12449,19 @@ namespace pugi PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const { + impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - impl::xpath_string r = impl::evaluate_string_impl(static_cast(_impl), n, sd); + impl::xpath_string r = _impl ? static_cast(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string(); + + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + r = impl::xpath_string(); + #else + throw std::bad_alloc(); + #endif + } size_t full_size = r.length() + 1; @@ -12459,12 +12485,17 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node_set(); - #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return xpath_node_set(); + #else + throw std::bad_alloc(); + #endif + } + return xpath_node_set(r.begin(), r.end(), r.type()); } @@ -12476,12 +12507,17 @@ namespace pugi impl::xpath_context c(n, 1, 1); impl::xpath_stack_data sd; - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node(); - #endif - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); + if (sd.oom) + { + #ifdef PUGIXML_NO_EXCEPTIONS + return xpath_node(); + #else + throw std::bad_alloc(); + #endif + } + return r.first(); } @@ -12507,7 +12543,7 @@ namespace pugi PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const { xpath_query q(query, variables); - return select_node(q); + return q.evaluate_node(*this); } PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const @@ -12518,7 +12554,7 @@ namespace pugi PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const { xpath_query q(query, variables); - return select_nodes(q); + return q.evaluate_node_set(*this); } PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const @@ -12529,7 +12565,7 @@ namespace pugi PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const { xpath_query q(query, variables); - return select_single_node(q); + return q.evaluate_node(*this); } PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const @@ -12582,7 +12618,7 @@ namespace pugi #endif /** - * Copyright (c) 2006-2016 Arseny Kapoulkine + * Copyright (c) 2006-2017 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/include/pugixml.hpp b/include/pugixml.hpp index c4d44e6f..4d76bfa2 100644 --- a/include/pugixml.hpp +++ b/include/pugixml.hpp @@ -1,7 +1,7 @@ /** - * pugixml parser - version 1.7 + * pugixml parser - version 1.8 * -------------------------------------------------------- - * Copyright (C) 2006-2016, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2017, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at http://pugixml.org/ * * This library is distributed under the MIT License. See notice at the end @@ -13,7 +13,7 @@ #ifndef PUGIXML_VERSION // Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons -# define PUGIXML_VERSION 170 +# define PUGIXML_VERSION 180 #endif // Include user configuration file (this can define various configuration macros) @@ -72,6 +72,15 @@ # endif #endif +// If the platform is known to have move semantics support, compile move ctor/operator implementation +#ifndef PUGIXML_HAS_MOVE +# if __cplusplus >= 201103 +# define PUGIXML_HAS_MOVE +# elif defined(_MSC_VER) && _MSC_VER >= 1600 +# define PUGIXML_HAS_MOVE +# endif +#endif + // If C++ is 2011 or higher, add 'override' qualifiers #ifndef PUGIXML_OVERRIDE # if __cplusplus >= 201103 @@ -220,6 +229,9 @@ namespace pugi // Write every attribute on a new line with appropriate indentation. This flag is off by default. const unsigned int format_indent_attributes = 0x40; + // Don't output empty element tags, instead writing an explicit start and end tag even if there are no children. This flag is off by default. + const unsigned int format_no_empty_element_tags = 0x80; + // The default set of formatting flags. // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none. const unsigned int format_default = format_indent; @@ -969,8 +981,8 @@ namespace pugi xml_document(const xml_document&); xml_document& operator=(const xml_document&); - void create(); - void destroy(); + void _create(); + void _destroy(); public: // Default constructor, makes empty document @@ -1117,7 +1129,7 @@ namespace pugi xpath_variable_set(const xpath_variable_set& rhs); xpath_variable_set& operator=(const xpath_variable_set& rhs); - #if __cplusplus >= 201103 + #ifdef PUGIXML_HAS_MOVE // Move semantics support xpath_variable_set(xpath_variable_set&& rhs); xpath_variable_set& operator=(xpath_variable_set&& rhs); @@ -1161,7 +1173,7 @@ namespace pugi // Destructor ~xpath_query(); - #if __cplusplus >= 201103 + #ifdef PUGIXML_HAS_MOVE // Move semantics support xpath_query(xpath_query&& rhs); xpath_query& operator=(xpath_query&& rhs); @@ -1302,7 +1314,7 @@ namespace pugi xpath_node_set(const xpath_node_set& ns); xpath_node_set& operator=(const xpath_node_set& ns); - #if __cplusplus >= 201103 + #ifdef PUGIXML_HAS_MOVE // Move semantics support xpath_node_set(xpath_node_set&& rhs); xpath_node_set& operator=(xpath_node_set&& rhs); @@ -1397,7 +1409,7 @@ namespace std #endif /** - * Copyright (c) 2006-2016 Arseny Kapoulkine + * Copyright (c) 2006-2017 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation