diff --git a/stl/inc/regex b/stl/inc/regex index a29abc03a2..84f08d4be5 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -1342,10 +1342,6 @@ struct _Buf { // character buffer _Chrs[_Nchrs++] = _Ch; } - _Elem _Del() noexcept { // remove and return last character - return _Chrs[--_Nchrs]; - } - template void _Insert2(_FwdIt _First, _FwdIt _Last) { // append multiple characters while (_First != _Last) { @@ -1593,12 +1589,12 @@ enum class _Rx_char_class_kind : int { // must be aligned with corresponding _No }; template -class _Builder2 { // provides operations used by _Parser2 to build the nfa +class _Builder3 { // provides operations used by _Parser3 to build the nfa public: - _Builder2(const _RxTraits& _Tr, regex_constants::syntax_option_type); + _Builder3(const _RxTraits& _Tr, regex_constants::syntax_option_type); void _Setlong(); void _Tidy() noexcept; - _Node_base* _Getmark() const; + _Node_base* _Getmark(); void _Add_bol(); void _Add_eol(); @@ -1618,7 +1614,7 @@ public: _Node_base* _Begin_capture_group(unsigned int _Idx); void _Add_backreference(unsigned int _Idx); _Node_base* _Begin_if(_Node_base* _Start); - void _Else_if2(_Node_base*, _Node_base*); + void _Else_if(_Node_base*, _Node_base*); void _Add_rep(int _Min, int _Max, bool _Greedy); void _Negate(); _Root_node* _End_pattern(); @@ -1627,7 +1623,7 @@ private: _Node_base* _Link_node(_Node_base*); static void _Insert_node(_Node_base*, _Node_base*); _Node_base* _New_node(_Node_type _Kind); - void _Add_str_node(); + void _Emit_str_node(); void _Add_char_to_bitmap(unsigned char _Ch); void _Add_char_to_array(_Elem _Ch); void _Add_elts(_Node_class<_Elem, _RxTraits>*, typename _RxTraits::char_class_type, bool); @@ -1637,10 +1633,11 @@ private: _Node_base* _Current; regex_constants::syntax_option_type _Flags; const _RxTraits& _Traits; + typename _RxTraits::string_type _Chars; public: - _Builder2(const _Builder2&) = delete; - _Builder2& operator=(const _Builder2&) = delete; + _Builder3(const _Builder3&) = delete; + _Builder3& operator=(const _Builder3&) = delete; }; template > @@ -2197,9 +2194,9 @@ enum _Prs_ret { // indicate class element type enum class _Lex_mode : unsigned char { _Default, _Character_class }; template -class _Parser2 { // parse a regular expression +class _Parser3 { // parse a regular expression public: - _Parser2(const _RxTraits& _Tr, _FwdIt _Pfirst, _FwdIt _Plast, regex_constants::syntax_option_type _Fx); + _Parser3(const _RxTraits& _Tr, _FwdIt _Pfirst, _FwdIt _Plast, regex_constants::syntax_option_type _Fx); _Root_node* _Compile(); private: @@ -2237,7 +2234,7 @@ private: void _Do_assert_group(bool _Neg); bool _Wrapped_disjunction(); void _Quantifier(); - void _Alternative2(); + void _Alternative(); void _Disjunction(); void _Calculate_loop_simplicity(_Node_base* _Nx, _Node_base* _Ne, _Node_rep* _Outer_rep, bool _Nonreentrant); @@ -2246,7 +2243,7 @@ private: unsigned int _Grp_idx = 0; int _Disj_count = 0; vector _Finished_grps; - _Builder2<_FwdIt, _Elem, _RxTraits> _Nfa; + _Builder3<_FwdIt, _Elem, _RxTraits> _Nfa; const _RxTraits& _Traits; unsigned long long _L_flags; regex_constants::syntax_option_type _Flags; @@ -2533,7 +2530,7 @@ private: _Visualization.assign(_First, _Last); #endif // _ENHANCED_REGEX_VISUALIZER - _Parser2<_InIt, _Elem, _RxTraits> _Prs(_Traits, _First, _Last, _Flags); + _Parser3<_InIt, _Elem, _RxTraits> _Prs(_Traits, _First, _Last, _Flags); _Root_node* _Rx = _Prs._Compile(); _Reset(_Rx); } else { @@ -3280,26 +3277,27 @@ _EXPORT_STD using sregex_token_iterator = regex_token_iterator; template -_Builder2<_FwdIt, _Elem, _RxTraits>::_Builder2(const _RxTraits& _Tr, regex_constants::syntax_option_type _Fx) +_Builder3<_FwdIt, _Elem, _RxTraits>::_Builder3(const _RxTraits& _Tr, regex_constants::syntax_option_type _Fx) : _Root(new _Root_node), _Current(_Root), _Flags(_Fx), _Traits(_Tr) {} template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Setlong() { // set flag +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Setlong() { // set flag _Root->_Flags |= _Fl_longest; } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Negate() { // set flag +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Negate() { // set flag _Current->_Flags ^= _Fl_negate; } template -_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Getmark() const { +_Node_base* _Builder3<_FwdIt, _Elem, _RxTraits>::_Getmark() { + _Emit_str_node(); return _Current; } template -_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Link_node(_Node_base* _Nx) { // insert _Nx at current location +_Node_base* _Builder3<_FwdIt, _Elem, _RxTraits>::_Link_node(_Node_base* _Nx) { // insert _Nx at current location _Nx->_Prev = _Current; if (_Current->_Next) { // set back pointer _Nx->_Next = _Current->_Next; @@ -3311,7 +3309,7 @@ _Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Link_node(_Node_base* _Nx) { / } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Insert_node(_Node_base* _Insert_before, _Node_base* _To_insert) { +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Insert_node(_Node_base* _Insert_before, _Node_base* _To_insert) { // insert _To_insert into the graph before the node _Insert_before _Insert_before->_Prev->_Next = _To_insert; _To_insert->_Prev = _Insert_before->_Prev; @@ -3320,58 +3318,62 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Insert_node(_Node_base* _Insert_befor } template -_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_New_node(_Node_type _Kind) { // allocate and link simple node +_Node_base* _Builder3<_FwdIt, _Elem, _RxTraits>::_New_node(_Node_type _Kind) { // allocate and link simple node + _Emit_str_node(); return _Link_node(new _Node_base(_Kind)); } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_bol() { // add bol node +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_bol() { // add bol node _New_node(_N_bol); } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_eol() { // add eol node +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_eol() { // add eol node _New_node(_N_eol); } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_wbound() { // add wbound node +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_wbound() { // add wbound node _New_node(_N_wbound); } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_dot() { // add dot node +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_dot() { // add dot node _New_node(_N_dot); } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_str_node() { // add string node - _Link_node(new _Node_str<_Elem>); +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Emit_str_node() { // emit string node if necessary + if (_Chars.empty()) { + return; + } + + auto _Node = new _Node_str<_Elem>; + _Link_node(_Node); + _Node->_Data._Insert2(_Chars.data(), _Chars.data() + _Chars.size()); + _Chars.clear(); } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_char(_Elem _Ch) { // append character - if (_Current->_Kind != _N_str) { - _Add_str_node(); - } - +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_char(_Elem _Ch) { // append character if (_Flags & regex_constants::icase) { _Ch = _Traits.translate_nocase(_Ch); } else if (_Flags & regex_constants::collate) { _Ch = _Traits.translate(_Ch); } - _Node_str<_Elem>* _Node = static_cast<_Node_str<_Elem>*>(_Current); - _Node->_Data._Insert2(_Ch); + _Chars.push_back(_Ch); } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_class() { // add bracket expression node +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_class() { // add bracket expression node + _Emit_str_node(); _Link_node(new _Node_class<_Elem, _RxTraits>); } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_char_to_bitmap(unsigned char _Ch) { // add character to accelerator table +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_char_to_bitmap(unsigned char _Ch) { // add character to accelerator table _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current); if (!_Node->_Small) { @@ -3382,7 +3384,7 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_char_to_bitmap(unsigned char _Ch) } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_char_to_array(_Elem _Ch) { // append character to character array +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_char_to_array(_Elem _Ch) { // append character to character array _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current); if (!_Node->_Large) { _Node->_Large = new _Buf<_Elem>; @@ -3392,7 +3394,7 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_char_to_array(_Elem _Ch) { // app } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_char_to_class(_Elem _Ch) { // add character to bracket expression +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_char_to_class(_Elem _Ch) { // add character to bracket expression if (_Flags & regex_constants::icase) { _Ch = _Traits.translate_nocase(_Ch); } else if (_Flags & regex_constants::collate) { @@ -3408,7 +3410,7 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_char_to_class(_Elem _Ch) { // add } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_range(_Elem _Arg0, const _Elem _Arg1) { +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_range(_Elem _Arg0, const _Elem _Arg1) { // add character range to set using _String_type = typename _RxTraits::string_type; using _Char_traits_type = typename _String_type::traits_type; @@ -3502,7 +3504,7 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_range(_Elem _Arg0, const _Elem _A } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_elts( +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_elts( _Node_class<_Elem, _RxTraits>* _Node, typename _RxTraits::char_class_type _Cl, bool _Negative) { // add characters in named class to set for (unsigned int _Ch = 0; _Ch < _Bmp_max; ++_Ch) { // add elements or their inverse @@ -3518,7 +3520,7 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_elts( } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_named_class( +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_named_class( typename _RxTraits::char_class_type _Cl, const _Rx_char_class_kind _Kind) { // add contents of named class to bracket expression using _Char_class_type = typename _RxTraits::char_class_type; @@ -3543,7 +3545,7 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_named_class( } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Char_to_elts(const _Elem* const _First, const _Elem* const _Last, +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Char_to_elts(const _Elem* const _First, const _Elem* const _Last, _Sequence<_Elem>** _Cur) { // add collation element to element sequence auto _Diff = static_cast(_Last - _First); while (*_Cur && _Diff < (*_Cur)->_Sz) { @@ -3560,7 +3562,7 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Char_to_elts(const _Elem* const _Firs } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_equiv(const _Elem* const _First, const _Elem* const _Last) { +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_equiv(const _Elem* const _First, const _Elem* const _Last) { // add elements of equivalence class to bracket expression _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current); typename _RxTraits::string_type _Str = _Traits.transform_primary(_First, _Last); @@ -3588,7 +3590,7 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_equiv(const _Elem* const _First, } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_coll(const _Elem* const _First, const _Elem* const _Last) { +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_coll(const _Elem* const _First, const _Elem* const _Last) { // add collation element to bracket expression _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current); _Sequence<_Elem>** _Cur = _STD addressof(_Node->_Coll); @@ -3596,12 +3598,13 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_coll(const _Elem* const _First, c } template -_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Begin_group() { // add group node +_Node_base* _Builder3<_FwdIt, _Elem, _RxTraits>::_Begin_group() { // add group node return _New_node(_N_group); } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_End_group(_Node_base* _Back) { // add end of group node +void _Builder3<_FwdIt, _Elem, _RxTraits>::_End_group(_Node_base* _Back) { // add end of group node + _Emit_str_node(); _Node_type _Elt; if (_Back->_Kind == _N_group) { _Elt = _N_end_group; @@ -3615,14 +3618,15 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_End_group(_Node_base* _Back) { // add } template -_Node_assert* _Builder2<_FwdIt, _Elem, _RxTraits>::_Begin_assert_group2(const bool _Neg) { // add assert node +_Node_assert* _Builder3<_FwdIt, _Elem, _RxTraits>::_Begin_assert_group2(const bool _Neg) { // add assert node + _Emit_str_node(); const auto _Node = new _Node_assert(_Neg ? _N_neg_assert : _N_assert); _Link_node(_Node); return _Node; } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_End_assert_group2(_Node_assert* const _Assert_start) { +void _Builder3<_FwdIt, _Elem, _RxTraits>::_End_assert_group2(_Node_assert* const _Assert_start) { // add end of assert node _End_group(_Assert_start); _Assert_start->_Child = _Assert_start->_Next; @@ -3631,17 +3635,20 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_End_assert_group2(_Node_assert* const } template -_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Begin_capture_group(unsigned int _Idx) { // add capture group node +_Node_base* _Builder3<_FwdIt, _Elem, _RxTraits>::_Begin_capture_group(unsigned int _Idx) { // add capture group node + _Emit_str_node(); return _Link_node(new _Node_capture(_Idx)); } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_backreference(unsigned int _Idx) { // add back reference node +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_backreference(unsigned int _Idx) { // add back reference node + _Emit_str_node(); _Link_node(new _Node_back(_Idx)); } template -_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Begin_if(_Node_base* _Start) { // add if node +_Node_base* _Builder3<_FwdIt, _Elem, _RxTraits>::_Begin_if(_Node_base* _Start) { // add if node + _Emit_str_node(); // append endif node _Node_base* _Res = new _Node_endif; _Link_node(_Res); @@ -3654,7 +3661,9 @@ _Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Begin_if(_Node_base* _Start) { } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Else_if2(_Node_base* const _Start, _Node_base* const _End) { // add else node +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Else_if(_Node_base* const _Start, _Node_base* const _End) { // add else node + _Emit_str_node(); + auto _Parent = static_cast<_Node_if*>(_Start->_Next); while (_Parent->_Child) { _Parent = _Parent->_Child; @@ -3676,12 +3685,14 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Else_if2(_Node_base* const _Start, _N } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_rep(int _Min, int _Max, bool _Greedy) { // add repeat node - if (_Current->_Kind == _N_str - && static_cast<_Node_str<_Elem>*>(_Current)->_Data._Size() != 1) { // move final character to new string node - _Node_str<_Elem>* _Node = static_cast<_Node_str<_Elem>*>(_Current); - _Add_str_node(); - _Add_char(_Node->_Data._Del()); +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Add_rep(int _Min, int _Max, bool _Greedy) { // add repeat node + if (!_Chars.empty()) { + const _Elem _Last_char = _Chars.back(); + _Chars.pop_back(); + _Emit_str_node(); + auto _Node = new _Node_str<_Elem>; + _Link_node(_Node); + _Node->_Data._Insert2(_Last_char); } _Node_base* _Pos = _Current; @@ -3724,13 +3735,13 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_rep(int _Min, int _Max, bool _Gre } template -_Root_node* _Builder2<_FwdIt, _Elem, _RxTraits>::_End_pattern() { // wrap up +_Root_node* _Builder3<_FwdIt, _Elem, _RxTraits>::_End_pattern() { // wrap up _New_node(_N_end); return _Root; } template -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Tidy() noexcept { // free memory +void _Builder3<_FwdIt, _Elem, _RxTraits>::_Tidy() noexcept { // free memory _Destroy_node(_Root); _Root = nullptr; } @@ -5110,12 +5121,12 @@ _It _Matcher3<_Elem, _RxTraits, _It, _Alloc>::_Skip( } template -[[noreturn]] void _Parser2<_FwdIt, _Elem, _RxTraits>::_Error(regex_constants::error_type _Code) { // handle error +[[noreturn]] void _Parser3<_FwdIt, _Elem, _RxTraits>::_Error(regex_constants::error_type _Code) { // handle error _Xregex_error(_Code); } template -bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Is_esc(_FwdIt _Ch0) const { // assumes _Ch0 != _End +bool _Parser3<_FwdIt, _Elem, _RxTraits>::_Is_esc(_FwdIt _Ch0) const { // assumes _Ch0 != _End return _Mode == _Lex_mode::_Default && ++_Ch0 != _End && ((!(_L_flags & _L_nex_grp) && (*_Ch0 == static_cast(_Meta_lpar) || *_Ch0 == static_cast(_Meta_rpar))) @@ -5124,7 +5135,7 @@ bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Is_esc(_FwdIt _Ch0) const { // assumes } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-character +void _Parser3<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-character static constexpr char _Meta_map[] = {_Meta_lpar, _Meta_rpar, _Meta_dlr, _Meta_caret, _Meta_dot, _Meta_star, _Meta_plus, _Meta_query, _Meta_lsq, _Meta_rsq, _Meta_bar, _Meta_esc, _Meta_dash, _Meta_lbr, _Meta_rbr, _Meta_comma, _Meta_colon, _Meta_equal, _Meta_exc, _Meta_nl, _Meta_cr, _Meta_bsp, 0}; // array of meta chars @@ -5237,7 +5248,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-cha } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_Next() { // advance to next input character +void _Parser3<_FwdIt, _Elem, _RxTraits>::_Next() { // advance to next input character if (_Pat != _End) { // advance if (*_Pat == static_cast(_Meta_esc) && _Is_esc(_Pat)) { ++_Pat; @@ -5249,7 +5260,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Next() { // advance to next input char } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_Expect(_Meta_type _St, regex_constants::error_type _Code) { +void _Parser3<_FwdIt, _Elem, _RxTraits>::_Expect(_Meta_type _St, regex_constants::error_type _Code) { // check whether current meta-character is _St if (_Mchar != _St) { _Error(_Code); @@ -5259,7 +5270,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Expect(_Meta_type _St, regex_constants } template -int _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_digits( +int _Parser3<_FwdIt, _Elem, _RxTraits>::_Do_digits( int _Base, int _Initial, int _Count, regex_constants::error_type _Error_type) { // translate digits to numeric value int _Chv; _Val = _Initial; @@ -5276,25 +5287,25 @@ int _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_digits( } template -bool _Parser2<_FwdIt, _Elem, _RxTraits>::_DecimalDigits( +bool _Parser3<_FwdIt, _Elem, _RxTraits>::_DecimalDigits( const regex_constants::error_type _Error_type, const int _Initial /* = 0 */) { // check for decimal value return _Do_digits(10, _Initial, INT_MAX, _Error_type) != INT_MAX; } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_HexDigits(int _Count) { // check for _Count hex digits +void _Parser3<_FwdIt, _Elem, _RxTraits>::_HexDigits(int _Count) { // check for _Count hex digits if (_Do_digits(16, 0, _Count, regex_constants::error_escape) != 0) { _Error(regex_constants::error_escape); } } template -bool _Parser2<_FwdIt, _Elem, _RxTraits>::_OctalDigits() { // check for up to 3 octal digits +bool _Parser3<_FwdIt, _Elem, _RxTraits>::_OctalDigits() { // check for up to 3 octal digits return _Do_digits(8, 0, 3, regex_constants::error_escape) != 3; } template -_Prs_ret _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_ex_class( +_Prs_ret _Parser3<_FwdIt, _Elem, _RxTraits>::_Do_ex_class( _Meta_type _End_arg) { // handle delimited expressions within bracket expression const regex_constants::error_type _Errtype = _End_arg == _Meta_colon ? regex_constants::error_ctype : regex_constants::error_collate; @@ -5363,7 +5374,7 @@ _Prs_ret _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_ex_class( } template -bool _Parser2<_FwdIt, _Elem, _RxTraits>::_CharacterClassEscape(bool _Addit) { // check for character class escape +bool _Parser3<_FwdIt, _Elem, _RxTraits>::_CharacterClassEscape(bool _Addit) { // check for character class escape typename _RxTraits::char_class_type _Cls; _FwdIt _Ch0 = _Pat; if (_Ch0 == _End || (_Cls = _Traits.lookup_classname(_Pat, ++_Ch0, (_Flags & regex_constants::icase) != 0)) == 0) { @@ -5392,7 +5403,7 @@ bool _Parser2<_FwdIt, _Elem, _RxTraits>::_CharacterClassEscape(bool _Addit) { // } template -_Prs_ret _Parser2<_FwdIt, _Elem, _RxTraits>::_ClassEscape() { // check for class escape +_Prs_ret _Parser3<_FwdIt, _Elem, _RxTraits>::_ClassEscape() { // check for class escape if ((_L_flags & _L_esc_bsp) && _Char == static_cast(_Esc_ctrl_b)) { // handle backspace escape _Next(); _Unescaped_char = static_cast<_Elem>(static_cast(_Meta_bsp)); @@ -5416,7 +5427,7 @@ _Prs_ret _Parser2<_FwdIt, _Elem, _RxTraits>::_ClassEscape() { // check for class } template -_Prs_ret _Parser2<_FwdIt, _Elem, _RxTraits>::_ClassAtom(const bool _Initial) { // check for class atom +_Prs_ret _Parser3<_FwdIt, _Elem, _RxTraits>::_ClassAtom(const bool _Initial) { // check for class atom if (_Mchar == _Meta_esc && (_L_flags & _L_grp_esc)) { // check for valid escape sequence _Next(); return _ClassEscape(); @@ -5444,7 +5455,7 @@ _Prs_ret _Parser2<_FwdIt, _Elem, _RxTraits>::_ClassAtom(const bool _Initial) { / } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid class ranges +void _Parser3<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid class ranges _Prs_ret _Ret; bool _Initial = true; @@ -5494,7 +5505,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid cla } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_CharacterClass() { // add bracket expression +void _Parser3<_FwdIt, _Elem, _RxTraits>::_CharacterClass() { // add bracket expression _Nfa._Add_class(); if (_Mchar == _Meta_caret) { // negate bracket expression _Nfa._Negate(); @@ -5505,7 +5516,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_CharacterClass() { // add bracket expr } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_capture_group() { // add capture group +void _Parser3<_FwdIt, _Elem, _RxTraits>::_Do_capture_group() { // add capture group ++_Grp_idx; if (_Grp_idx >= 1000) { // hardcoded limit @@ -5520,21 +5531,21 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_capture_group() { // add capture gr } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_noncapture_group() { // add non-capture group +void _Parser3<_FwdIt, _Elem, _RxTraits>::_Do_noncapture_group() { // add non-capture group _Node_base* _Pos1 = _Nfa._Begin_group(); _Disjunction(); _Nfa._End_group(_Pos1); } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_assert_group(const bool _Neg) { // add assert group +void _Parser3<_FwdIt, _Elem, _RxTraits>::_Do_assert_group(const bool _Neg) { // add assert group const auto _Assert_start = _Nfa._Begin_assert_group2(_Neg); _Disjunction(); _Nfa._End_assert_group2(_Assert_start); } template -bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Wrapped_disjunction() { // add disjunction inside group +bool _Parser3<_FwdIt, _Elem, _RxTraits>::_Wrapped_disjunction() { // add disjunction inside group ++_Disj_count; if (_Disj_count >= 1000) { // hardcoded limit _Error(regex_constants::error_stack); @@ -5570,7 +5581,7 @@ bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Wrapped_disjunction() { // add disjunc } template -bool _Parser2<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape(bool _In_character_class) const { +bool _Parser3<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape(bool _In_character_class) const { // check for valid identity escape auto _Uchar = static_cast(_Char); @@ -5629,7 +5640,7 @@ bool _Parser2<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape(bool _In_character_cl } template -bool _Parser2<_FwdIt, _Elem, _RxTraits>::_IdentityEscape(bool _In_character_class) { +bool _Parser3<_FwdIt, _Elem, _RxTraits>::_IdentityEscape(bool _In_character_class) { // check whether an escape is valid, and process it if so if (_IsIdentityEscape(_In_character_class)) { _Unescaped_char = _Char; @@ -5641,7 +5652,7 @@ bool _Parser2<_FwdIt, _Elem, _RxTraits>::_IdentityEscape(bool _In_character_clas } template -bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_ffn(_Elem _Ch) { // check for limited file format escape characters +bool _Parser3<_FwdIt, _Elem, _RxTraits>::_Do_ffn(_Elem _Ch) { // check for limited file format escape characters if (_Ch == static_cast(_Esc_ctrl_f)) { _Val = '\f'; } else if (_Ch == static_cast(_Esc_ctrl_n)) { @@ -5660,7 +5671,7 @@ bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_ffn(_Elem _Ch) { // check for limit } template -bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_ffnx(_Elem _Ch) { // check for the remaining file format escape characters +bool _Parser3<_FwdIt, _Elem, _RxTraits>::_Do_ffnx(_Elem _Ch) { // check for the remaining file format escape characters if (_Ch == static_cast(_Esc_ctrl_a)) { _Val = '\a'; } else if (_Ch == static_cast(_Esc_ctrl_b)) { @@ -5673,7 +5684,7 @@ bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_ffnx(_Elem _Ch) { // check for the } template -bool _Parser2<_FwdIt, _Elem, _RxTraits>::_CharacterEscape(bool _In_character_class) { +bool _Parser3<_FwdIt, _Elem, _RxTraits>::_CharacterEscape(bool _In_character_class) { // check for valid character escape if (_Mchar == _Meta_eos) { _Error(regex_constants::error_escape); @@ -5724,7 +5735,7 @@ bool _Parser2<_FwdIt, _Elem, _RxTraits>::_CharacterEscape(bool _In_character_cla } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom escape +void _Parser3<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom escape if ((_L_flags & (_L_bzr_chr | _L_bckr)) && (_Val = _Traits.value(_Char, 10)) != -1) { // escaped decimal sequence _Next(); if ((_L_flags & _L_bzr_chr) && _Val == 0) { // handle \0 @@ -5754,7 +5765,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier following atom +void _Parser3<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier following atom int _Min = 0; int _Max = -1; if (_Mchar != _Meta_star) { @@ -5800,7 +5811,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_Alternative2() { // check for valid alternative +void _Parser3<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alternative bool _Found = false; while (_Mchar != _Meta_eos && _Mchar != _Meta_bar && (_Mchar != _Meta_rpar || _Disj_count == 0)) { // concatenate valid elements @@ -5868,23 +5879,23 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Alternative2() { // check for valid al } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_Disjunction() { // check for valid disjunction +void _Parser3<_FwdIt, _Elem, _RxTraits>::_Disjunction() { // check for valid disjunction _Node_base* _Pos1 = _Nfa._Getmark(); - _Alternative2(); + _Alternative(); if (_Mchar == _Meta_bar) { // at least one more alternative _Node_base* _Pos2 = _Nfa._Begin_if(_Pos1); do { // append terms as long as we keep finding | characters _Next(); - _Alternative2(); - _Nfa._Else_if2(_Pos1, _Pos2); + _Alternative(); + _Nfa._Else_if(_Pos1, _Pos2); } while (_Mchar == _Meta_bar); } } template -void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity( +void _Parser3<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity( _Node_base* _Nx, _Node_base* _Ne, _Node_rep* _Outer_rep, const bool _Nonreentrant) { // walks regex NFA, calculates values of _Node_rep::_Simple_loop for (; _Nx != _Ne && _Nx; _Nx = _Nx->_Next) { @@ -5995,7 +6006,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity( } template -_Root_node* _Parser2<_FwdIt, _Elem, _RxTraits>::_Compile() { // compile regular expression +_Root_node* _Parser3<_FwdIt, _Elem, _RxTraits>::_Compile() { // compile regular expression _Root_node* _Res = nullptr; _Tidy_guard _Guard{_STD addressof(_Nfa)}; _Node_base* _Pos1 = _Nfa._Begin_capture_group(0); @@ -6012,7 +6023,7 @@ _Root_node* _Parser2<_FwdIt, _Elem, _RxTraits>::_Compile() { // compile regular } template -_Parser2<_FwdIt, _Elem, _RxTraits>::_Parser2( +_Parser3<_FwdIt, _Elem, _RxTraits>::_Parser3( const _RxTraits& _Tr, _FwdIt _Pfirst, _FwdIt _Plast, regex_constants::syntax_option_type _Fx) : _Pat(_Pfirst), _End(_Plast), _Nfa(_Tr, _Fx), _Traits(_Tr), _Flags(_Fx) { diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp index bea5aa2d48..21ed86e57d 100644 --- a/tests/std/tests/VSO_0000000_regex_use/test.cpp +++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp @@ -2652,6 +2652,33 @@ void test_gh_6267() { } } +void test_gh_6289() { + // GH-6289: Emit complete _N_str nodes only during NFA construction + g_regexTester.should_match("ab", "ab"); + g_regexTester.should_not_match("", "ab"); + g_regexTester.should_match("\na", "\n^a", multiline); + g_regexTester.should_match("a\nb", "a$\nb", multiline); + g_regexTester.should_match("a ", R"(a\b )"); + g_regexTester.should_match("ab", R"(a\Bb)"); + g_regexTester.should_match("ab", "a."); + g_regexTester.should_match("ab", "a[b]"); + g_regexTester.should_match("a", "a(?:b)*"); + g_regexTester.should_match("ab", "a(?:b)*"); + g_regexTester.should_match("abb", "a(?:b)*"); + g_regexTester.should_match("abc", "a(?=b).."); + g_regexTester.should_not_match("aab", "a(?=b).."); + g_regexTester.should_capture("a", "a(b)*", ""); + g_regexTester.should_capture("ab", "a(b)*", "b"); + g_regexTester.should_capture("abb", "a(b)*", "b"); + g_regexTester.should_match("aba", R"((a)b\1)"); + g_regexTester.should_match("a", "a|b|c"); + g_regexTester.should_match("b", "a|b|c"); + g_regexTester.should_match("c", "a|b|c"); + g_regexTester.should_not_match("ab", "a|b|c"); + g_regexTester.should_not_match("bc", "a|b|c"); + g_regexTester.should_not_match("abc", "a|b|c"); +} + int main() { test_dev10_449367_case_insensitivity_should_work(); test_dev11_462743_regex_collate_should_not_disable_regex_icase(); @@ -2722,6 +2749,7 @@ int main() { test_gh_6249(); test_gh_6262(); test_gh_6267(); + test_gh_6289(); return g_regexTester.result(); }