Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

<regex>: Revise caret parsing in basic and grep mode #5165

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -3874,10 +3874,9 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-char
break;

case _Meta_caret:
if ((_L_flags & _L_anch_rstr) && !_Nfa._Beg_expr()) {
_Mchar = _Meta_chr;
}

// A caret can always negate a bracket expression,
// but _L_anch_rstr (used by basic/grep) restricts caret anchors to the beginning.
// We'll handle that restriction when we're about to add a bol node.
break;

case _Meta_dlr:
Expand Down Expand Up @@ -4435,7 +4434,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alte
_Next();
_Quant = _Wrapped_disjunction();
_Expect(_Meta_rpar, regex_constants::error_paren);
} else if (_Mchar == _Meta_caret) { // add bol node
} else if (_Mchar == _Meta_caret && (!(_L_flags & _L_anch_rstr) || _Nfa._Beg_expr())) { // add bol node
_Nfa._Add_bol();
_Next();
_Quant = false;
Expand Down
38 changes: 38 additions & 0 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,43 @@ void test_gh_5160() {
neg_regex.should_search_fail(L"xxxYxx\x2009xxxZxxx"); // U+2009 THIN SPACE
}

void test_gh_5165_syntax_option(const syntax_option_type basic_or_grep) {
g_regexTester.should_not_match("yx", "y[^x]", basic_or_grep);
g_regexTester.should_match("yz", "y[^x]", basic_or_grep);
g_regexTester.should_match("y^", "y[^x]", basic_or_grep);

g_regexTester.should_match("yx", "y[x^]", basic_or_grep);
g_regexTester.should_not_match("yz", "y[x^]", basic_or_grep);
g_regexTester.should_match("y^", "y[x^]", basic_or_grep);

g_regexTester.should_not_match("yx", "y[^x^]", basic_or_grep);
g_regexTester.should_match("yz", "y[^x^]", basic_or_grep);
g_regexTester.should_not_match("y^", "y[^x^]", basic_or_grep);

{
const test_regex no_anchor(&g_regexTester, "meo[wW]", basic_or_grep);
no_anchor.should_search_match("meow_machine", "meow");
no_anchor.should_search_match("homeowner", "meow");
}
{
const test_regex beginning_anchor(&g_regexTester, "^meo[wW]", basic_or_grep);
beginning_anchor.should_search_match("meow_machine", "meow");
beginning_anchor.should_search_fail("homeowner");
}
{
const test_regex middle_anchor(&g_regexTester, "me^o[wW]", basic_or_grep);
middle_anchor.should_search_fail("meow_machine");
middle_anchor.should_search_fail("homeowner");
middle_anchor.should_search_match("home^owner", "me^ow");
}
}

void test_gh_5165() {
// GH-5165: Caret ^ should negate character classes in basic regular expressions
test_gh_5165_syntax_option(basic);
test_gh_5165_syntax_option(grep);
}

int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
Expand Down Expand Up @@ -699,6 +736,7 @@ int main() {
test_gh_4995();
test_gh_5058();
test_gh_5160();
test_gh_5165();

return g_regexTester.result();
}