parser: fix alternation expansions that occur inside character classes

The parser was converting alternation characters ('{', '}', and ',')
to their PCRE equivalents ('(', ')', and '|', respectively) even when
they occurred inside character class patterns (i.e. inside '[ ]').
This patch fixes the issue and adds a few unit tests around character
classes.

Signed-off-by: Steve Beattie <steve@nxnw.org>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
This commit is contained in:
Steve Beattie 2013-12-10 12:22:32 -08:00
parent 9a4b1c6493
commit 2e8f7fff7c

View file

@ -251,17 +251,22 @@ static pattern_t convert_aaregex_to_pcre(const char *aare, int anchor,
/* { is a PCRE special character */ /* { is a PCRE special character */
STORE("\\{", dptr, 2); STORE("\\{", dptr, 2);
} else { } else {
update_re_pos(sptr - aare); if (incharclass) {
ingrouping++; /* don't expand inside [] */
if (ingrouping >= MAX_ALT_DEPTH) { STORE("{", dptr, 1);
error = e_parse_error;
PERROR(_("%s: Regex grouping error: Exceeded maximum nesting of {}\n"), progname);
} else { } else {
grouping_count[ingrouping] = 0; update_re_pos(sptr - aare);
ptype = ePatternRegex; ingrouping++;
STORE("(", dptr, 1); if (ingrouping >= MAX_ALT_DEPTH) {
} error = e_parse_error;
PERROR(_("%s: Regex grouping error: Exceeded maximum nesting of {}\n"), progname);
} else {
grouping_count[ingrouping] = 0;
ptype = ePatternRegex;
STORE("(", dptr, 1);
}
} /* incharclass */
} }
break; break;
@ -270,31 +275,43 @@ static pattern_t convert_aaregex_to_pcre(const char *aare, int anchor,
/* { is a PCRE special character */ /* { is a PCRE special character */
STORE("\\}", dptr, 2); STORE("\\}", dptr, 2);
} else { } else {
if (grouping_count[ingrouping] == 0) { if (incharclass) {
error = e_parse_error; /* don't expand inside [] */
PERROR(_("%s: Regex grouping error: Invalid number of items between {}\n"), progname); STORE("}", dptr, 1);
} else {
if (grouping_count[ingrouping] == 0) {
error = e_parse_error;
PERROR(_("%s: Regex grouping error: Invalid number of items between {}\n"), progname);
} }
ingrouping--; ingrouping--;
if (ingrouping < 0) { if (ingrouping < 0) {
error = e_parse_error; error = e_parse_error;
PERROR(_("%s: Regex grouping error: Invalid close }, no matching open { detected\n"), progname); PERROR(_("%s: Regex grouping error: Invalid close }, no matching open { detected\n"), progname);
ingrouping = 0; ingrouping = 0;
} }
STORE(")", dptr, 1); STORE(")", dptr, 1);
} /* incharclass */
} /* bEscape */ } /* bEscape */
break; break;
case ',': case ',':
if (bEscape) { if (bEscape) {
/* , is not a PCRE regex character if (incharclass) {
* so no need to escape, just skip /* escape inside char class is a
* transform * valid matching char for '\'
*/ */
STORE(sptr, dptr, 1); STORE("\\,", dptr, 2);
} else {
/* ',' is not a PCRE regex character
* so no need to escape, just skip
* transform
*/
STORE(sptr, dptr, 1);
}
} else { } else {
if (ingrouping) { if (ingrouping && !incharclass) {
grouping_count[ingrouping]++; grouping_count[ingrouping]++;
STORE("|", dptr, 1); STORE("|", dptr, 1);
} else { } else {
@ -1419,6 +1436,18 @@ static int test_aaregex_to_pcre(void)
MY_REGEX_TEST("\\\\(", "\\\\\\(", ePatternBasic); MY_REGEX_TEST("\\\\(", "\\\\\\(", ePatternBasic);
MY_REGEX_TEST("\\\\)", "\\\\\\)", ePatternBasic); MY_REGEX_TEST("\\\\)", "\\\\\\)", ePatternBasic);
/* more complicated character class tests */
/* -- embedded alternations */
MY_REGEX_TEST("b[\\lor]t", "b[lor]t", ePatternRegex);
MY_REGEX_TEST("b[{a,b}]t", "b[{a,b}]t", ePatternRegex);
MY_REGEX_TEST("{alpha,b[{a,b}]t,gamma}", "(alpha|b[{a,b}]t|gamma)", ePatternRegex);
/* pcre treats the '\' in '\{' as a redundant escape inside [], so it
* should be okay for us to pass this on to pcre as '\{' */
MY_REGEX_TEST("b[\\{a,b\\}]t", "b[\\{a,b\\}]t", ePatternRegex);
MY_REGEX_TEST("{alpha,b[\\{a,b\\}]t,gamma}", "(alpha|b[\\{a,b\\}]t|gamma)", ePatternRegex);
MY_REGEX_TEST("{alpha,b[\\{a\\,b\\}]t,gamma}", "(alpha|b[\\{a\\,b\\}]t|gamma)", ePatternRegex);
return rc; return rc;
} }