2011-03-13 05:46:29 -07:00
|
|
|
/*
|
|
|
|
* (C) 2006, 2007 Andreas Gruenbacher <agruen@suse.de>
|
|
|
|
* Copyright (c) 2003-2008 Novell, Inc. (All rights reserved)
|
|
|
|
* Copyright 2009-2010 Canonical Ltd.
|
|
|
|
*
|
|
|
|
* The libapparmor library is licensed under the terms of the GNU
|
|
|
|
* Lesser General Public License, version 2.1. Please see the file
|
|
|
|
* COPYING.LGPL.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* Parsing of regular expression into expression trees as implemented in
|
|
|
|
* expr-tree
|
|
|
|
*/
|
|
|
|
|
|
|
|
%{
|
|
|
|
/* #define DEBUG_TREE */
|
2011-03-13 05:58:54 -07:00
|
|
|
#include "expr-tree.h"
|
2011-03-13 05:46:29 -07:00
|
|
|
|
|
|
|
%}
|
|
|
|
|
|
|
|
/* Semantic value carried by grammar symbols. */
%union {
	char c;		/* a single character (CHAR token, regex_char, cset_char*) */
	Node *node;	/* an expression (sub)tree (regex, expr, terms, term, ...) */
	Chars *cset;	/* a character set being collected (charset, cset_chars) */
}
|
|
|
|
|
|
|
|
%{
|
|
|
|
|
2011-03-13 05:58:54 -07:00
|
|
|
void regex_error(Node **, const char *, const char *);
|
|
|
|
#define YYLEX_PARAM &text
|
|
|
|
int regex_lex(YYSTYPE *, const char **);
|
|
|
|
|
|
|
|
/*
 * Insert the single character a into cset.
 *
 * Returns the same cset pointer that was passed in, so the call can be
 * used directly as the value of a grammar action.
 */
static inline Chars *insert_char(Chars* cset, uchar a)
{
	cset->insert(a);
	return cset;
}
|
2011-03-13 05:46:29 -07:00
|
|
|
|
2011-03-13 05:58:54 -07:00
|
|
|
/*
 * Insert every character in the inclusive range between a and b into cset.
 *
 * The bounds may be given in either order; they are swapped if a > b.
 * Returns the same cset pointer that was passed in.
 */
static inline Chars* insert_char_range(Chars* cset, uchar a, uchar b)
{
	if (a > b)
		swap(a, b);

	/*
	 * Test for the end of the range *after* inserting rather than with
	 * "i <= b": if b is the largest value a uchar can hold, "i <= b" is
	 * always true and the counter would wrap around forever.
	 */
	for (uchar i = a; ; i++) {
		cset->insert(i);
		if (i == b)
			break;
	}
	return cset;
}
|
|
|
|
|
2011-03-13 05:46:29 -07:00
|
|
|
%}
|
|
|
|
|
|
|
|
%pure-parser
/* %error-verbose */
/*
 * Pass the extra lexer argument explicitly.  Bison 3.0 and later no longer
 * honour the YYLEX_PARAM macro by itself; naming the macro here makes the
 * generated parser call regex_lex(&yylval, YYLEX_PARAM), which expands to
 * the &text argument defined in the prologue, on old and new Bison alike.
 */
%lex-param {YYLEX_PARAM}
%parse-param {Node **root}
%parse-param {const char *text}
%name-prefix = "regex_"
|
|
|
|
|
|
|
|
%token <c> CHAR
|
|
|
|
%type <c> regex_char cset_char1 cset_char cset_charN
|
|
|
|
%type <cset> charset cset_chars
|
|
|
|
%type <node> regex expr terms0 terms qterm term
|
|
|
|
|
|
|
|
/**
 * Note: destroy all nodes upon failure, but *not* the start symbol once
 * parsing succeeds!
 */
%destructor { $$->release(); } expr terms0 terms qterm term

/**
 * Character sets are heap-allocated by the cset_chars rule and are only
 * deleted by the actions that turn them into a node.  Free the ones that
 * are discarded from the parser stack when a syntax error aborts the parse.
 */
%destructor { delete $$; } charset cset_chars
|
|
|
|
|
|
|
|
%%
|
|
|
|
|
|
|
|
/* FIXME: Does not parse "[--]", "[---]", "[^^-x]". I don't actually know
   which precise grammar Perl regexes use, and rediscovering that
   is proving to be painful. */
|
|
|
|
|
|
|
|
/* Start symbol: stores the resulting tree through the root parse parameter. */
regex	: /* empty */
		{
			$$ = &epsnode;
			*root = $$;
		}
	| expr
		{
			$$ = $1;
			*root = $$;
		}
	;
|
|
|
|
|
|
|
|
/* Alternation: one or more '|'-separated branches; a branch may be empty. */
expr	: terms
		{ $$ = $1; }
	| expr '|' terms0
		{ $$ = new AltNode($1, $3); }
	| '|' terms0
		{
			/* leading '|': the left-hand branch is the empty match */
			$$ = new AltNode(&epsnode, $2);
		}
	;
|
|
|
|
|
|
|
|
/* A possibly empty sequence of terms. */
terms0	: /* empty */
		{ $$ = &epsnode; }
	| terms
		{ $$ = $1; }
	;
|
|
|
|
|
|
|
|
/* Concatenation: a non-empty, left-recursive sequence of quantified terms. */
terms	: qterm
		{ $$ = $1; }
	| terms qterm
		{ $$ = new CatNode($1, $2); }
	;
|
|
|
|
|
|
|
|
/* A term with an optional '*' or '+' quantifier. */
qterm	: term
		{ $$ = $1; }
	| term '*'
		{ $$ = new StarNode($1); }
	| term '+'
		{ $$ = new PlusNode($1); }
	;
|
|
|
|
|
|
|
|
/* An atom: any-char, a literal, a bracketed character set, or a group. */
term	: '.'
		{ $$ = new AnyCharNode; }
	| regex_char
		{ $$ = new CharNode($1); }
	| '[' charset ']'
		{
			/* the temporary set is deleted once the node is built from it */
			$$ = new CharSetNode(*$2);
			delete $2;
		}
	| '[' '^' charset ']'
		{
			$$ = new NotCharSetNode(*$3);
			delete $3;
		}
	| '[' '^' '^' cset_chars ']'
		{
			/* negated set whose first member is a literal '^' */
			$4->insert('^');
			$$ = new NotCharSetNode(*$4);
			delete $4;
		}
	| '(' regex ')'
		{ $$ = $2; }
	;
|
|
|
|
|
|
|
|
/*
 * A literal character outside of brackets: a CHAR token, or one of the
 * '^', '-' and ']' tokens taken literally.
 */
regex_char
	: CHAR
		{ $$ = $1; }
	| '^'
		{ $$ = '^'; }
	| '-'
		{ $$ = '-'; }
	| ']'
		{ $$ = ']'; }
	;
|
|
|
|
|
|
|
|
/*
 * Contents of a bracket expression.  The first element (a single character
 * or the start of a range) uses cset_char1; it is added to the set built
 * by the trailing cset_chars.
 */
charset	: cset_char1 cset_chars
		{ $$ = insert_char($2, $1); }
	| cset_char1 '-' cset_charN cset_chars
		{ $$ = insert_char_range($4, $1, $3); }
	;
|
|
|
|
|
|
|
|
/*
 * Remaining elements of a bracket expression: zero or more single
 * characters or ranges.  The empty alternative allocates the set that
 * every later alternative adds to.
 */
cset_chars
	: /* nothing */
		{ $$ = new Chars; }
	| cset_chars cset_charN
		{ $$ = insert_char($1, $2); }
	| cset_chars cset_charN '-' cset_charN
		{ $$ = insert_char_range($1, $2, $4); }
	;
|
|
|
|
|
|
|
|
/* First character of a set: a cset_char, or a literal ']' or '-'. */
cset_char1
	: cset_char
		{ $$ = $1; }
	| ']'
		{ $$ = ']'; }
	| '-'
		{ $$ = '-'; }
	;
|
|
|
|
|
|
|
|
/* Any later character of a set: a cset_char, or a literal '^'. */
cset_charN
	: cset_char
		{ $$ = $1; }
	| '^'
		{ $$ = '^'; }
	;
|
|
|
|
|
|
|
|
/*
 * Characters accepted at any position inside brackets: a CHAR token, or
 * one of the operator tokens below taken as a literal character.
 */
cset_char
	: CHAR
		{ $$ = $1; }
	| '['
		{ $$ = '['; }
	| '*'
		{ $$ = '*'; }
	| '+'
		{ $$ = '+'; }
	| '.'
		{ $$ = '.'; }
	| '|'
		{ $$ = '|'; }
	| '('
		{ $$ = '('; }
	| ')'
		{ $$ = ')'; }
	;
|
|
|
|
|
|
|
|
%%
|
|
|
|
|
|
|
|
|
2011-03-13 05:58:54 -07:00
|
|
|
/*
 * Value of the octal digit c.
 *
 * Returns 0..7 for the characters '0'..'7' and -1 for anything else.
 */
int octdigit(char c)
{
	return (c < '0' || c > '7') ? -1 : c - '0';
}
|
|
|
|
|
2011-03-13 05:58:54 -07:00
|
|
|
/*
 * Value of the hexadecimal digit c.
 *
 * Returns 0..9 for '0'..'9', 10..15 for 'a'..'f' or 'A'..'F', and -1 for
 * anything else.
 */
int hexdigit(char c)
{
	if ('0' <= c && c <= '9')
		return c - '0';
	if ('a' <= c && c <= 'f')
		return c - 'a' + 10;
	if ('A' <= c && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
|
|
|
|
|
2011-03-13 05:58:54 -07:00
|
|
|
/*
 * Lexer for the regex grammar.
 *
 * Reads one token from the string at *pos and advances *pos past it.
 *
 * Returns 0 at the terminating NUL (leaving *pos on it), the character
 * itself for the operator characters * + . | ^ - [ ] ( ), and CHAR for
 * everything else, with the character value stored in val->c.
 *
 * Backslash escapes yield CHAR: \\ (and a backslash at the very end of the
 * string) give a backslash, \0 followed by up to three octal digits and \x
 * followed by up to two hex digits give the encoded value, \a \e \f \n \r \t
 * give the corresponding control character, and any other escaped character
 * stands for itself.
 */
int regex_lex(YYSTYPE *val, const char **pos)
{
	/* By default the token value is the character under the cursor. */
	char cur = *(*pos)++;

	val->c = cur;
	switch (cur) {
	case '\0':
		/* do not step past the end of the string */
		(*pos)--;
		return 0;

	case '*': case '+': case '.': case '|': case '^': case '-':
	case '[': case ']': case '(': case ')':
		return cur;

	case '\\': {
		char esc = *(*pos)++;

		val->c = esc;
		switch (esc) {
		case '\0':
			(*pos)--;
			/* fall through */
		case '\\':
			val->c = '\\';
			break;

		case '0':
			val->c = 0;
			/* a failed digit leaves *pos alone, so stopping is final */
			for (int seen = 0; seen < 3; seen++) {
				int digit = octdigit(**pos);

				if (digit < 0)
					break;
				val->c = (val->c << 3) + digit;
				(*pos)++;
			}
			break;

		case 'x':
			val->c = 0;
			for (int seen = 0; seen < 2; seen++) {
				int digit = hexdigit(**pos);

				if (digit < 0)
					break;
				val->c = (val->c << 4) + digit;
				(*pos)++;
			}
			break;

		case 'a': val->c = '\a'; break;
		case 'e': val->c = 033 /* ESC */; break;
		case 'f': val->c = '\f'; break;
		case 'n': val->c = '\n'; break;
		case 'r': val->c = '\r'; break;
		case 't': val->c = '\t'; break;

		default:
			/* any other escaped character stands for itself */
			break;
		}
		break;
	}

	default:
		break;
	}

	return CHAR;
}
|
|
|
|
|
2011-03-13 05:58:54 -07:00
|
|
|
void regex_error(Node ** __attribute__((unused)),
|
|
|
|
const char *text __attribute__((unused)),
|
|
|
|
const char *error __attribute__((unused)))
|
2011-03-13 05:46:29 -07:00
|
|
|
{
|
2011-03-13 05:58:54 -07:00
|
|
|
/* We don't want the library to print error messages. */
|
2011-03-13 05:46:29 -07:00
|
|
|
}
|