2007-02-27 02:29:16 +00:00
|
|
|
/*
|
|
|
|
* regexp.y -- Regular Expression Matcher Generator
|
2007-04-11 08:12:51 +00:00
|
|
|
* (C) 2006, 2007 Andreas Gruenbacher <agruen@suse.de>
|
2007-02-27 02:29:16 +00:00
|
|
|
*
|
|
|
|
* Implementation based on the Lexical Analysis chapter of:
|
|
|
|
* Alfred V. Aho, Ravi Sethi, Jeffrey D. Ullman:
|
|
|
|
* Compilers: Principles, Techniques, and Tools (The "Dragon Book"),
|
|
|
|
* Addison-Wesley, 1986.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* See http://www.gnu.org for more details.
|
|
|
|
*/
|
|
|
|
|
|
|
|
%{
|
2009-07-24 07:33:09 +00:00
|
|
|
/* #define DEBUG_TREE */
|
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
#include <list>
|
|
|
|
#include <vector>
|
|
|
|
#include <set>
|
|
|
|
#include <map>
|
|
|
|
#include <ostream>
|
2008-11-07 13:00:05 +00:00
|
|
|
#include <iostream>
|
|
|
|
#include <fstream>
|
2007-02-27 02:29:16 +00:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
typedef unsigned char uchar;
|
|
|
|
typedef set<uchar> Chars;
|
|
|
|
|
|
|
|
ostream& operator<<(ostream& os, uchar c);
|
|
|
|
|
|
|
|
/* Compute the union of two sets. */
|
|
|
|
template<class T>
|
|
|
|
set<T> operator+(const set<T>& a, const set<T>& b)
|
|
|
|
{
|
|
|
|
set<T> c(a);
|
|
|
|
c.insert(b.begin(), b.end());
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
* When creating DFAs from regex trees, a DFA state is constructed from
|
|
|
|
* a set of important nodes in the syntax tree. This includes AcceptNodes,
|
|
|
|
* which indicate that when a match ends in a particular state, the
|
|
|
|
* regular expressions that the AcceptNode belongs to match.
|
2007-02-27 02:29:16 +00:00
|
|
|
*/
|
|
|
|
class ImportantNode;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
typedef set <ImportantNode *> NodeSet;
|
2007-02-27 02:29:16 +00:00
|
|
|
|
|
|
|
/**
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
* Out-edges from a state to another: we store the follow-set of Nodes
|
2007-02-27 02:29:16 +00:00
|
|
|
* for each input character that is not a default match in
|
|
|
|
* cases (i.e., following a CharNode or CharSetNode), and default
|
|
|
|
* matches in otherwise as well as in all matching explicit cases
|
|
|
|
* (i.e., following an AnyCharNode or NotCharSetNode). This avoids
|
|
|
|
* enumerating all the explicit tranitions for default matches.
|
|
|
|
*/
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
typedef struct NodeCases {
|
|
|
|
typedef map<uchar, NodeSet *>::iterator iterator;
|
2007-02-27 02:29:16 +00:00
|
|
|
iterator begin() { return cases.begin(); }
|
|
|
|
iterator end() { return cases.end(); }
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
NodeCases() : otherwise(0) { }
|
|
|
|
map<uchar, NodeSet *> cases;
|
|
|
|
NodeSet *otherwise;
|
|
|
|
} NodeCases;
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2008-11-07 13:00:05 +00:00
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
/* An abstract node in the syntax tree. */
|
|
|
|
class Node {
|
|
|
|
public:
|
|
|
|
Node() :
|
2010-11-09 11:28:22 -08:00
|
|
|
nullable(false) { child[0] = child[1] = 0; }
|
2007-02-27 02:29:16 +00:00
|
|
|
Node(Node *left) :
|
2010-11-09 11:28:22 -08:00
|
|
|
nullable(false) { child[0] = left; child[1] = 0; }
|
2007-02-27 02:29:16 +00:00
|
|
|
Node(Node *left, Node *right) :
|
2010-11-09 11:28:22 -08:00
|
|
|
nullable(false) { child[0] = left; child[1] = right; }
|
2007-02-27 02:29:16 +00:00
|
|
|
virtual ~Node()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
if (child[0])
|
|
|
|
child[0]->release();
|
|
|
|
if (child[1])
|
|
|
|
child[1]->release();
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* See the "Dragon Book" for an explanation of nullable, firstpos,
|
|
|
|
* lastpos, and followpos.
|
|
|
|
*/
|
|
|
|
virtual void compute_nullable() { }
|
|
|
|
virtual void compute_firstpos() = 0;
|
|
|
|
virtual void compute_lastpos() = 0;
|
|
|
|
virtual void compute_followpos() { }
|
2008-11-07 13:00:05 +00:00
|
|
|
virtual int eq(Node *other) = 0;
|
2007-02-27 02:29:16 +00:00
|
|
|
virtual ostream& dump(ostream& os) = 0;
|
|
|
|
|
|
|
|
bool nullable;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
NodeSet firstpos, lastpos, followpos;
|
2008-11-07 13:00:05 +00:00
|
|
|
/* child 0 is left, child 1 is right */
|
|
|
|
Node *child[2];
|
2007-03-30 14:13:56 +00:00
|
|
|
|
2010-07-10 17:52:13 -07:00
|
|
|
unsigned int label; /* unique number for debug etc */
|
2007-03-30 14:13:56 +00:00
|
|
|
/**
|
2010-11-09 11:28:22 -08:00
|
|
|
* We indirectly release Nodes through a virtual function because
|
|
|
|
* accept and Eps Nodes are shared, and must be treated specially.
|
|
|
|
* We could use full reference counting here but the indirect release
|
|
|
|
* is sufficient and has less overhead
|
2007-03-30 14:13:56 +00:00
|
|
|
*/
|
2010-11-09 11:28:22 -08:00
|
|
|
virtual void release(void) {
|
|
|
|
delete this;
|
2007-03-30 14:13:56 +00:00
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
};
|
|
|
|
|
2010-11-09 11:34:59 -08:00
|
|
|
class InnerNode : public Node {
|
|
|
|
public:
|
|
|
|
InnerNode() : Node() { };
|
|
|
|
InnerNode(Node *left) : Node(left) {};
|
|
|
|
InnerNode(Node *left, Node *right) : Node(left, right) { };
|
|
|
|
};
|
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
/* Match nothing (//). */
|
|
|
|
class EpsNode : public Node {
|
|
|
|
public:
|
|
|
|
EpsNode()
|
|
|
|
{
|
|
|
|
nullable = true;
|
2010-08-04 09:53:46 -07:00
|
|
|
label = 0;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
2010-11-09 11:28:22 -08:00
|
|
|
void release(void)
|
|
|
|
{
|
|
|
|
/* don't delete Eps nodes because there is a single static instance
|
|
|
|
* shared by all trees. Look for epsnode in the code
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
void compute_firstpos()
|
|
|
|
{
|
|
|
|
}
|
|
|
|
void compute_lastpos()
|
|
|
|
{
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
int eq(Node *other) {
|
|
|
|
if (dynamic_cast<EpsNode *>(other))
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
ostream& dump(ostream& os)
|
|
|
|
{
|
|
|
|
return os << "[]";
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Leaf nodes in the syntax tree are important to us: they describe the
|
|
|
|
* characters that the regular expression matches. We also consider
|
|
|
|
* AcceptNodes import: they indicate when a regular expression matches.
|
|
|
|
*/
|
|
|
|
class ImportantNode : public Node {
|
|
|
|
public:
|
|
|
|
ImportantNode() { }
|
|
|
|
void compute_firstpos()
|
|
|
|
{
|
|
|
|
firstpos.insert(this);
|
|
|
|
}
|
|
|
|
void compute_lastpos() {
|
|
|
|
lastpos.insert(this);
|
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
virtual void follow(NodeCases& cases) = 0;
|
2007-02-27 02:29:16 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Match one specific character (/c/). */
|
|
|
|
class CharNode : public ImportantNode {
|
|
|
|
public:
|
|
|
|
CharNode(uchar c) : c(c) { }
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
void follow(NodeCases& cases)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
NodeSet **x = &cases.cases[c];
|
2007-02-27 02:29:16 +00:00
|
|
|
if (!*x) {
|
|
|
|
if (cases.otherwise)
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
*x = new NodeSet(*cases.otherwise);
|
2007-02-27 02:29:16 +00:00
|
|
|
else
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
*x = new NodeSet;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
(*x)->insert(followpos.begin(), followpos.end());
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
int eq(Node *other) {
|
|
|
|
CharNode *o = dynamic_cast<CharNode *>(other);
|
|
|
|
if (o) {
|
|
|
|
return c == o->c;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
ostream& dump(ostream& os)
|
|
|
|
{
|
|
|
|
return os << c;
|
|
|
|
}
|
|
|
|
|
|
|
|
uchar c;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Match a set of characters (/[abc]/). */
|
|
|
|
class CharSetNode : public ImportantNode {
|
|
|
|
public:
|
|
|
|
CharSetNode(Chars& chars) : chars(chars) { }
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
void follow(NodeCases& cases)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
for (Chars::iterator i = chars.begin(); i != chars.end(); i++) {
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
NodeSet **x = &cases.cases[*i];
|
2007-02-27 02:29:16 +00:00
|
|
|
if (!*x) {
|
|
|
|
if (cases.otherwise)
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
*x = new NodeSet(*cases.otherwise);
|
2007-02-27 02:29:16 +00:00
|
|
|
else
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
*x = new NodeSet;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
(*x)->insert(followpos.begin(), followpos.end());
|
|
|
|
}
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
int eq(Node *other) {
|
|
|
|
CharSetNode *o = dynamic_cast<CharSetNode *>(other);
|
|
|
|
if (!o || chars.size() != o->chars.size())
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
for (Chars::iterator i = chars.begin(), j = o->chars.begin();
|
|
|
|
i != chars.end() && j != o->chars.end();
|
|
|
|
i++, j++) {
|
|
|
|
if (*i != *j)
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
ostream& dump(ostream& os)
|
|
|
|
{
|
|
|
|
os << '[';
|
|
|
|
for (Chars::iterator i = chars.begin(); i != chars.end(); i++)
|
|
|
|
os << *i;
|
|
|
|
return os << ']';
|
|
|
|
}
|
|
|
|
|
|
|
|
Chars chars;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Match all except one character (/[^abc]/). */
|
|
|
|
class NotCharSetNode : public ImportantNode {
|
|
|
|
public:
|
|
|
|
NotCharSetNode(Chars& chars) : chars(chars) { }
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
void follow(NodeCases& cases)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
if (!cases.otherwise)
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
cases.otherwise = new NodeSet;
|
2007-02-27 02:29:16 +00:00
|
|
|
for (Chars::iterator j = chars.begin(); j != chars.end(); j++) {
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
NodeSet **x = &cases.cases[*j];
|
2007-02-27 02:29:16 +00:00
|
|
|
if (!*x)
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
*x = new NodeSet(*cases.otherwise);
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
/**
|
|
|
|
* Note: Add to the nonmatching characters after copying away the
|
|
|
|
* old otherwise state for the matching characters.
|
|
|
|
*/
|
|
|
|
cases.otherwise->insert(followpos.begin(), followpos.end());
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (NodeCases::iterator i = cases.begin(); i != cases.end(); i++) {
|
2007-02-27 02:29:16 +00:00
|
|
|
if (chars.find(i->first) == chars.end())
|
|
|
|
i->second->insert(followpos.begin(), followpos.end());
|
|
|
|
}
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
int eq(Node *other) {
|
|
|
|
NotCharSetNode *o = dynamic_cast<NotCharSetNode *>(other);
|
|
|
|
if (!o || chars.size() != o->chars.size())
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
for (Chars::iterator i = chars.begin(), j = o->chars.begin();
|
|
|
|
i != chars.end() && j != o->chars.end();
|
|
|
|
i++, j++) {
|
|
|
|
if (*i != *j)
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
ostream& dump(ostream& os)
|
|
|
|
{
|
|
|
|
os << "[^";
|
|
|
|
for (Chars::iterator i = chars.begin(); i != chars.end(); i++)
|
|
|
|
os << *i;
|
|
|
|
return os << ']';
|
|
|
|
}
|
|
|
|
|
|
|
|
Chars chars;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Match any character (/./). */
|
|
|
|
class AnyCharNode : public ImportantNode {
|
|
|
|
public:
|
|
|
|
AnyCharNode() { }
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
void follow(NodeCases& cases)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
if (!cases.otherwise)
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
cases.otherwise = new NodeSet;
|
2007-02-27 02:29:16 +00:00
|
|
|
cases.otherwise->insert(followpos.begin(), followpos.end());
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (NodeCases::iterator i = cases.begin(); i != cases.end(); i++)
|
2007-02-27 02:29:16 +00:00
|
|
|
i->second->insert(followpos.begin(), followpos.end());
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
int eq(Node *other) {
|
|
|
|
if (dynamic_cast<AnyCharNode *>(other))
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
ostream& dump(ostream& os) {
|
|
|
|
return os << ".";
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Indicate that a regular expression matches. An AcceptNode itself
|
|
|
|
* doesn't match anything, so it will never generate any transitions.
|
|
|
|
*/
|
|
|
|
class AcceptNode : public ImportantNode {
|
|
|
|
public:
|
2007-03-30 20:38:51 +00:00
|
|
|
AcceptNode() {}
|
2010-11-09 11:28:22 -08:00
|
|
|
void release(void)
|
|
|
|
{
|
|
|
|
/* don't delete AcceptNode via release as they are shared,
|
|
|
|
* and will be deleted when the table the are stored in is deleted
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
void follow(NodeCases& cases)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
/* Nothing to follow. */
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
/* requires accept nodes to be common by pointer */
|
|
|
|
int eq(Node *other) {
|
|
|
|
if (dynamic_cast<AcceptNode *>(other))
|
|
|
|
return (this == other);
|
|
|
|
return 0;
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Match a pair of consecutive nodes. */
|
2010-11-09 11:34:59 -08:00
|
|
|
class CatNode : public InnerNode {
|
2007-02-27 02:29:16 +00:00
|
|
|
public:
|
|
|
|
CatNode(Node *left, Node *right) :
|
2010-11-09 11:34:59 -08:00
|
|
|
InnerNode(left, right) { }
|
2007-02-27 02:29:16 +00:00
|
|
|
void compute_nullable()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
nullable = child[0]->nullable && child[1]->nullable;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
void compute_firstpos()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
if (child[0]->nullable)
|
|
|
|
firstpos = child[0]->firstpos + child[1]->firstpos;
|
2007-02-27 02:29:16 +00:00
|
|
|
else
|
2008-11-07 13:00:05 +00:00
|
|
|
firstpos = child[0]->firstpos;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
void compute_lastpos()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
if (child[1]->nullable)
|
|
|
|
lastpos = child[0]->lastpos + child[1]->lastpos;
|
2007-02-27 02:29:16 +00:00
|
|
|
else
|
2008-11-07 13:00:05 +00:00
|
|
|
lastpos = child[1]->lastpos;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
void compute_followpos()
|
|
|
|
{
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
NodeSet from = child[0]->lastpos, to = child[1]->firstpos;
|
|
|
|
for(NodeSet::iterator i = from.begin(); i != from.end(); i++) {
|
2007-02-27 02:29:16 +00:00
|
|
|
(*i)->followpos.insert(to.begin(), to.end());
|
|
|
|
}
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
int eq(Node *other) {
|
|
|
|
if (dynamic_cast<CatNode *>(other)) {
|
|
|
|
if (!child[0]->eq(other->child[0]))
|
|
|
|
return 0;
|
|
|
|
return child[1]->eq(other->child[1]);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
ostream& dump(ostream& os)
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
child[0]->dump(os);
|
|
|
|
child[1]->dump(os);
|
2007-02-27 02:29:16 +00:00
|
|
|
return os;
|
|
|
|
//return os << ' ';
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Match a node zero or more times. (This is a unary operator.) */
|
2010-11-09 11:34:59 -08:00
|
|
|
class StarNode : public InnerNode {
|
2007-02-27 02:29:16 +00:00
|
|
|
public:
|
|
|
|
StarNode(Node *left) :
|
2010-11-09 11:34:59 -08:00
|
|
|
InnerNode(left)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
nullable = true;
|
|
|
|
}
|
|
|
|
void compute_firstpos()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
firstpos = child[0]->firstpos;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
void compute_lastpos()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
lastpos = child[0]->lastpos;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
void compute_followpos()
|
|
|
|
{
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
NodeSet from = child[0]->lastpos, to = child[0]->firstpos;
|
|
|
|
for(NodeSet::iterator i = from.begin(); i != from.end(); i++) {
|
2007-02-27 02:29:16 +00:00
|
|
|
(*i)->followpos.insert(to.begin(), to.end());
|
|
|
|
}
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
int eq(Node *other) {
|
|
|
|
if (dynamic_cast<StarNode *>(other))
|
|
|
|
return child[0]->eq(other->child[0]);
|
|
|
|
return 0;
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
ostream& dump(ostream& os)
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
os << '(';
|
|
|
|
child[0]->dump(os);
|
|
|
|
return os << ")*";
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Match a node one or more times. (This is a unary operator.) */
|
2010-11-09 11:34:59 -08:00
|
|
|
class PlusNode : public InnerNode {
|
2007-02-27 02:29:16 +00:00
|
|
|
public:
|
|
|
|
PlusNode(Node *left) :
|
2010-11-09 11:34:59 -08:00
|
|
|
InnerNode(left) { }
|
2007-02-27 02:29:16 +00:00
|
|
|
void compute_nullable()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
nullable = child[0]->nullable;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
void compute_firstpos()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
firstpos = child[0]->firstpos;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
void compute_lastpos()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
lastpos = child[0]->lastpos;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
void compute_followpos()
|
|
|
|
{
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
NodeSet from = child[0]->lastpos, to = child[0]->firstpos;
|
|
|
|
for(NodeSet::iterator i = from.begin(); i != from.end(); i++) {
|
2007-02-27 02:29:16 +00:00
|
|
|
(*i)->followpos.insert(to.begin(), to.end());
|
|
|
|
}
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
int eq(Node *other) {
|
|
|
|
if (dynamic_cast<PlusNode *>(other))
|
|
|
|
return child[0]->eq(other->child[0]);
|
|
|
|
return 0;
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
ostream& dump(ostream& os)
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
os << '(';
|
|
|
|
child[0]->dump(os);
|
|
|
|
return os << ")+";
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Match one of two alternative nodes. */
|
2010-11-09 11:34:59 -08:00
|
|
|
class AltNode : public InnerNode {
|
2007-02-27 02:29:16 +00:00
|
|
|
public:
|
|
|
|
AltNode(Node *left, Node *right) :
|
2010-11-09 11:34:59 -08:00
|
|
|
InnerNode(left, right) { }
|
2007-02-27 02:29:16 +00:00
|
|
|
void compute_nullable()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
nullable = child[0]->nullable || child[1]->nullable;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
void compute_lastpos()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
lastpos = child[0]->lastpos + child[1]->lastpos;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
void compute_firstpos()
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
firstpos = child[0]->firstpos + child[1]->firstpos;
|
|
|
|
}
|
|
|
|
int eq(Node *other) {
|
|
|
|
if (dynamic_cast<AltNode *>(other)) {
|
|
|
|
if (!child[0]->eq(other->child[0]))
|
|
|
|
return 0;
|
|
|
|
return child[1]->eq(other->child[1]);
|
|
|
|
}
|
|
|
|
return 0;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
ostream& dump(ostream& os)
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
os << '(';
|
|
|
|
child[0]->dump(os);
|
|
|
|
os << '|';
|
|
|
|
child[1]->dump(os);
|
|
|
|
os << ')';
|
|
|
|
return os;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
};
|
2008-11-07 13:00:05 +00:00
|
|
|
|
2010-08-04 09:53:46 -07:00
|
|
|
/* Use a single static EpsNode as it carries no node specific information */
|
|
|
|
static EpsNode epsnode;
|
|
|
|
|
2008-11-07 13:00:05 +00:00
|
|
|
/*
|
2010-08-04 10:23:22 -07:00
|
|
|
* Normalize the regex parse tree for factoring and cancelations. Normalization
|
|
|
|
* reorganizes internal (alt and cat) nodes into a fixed "normalized" form that
|
|
|
|
* simplifies factoring code, in that it produces a canonicalized form for
|
|
|
|
* the direction being normalized so that the factoring code does not have
|
|
|
|
* to consider as many cases.
|
|
|
|
*
|
2008-11-07 13:00:05 +00:00
|
|
|
* left normalization (dir == 0) uses these rules
|
|
|
|
* (E | a) -> (a | E)
|
|
|
|
* (a | b) | c -> a | (b | c)
|
|
|
|
* (ab)c -> a(bc)
|
|
|
|
*
|
|
|
|
* right normalization (dir == 1) uses the same rules but reversed
|
|
|
|
* (a | E) -> (E | a)
|
|
|
|
* a | (b | c) -> (a | b) | c
|
|
|
|
* a(bc) -> (ab)c
|
2010-08-04 10:23:22 -07:00
|
|
|
*
|
|
|
|
* Note: This is written iteratively for a given node (the top node stays
|
|
|
|
* fixed and the children are rotated) instead of recursively.
|
|
|
|
* For a given node under examination rotate over nodes from
|
|
|
|
* dir to !dir. Until no dir direction node meets the criterial.
|
|
|
|
* Then recurse to the children (which will have a different node type)
|
|
|
|
* to make sure they are normalized.
|
|
|
|
* Normalization of a child node is guarenteed to not affect the
|
|
|
|
* normalization of the parent.
|
|
|
|
*
|
|
|
|
* For cat nodes the depth first traverse order is guarenteed to be
|
|
|
|
* maintained. This is not necessary for altnodes.
|
|
|
|
*
|
|
|
|
* Eg. For left normalization
|
|
|
|
*
|
|
|
|
* |1 |1
|
|
|
|
* / \ / \
|
|
|
|
* |2 T -> a |2
|
|
|
|
* / \ / \
|
|
|
|
* |3 c b |3
|
|
|
|
* / \ / \
|
|
|
|
* a b c T
|
|
|
|
*
|
2008-11-07 13:00:05 +00:00
|
|
|
*/
|
2008-11-19 16:54:26 +00:00
|
|
|
void normalize_tree(Node *t, int dir)
|
2008-11-07 13:00:05 +00:00
|
|
|
{
|
|
|
|
if (dynamic_cast<ImportantNode *>(t))
|
2008-11-19 16:54:26 +00:00
|
|
|
return;
|
2008-11-07 13:00:05 +00:00
|
|
|
|
2008-11-19 16:54:26 +00:00
|
|
|
for (;;) {
|
2008-11-20 16:19:51 +00:00
|
|
|
if (!dynamic_cast<EpsNode *>(t->child[!dir]) &&
|
|
|
|
((dynamic_cast<AltNode *>(t) &&
|
|
|
|
dynamic_cast<EpsNode *>(t->child[dir])) ||
|
|
|
|
(dynamic_cast<CatNode *>(t) &&
|
|
|
|
dynamic_cast<EpsNode *>(t->child[dir])))) {
|
2008-11-07 13:00:05 +00:00
|
|
|
// (E | a) -> (a | E)
|
|
|
|
// Ea -> aE
|
|
|
|
Node *c = t->child[dir];
|
|
|
|
t->child[dir] = t->child[!dir];
|
|
|
|
t->child[!dir] = c;
|
2010-08-04 10:23:22 -07:00
|
|
|
// Don't break here as 'a' may be a tree that
|
|
|
|
// can be pulled up.
|
2008-11-19 16:54:26 +00:00
|
|
|
} else if ((dynamic_cast<AltNode *>(t) &&
|
2008-11-20 16:19:51 +00:00
|
|
|
dynamic_cast<AltNode *>(t->child[dir])) ||
|
|
|
|
(dynamic_cast<CatNode *>(t) &&
|
|
|
|
dynamic_cast<CatNode *>(t->child[dir]))) {
|
2008-11-07 13:00:05 +00:00
|
|
|
// (a | b) | c -> a | (b | c)
|
|
|
|
// (ab)c -> a(bc)
|
|
|
|
Node *c = t->child[dir];
|
|
|
|
t->child[dir] = c->child[dir];
|
|
|
|
c->child[dir] = c->child[!dir];
|
|
|
|
c->child[!dir] = t->child[!dir];
|
|
|
|
t->child[!dir] = c;
|
2008-11-20 13:23:13 +00:00
|
|
|
} else if (dynamic_cast<AltNode *>(t) &&
|
|
|
|
dynamic_cast<CharSetNode *>(t->child[dir]) &&
|
|
|
|
dynamic_cast<CharNode *>(t->child[!dir])) {
|
2010-08-04 10:23:22 -07:00
|
|
|
// [a] | b -> b | [a]
|
2008-11-20 13:23:13 +00:00
|
|
|
Node *c = t->child[dir];
|
|
|
|
t->child[dir] = t->child[!dir];
|
|
|
|
t->child[!dir] = c;
|
2008-11-07 13:00:05 +00:00
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2008-11-19 16:54:26 +00:00
|
|
|
if (t->child[dir])
|
|
|
|
normalize_tree(t->child[dir], dir);
|
|
|
|
if (t->child[!dir])
|
|
|
|
normalize_tree(t->child[!dir], dir);
|
2008-11-07 13:00:05 +00:00
|
|
|
}
|
|
|
|
|
2008-12-03 03:47:31 +00:00
|
|
|
//charset conversion is disabled for now,
|
|
|
|
//it hinders tree optimization in some cases, so it need to be either
|
|
|
|
//done post optimization, or have extra factoring rules added
|
|
|
|
#if 0
|
|
|
|
static Node *merge_charset(Node *a, Node *b)
|
|
|
|
{
|
|
|
|
if (dynamic_cast<CharNode *>(a) &&
|
|
|
|
dynamic_cast<CharNode *>(b)) {
|
|
|
|
Chars chars;
|
|
|
|
chars.insert(dynamic_cast<CharNode *>(a)->c);
|
|
|
|
chars.insert(dynamic_cast<CharNode *>(b)->c);
|
|
|
|
CharSetNode *n = new CharSetNode(chars);
|
|
|
|
return n;
|
|
|
|
} else if (dynamic_cast<CharNode *>(a) &&
|
|
|
|
dynamic_cast<CharSetNode *>(b)) {
|
|
|
|
Chars *chars = &dynamic_cast<CharSetNode *>(b)->chars;
|
|
|
|
chars->insert(dynamic_cast<CharNode *>(a)->c);
|
2010-11-09 11:28:22 -08:00
|
|
|
return b;
|
2008-12-03 03:47:31 +00:00
|
|
|
} else if (dynamic_cast<CharSetNode *>(a) &&
|
|
|
|
dynamic_cast<CharSetNode *>(b)) {
|
|
|
|
Chars *from = &dynamic_cast<CharSetNode *>(a)->chars;
|
|
|
|
Chars *to = &dynamic_cast<CharSetNode *>(b)->chars;
|
|
|
|
for (Chars::iterator i = from->begin(); i != from->end(); i++)
|
|
|
|
to->insert(*i);
|
2010-11-09 11:28:22 -08:00
|
|
|
return b;
|
2008-12-03 03:47:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
//return ???;
|
|
|
|
}
|
|
|
|
|
|
|
|
static Node *alt_to_charsets(Node *t, int dir)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Node *first = NULL;
|
|
|
|
Node *p = t;
|
|
|
|
Node *i = t;
|
|
|
|
for (;dynamic_cast<AltNode *>(i);) {
|
|
|
|
if (dynamic_cast<CharNode *>(i->child[dir]) ||
|
|
|
|
dynamic_cast<CharNodeSet *>(i->child[dir])) {
|
|
|
|
if (!first) {
|
|
|
|
first = i;
|
|
|
|
p = i;
|
|
|
|
i = i->child[!dir];
|
|
|
|
} else {
|
|
|
|
first->child[dir] = merge_charset(first->child[dir],
|
|
|
|
i->child[dir]);
|
2010-11-09 11:28:22 -08:00
|
|
|
p->child[!dir] = i->child[!dir];
|
2008-12-03 03:47:31 +00:00
|
|
|
Node *tmp = i;
|
2010-11-09 11:28:22 -08:00
|
|
|
i = tmp->child[!dir];
|
|
|
|
tmp->child[!dir] = NULL;
|
2008-12-03 03:47:31 +00:00
|
|
|
tmp->release();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
p = i;
|
|
|
|
i = i->child[!dir];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// last altnode of chain check other dir as well
|
|
|
|
if (first && (dynamic_cast<charNode *>(i) ||
|
|
|
|
dynamic_cast<charNodeSet *>(i))) {
|
|
|
|
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
if (dynamic_cast<CharNode *>(t->child[dir]) ||
|
|
|
|
dynamic_cast<CharSetNode *>(t->child[dir]))
|
|
|
|
char_test = true;
|
|
|
|
(char_test &&
|
|
|
|
(dynamic_cast<CharNode *>(i->child[dir]) ||
|
|
|
|
dynamic_cast<CharSetNode *>(i->child[dir])))) {
|
|
|
|
*/
|
|
|
|
return t;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static Node *basic_alt_factor(Node *t, int dir)
|
|
|
|
{
|
|
|
|
if (!dynamic_cast<AltNode *>(t))
|
|
|
|
return t;
|
|
|
|
|
|
|
|
if (t->child[dir]->eq(t->child[!dir])) {
|
|
|
|
// (a | a) -> a
|
2010-11-09 11:28:22 -08:00
|
|
|
Node *tmp = t->child[dir];
|
|
|
|
t->child[dir] = NULL;
|
2008-12-03 03:47:31 +00:00
|
|
|
t->release();
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
// (ab) | (ac) -> a(b|c)
|
|
|
|
if (dynamic_cast<CatNode *>(t->child[dir]) &&
|
|
|
|
dynamic_cast<CatNode *>(t->child[!dir]) &&
|
|
|
|
t->child[dir]->child[dir]->eq(t->child[!dir]->child[dir])) {
|
|
|
|
// (ab) | (ac) -> a(b|c)
|
|
|
|
Node *left = t->child[dir];
|
|
|
|
Node *right = t->child[!dir];
|
|
|
|
t->child[dir] = left->child[!dir];
|
2010-11-09 11:28:22 -08:00
|
|
|
t->child[!dir] = right->child[!dir];
|
|
|
|
right->child[!dir] = NULL;
|
2008-12-03 03:47:31 +00:00
|
|
|
right->release();
|
2010-11-09 11:28:22 -08:00
|
|
|
left->child[!dir] = t;
|
2008-12-03 03:47:31 +00:00
|
|
|
return left;
|
|
|
|
}
|
|
|
|
|
|
|
|
// a | (ab) -> a (E | b) -> a (b | E)
|
|
|
|
if (dynamic_cast<CatNode *>(t->child[!dir]) &&
|
|
|
|
t->child[dir]->eq(t->child[!dir]->child[dir])) {
|
|
|
|
Node *c = t->child[!dir];
|
|
|
|
t->child[dir]->release();
|
|
|
|
t->child[dir] = c->child[!dir];
|
2010-11-09 11:28:22 -08:00
|
|
|
t->child[!dir] = &epsnode;
|
2008-12-03 03:47:31 +00:00
|
|
|
c->child[!dir] = t;
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ab | (a) -> a (b | E)
|
|
|
|
if (dynamic_cast<CatNode *>(t->child[dir]) &&
|
|
|
|
t->child[dir]->child[dir]->eq(t->child[!dir])) {
|
|
|
|
Node *c = t->child[dir];
|
|
|
|
t->child[!dir]->release();
|
|
|
|
t->child[dir] = c->child[!dir];
|
2010-11-09 11:28:22 -08:00
|
|
|
t->child[!dir] = &epsnode;
|
2008-12-03 03:47:31 +00:00
|
|
|
c->child[!dir] = t;
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
return t;
|
|
|
|
}
|
|
|
|
|
|
|
|
static Node *basic_simplify(Node *t, int dir)
|
|
|
|
{
|
|
|
|
if (dynamic_cast<CatNode *>(t) &&
|
|
|
|
dynamic_cast<EpsNode *>(t->child[!dir])) {
|
|
|
|
// aE -> a
|
2010-11-09 11:28:22 -08:00
|
|
|
Node *tmp = t->child[dir];
|
|
|
|
t->child[dir] = NULL;
|
2008-12-03 03:47:31 +00:00
|
|
|
t->release();
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
return basic_alt_factor(t, dir);
|
|
|
|
}
|
|
|
|
|
2008-11-07 13:00:05 +00:00
|
|
|
/*
|
|
|
|
* assumes a normalized tree. reductions shown for left normalization
|
|
|
|
* aE -> a
|
|
|
|
* (a | a) -> a
|
|
|
|
** factoring patterns
|
|
|
|
* a | (a | b) -> (a | b)
|
|
|
|
* a | (ab) -> a (E | b) -> a (b | E)
|
|
|
|
* (ab) | (ac) -> a(b|c)
|
|
|
|
*
|
|
|
|
* returns t - if no simplifications were made
|
|
|
|
* a new root node - if simplifications were made
|
|
|
|
*/
|
2008-11-20 13:21:23 +00:00
|
|
|
Node *simplify_tree_base(Node *t, int dir, bool &mod)
|
2008-11-07 13:00:05 +00:00
|
|
|
{
|
|
|
|
if (dynamic_cast<ImportantNode *>(t))
|
|
|
|
return t;
|
|
|
|
|
|
|
|
for (int i=0; i < 2; i++) {
|
|
|
|
if (t->child[i]) {
|
2008-11-20 13:21:23 +00:00
|
|
|
Node *c = simplify_tree_base(t->child[i], dir, mod);
|
|
|
|
if (c != t->child[i]) {
|
2008-11-07 13:00:05 +00:00
|
|
|
t->child[i] = c;
|
2008-11-20 13:21:23 +00:00
|
|
|
mod = true;
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-12-03 03:47:31 +00:00
|
|
|
// only iterate on loop if modification made
|
|
|
|
for (;; mod = true) {
|
2008-11-20 13:18:30 +00:00
|
|
|
|
2008-12-03 03:47:31 +00:00
|
|
|
Node *tmp = basic_simplify(t, dir);
|
|
|
|
if (tmp != t) {
|
|
|
|
t = tmp;
|
2008-11-20 13:23:13 +00:00
|
|
|
continue;
|
2008-12-03 03:47:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* all tests after this must meet 2 alt node condition */
|
|
|
|
if (!dynamic_cast<AltNode *>(t) ||
|
|
|
|
!dynamic_cast<AltNode *>(t->child[!dir]))
|
|
|
|
break;
|
|
|
|
|
2008-11-07 13:00:05 +00:00
|
|
|
// a | (a | b) -> (a | b)
|
|
|
|
// a | (b | (c | a)) -> (b | (c | a))
|
2008-12-03 03:47:31 +00:00
|
|
|
Node *p = t;
|
2008-11-07 13:00:05 +00:00
|
|
|
Node *i = t->child[!dir];
|
2008-12-03 03:47:31 +00:00
|
|
|
for (;dynamic_cast<AltNode *>(i); p = i, i = i->child[!dir]) {
|
2008-11-07 13:00:05 +00:00
|
|
|
if (t->child[dir]->eq(i->child[dir])) {
|
2010-11-09 11:28:22 -08:00
|
|
|
Node *tmp = t->child[!dir];
|
|
|
|
t->child[!dir] = NULL;
|
2008-12-03 03:47:31 +00:00
|
|
|
t->release();
|
2010-11-09 11:28:22 -08:00
|
|
|
t = tmp;
|
2008-11-20 13:18:30 +00:00
|
|
|
continue;
|
2008-11-07 13:00:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// last altnode of chain check other dir as well
|
2008-12-03 03:47:31 +00:00
|
|
|
if (t->child[dir]->eq(p->child[!dir])) {
|
2010-11-09 11:28:22 -08:00
|
|
|
Node *tmp = t->child[!dir];
|
|
|
|
t->child[!dir] = NULL;
|
2008-12-03 03:47:31 +00:00
|
|
|
t->release();
|
2010-11-09 11:28:22 -08:00
|
|
|
t = tmp;
|
2008-11-20 13:18:30 +00:00
|
|
|
continue;
|
2008-11-07 13:00:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
//exact match didn't work, try factoring front
|
|
|
|
//a | (ac | (ad | () -> (a (E | c)) | (...)
|
|
|
|
//ab | (ac | (...)) -> (a (b | c)) | (...)
|
|
|
|
//ab | (a | (...)) -> (a (b | E)) | (...)
|
2008-12-03 03:47:31 +00:00
|
|
|
Node *pp;
|
|
|
|
int count = 0;
|
|
|
|
Node *subject = t->child[dir];
|
|
|
|
Node *a = subject;
|
2010-11-09 11:28:22 -08:00
|
|
|
if (dynamic_cast<CatNode *>(subject))
|
|
|
|
a = subject->child[dir];
|
2008-11-07 13:00:05 +00:00
|
|
|
|
2008-12-03 03:47:31 +00:00
|
|
|
for (pp = p = t, i = t->child[!dir];
|
|
|
|
dynamic_cast<AltNode *>(i); ) {
|
2008-11-07 13:00:05 +00:00
|
|
|
if ((dynamic_cast<CatNode *>(i->child[dir]) &&
|
|
|
|
a->eq(i->child[dir]->child[dir])) ||
|
|
|
|
(a->eq(i->child[dir]))) {
|
2008-12-03 03:47:31 +00:00
|
|
|
// extract matching alt node
|
|
|
|
p->child[!dir] = i->child[!dir];
|
|
|
|
i->child[!dir] = subject;
|
|
|
|
subject = basic_simplify(i, dir);
|
2010-11-09 11:28:22 -08:00
|
|
|
if (dynamic_cast<CatNode *>(subject))
|
|
|
|
a = subject->child[dir];
|
|
|
|
else
|
|
|
|
a = subject;
|
2008-12-03 03:47:31 +00:00
|
|
|
|
|
|
|
i = p->child[!dir];
|
|
|
|
count++;
|
|
|
|
} else {
|
|
|
|
pp = p; p = i; i = i->child[!dir];
|
2008-11-07 13:00:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-12-03 03:47:31 +00:00
|
|
|
// last altnode in chain check other dir as well
|
|
|
|
if ((dynamic_cast<CatNode *>(i) &&
|
|
|
|
a->eq(i->child[dir])) ||
|
|
|
|
(a->eq(i))) {
|
|
|
|
count++;
|
|
|
|
if (t == p) {
|
|
|
|
t->child[dir] = subject;
|
|
|
|
t = basic_simplify(t, dir);
|
|
|
|
} else {
|
|
|
|
t->child[dir] = p->child[dir];
|
|
|
|
p->child[dir] = subject;
|
|
|
|
pp->child[!dir] = basic_simplify(p, dir);
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
} else {
|
2008-12-03 03:47:31 +00:00
|
|
|
t->child[dir] = i;
|
|
|
|
p->child[!dir] = subject;
|
2008-11-07 13:00:05 +00:00
|
|
|
}
|
|
|
|
|
2008-12-03 03:47:31 +00:00
|
|
|
if (count == 0)
|
|
|
|
break;
|
2008-11-07 13:00:05 +00:00
|
|
|
}
|
|
|
|
return t;
|
|
|
|
}
|
|
|
|
|
|
|
|
int debug_tree(Node *t)
|
|
|
|
{
|
|
|
|
int nodes = 1;
|
|
|
|
|
|
|
|
if (!dynamic_cast<ImportantNode *>(t)) {
|
|
|
|
if (t->child[0])
|
|
|
|
nodes += debug_tree(t->child[0]);
|
|
|
|
if (t->child[1])
|
|
|
|
nodes += debug_tree(t->child[1]);
|
|
|
|
}
|
|
|
|
return nodes;
|
|
|
|
}
|
|
|
|
|
2010-01-08 02:17:45 -08:00
|
|
|
struct node_counts {
|
|
|
|
int charnode;
|
|
|
|
int charset;
|
|
|
|
int notcharset;
|
|
|
|
int alt;
|
|
|
|
int plus;
|
|
|
|
int star;
|
|
|
|
int any;
|
|
|
|
int cat;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
static void count_tree_nodes(Node *t, struct node_counts *counts)
|
|
|
|
{
|
|
|
|
if (dynamic_cast<AltNode *>(t)) {
|
|
|
|
counts->alt++;
|
|
|
|
count_tree_nodes(t->child[0], counts);
|
|
|
|
count_tree_nodes(t->child[1], counts);
|
|
|
|
} else if (dynamic_cast<CatNode *>(t)) {
|
|
|
|
counts->cat++;
|
|
|
|
count_tree_nodes(t->child[0], counts);
|
|
|
|
count_tree_nodes(t->child[1], counts);
|
|
|
|
} else if (dynamic_cast<PlusNode *>(t)) {
|
|
|
|
counts->plus++;
|
|
|
|
count_tree_nodes(t->child[0], counts);
|
|
|
|
} else if (dynamic_cast<StarNode *>(t)) {
|
|
|
|
counts->star++;
|
|
|
|
count_tree_nodes(t->child[0], counts);
|
|
|
|
} else if (dynamic_cast<CharNode *>(t)) {
|
|
|
|
counts->charnode++;
|
|
|
|
} else if (dynamic_cast<AnyCharNode *>(t)) {
|
|
|
|
counts->any++;
|
|
|
|
} else if (dynamic_cast<CharSetNode *>(t)) {
|
|
|
|
counts->charset++;
|
|
|
|
} else if (dynamic_cast<NotCharSetNode *>(t)) {
|
|
|
|
counts->notcharset++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#include "stdio.h"
|
|
|
|
#include "stdint.h"
|
|
|
|
#include "apparmor_re.h"
|
|
|
|
|
|
|
|
Node *simplify_tree(Node *t, dfaflags_t flags)
|
2008-11-07 13:00:05 +00:00
|
|
|
{
|
2008-11-20 13:21:23 +00:00
|
|
|
bool update;
|
2010-01-08 02:17:45 -08:00
|
|
|
|
|
|
|
if (flags & DFA_DUMP_TREE_STATS) {
|
|
|
|
struct node_counts counts = { };
|
|
|
|
count_tree_nodes(t, &counts);
|
|
|
|
fprintf(stderr, "expr tree: c %d, [] %d, [^] %d, | %d, + %d, * %d, . %d, cat %d\n", counts.charnode, counts.charset, counts.notcharset, counts.alt, counts.plus, counts.star, counts.any, counts.cat);
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
do {
|
2008-12-03 03:47:31 +00:00
|
|
|
update = false;
|
2010-01-08 04:30:56 -08:00
|
|
|
//default to right normalize first as this reduces the number
|
2008-11-07 13:00:05 +00:00
|
|
|
//of trailing nodes which might follow an internal *
|
|
|
|
//or **, which is where state explosion can happen
|
|
|
|
//eg. in one test this makes the difference between
|
|
|
|
// the dfa having about 7 thousands states,
|
|
|
|
// and it having about 1.25 million states
|
2010-01-08 04:30:56 -08:00
|
|
|
int dir = 1;
|
|
|
|
if (flags & DFA_CONTROL_TREE_LEFT)
|
|
|
|
dir = 0;
|
|
|
|
for (int count = 0; count < 2; count++) {
|
2008-12-03 03:47:31 +00:00
|
|
|
bool modified;
|
|
|
|
do {
|
|
|
|
modified = false;
|
2010-11-09 11:23:45 -08:00
|
|
|
if (flags & DFA_CONTROL_TREE_NORMAL)
|
2010-01-08 04:30:56 -08:00
|
|
|
normalize_tree(t, dir);
|
2008-12-03 03:47:31 +00:00
|
|
|
t = simplify_tree_base(t, dir, modified);
|
|
|
|
if (modified)
|
|
|
|
update = true;
|
|
|
|
} while (modified);
|
2010-01-08 04:30:56 -08:00
|
|
|
if (flags & DFA_CONTROL_TREE_LEFT)
|
|
|
|
dir++;
|
|
|
|
else
|
|
|
|
dir--;
|
2008-11-07 13:00:05 +00:00
|
|
|
}
|
|
|
|
} while(update);
|
2010-01-08 02:17:45 -08:00
|
|
|
if (flags & DFA_DUMP_TREE_STATS) {
|
|
|
|
struct node_counts counts = { };
|
|
|
|
count_tree_nodes(t, &counts);
|
|
|
|
fprintf(stderr, "simplified expr tree: c %d, [] %d, [^] %d, | %d, + %d, * %d, . %d, cat %d\n", counts.charnode, counts.charset, counts.notcharset, counts.alt, counts.plus, counts.star, counts.any, counts.cat);
|
|
|
|
}
|
2008-11-07 13:00:05 +00:00
|
|
|
return t;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
%}
|
|
|
|
|
|
|
|
%union {
|
|
|
|
char c;
|
|
|
|
Node *node;
|
|
|
|
Chars *cset;
|
|
|
|
}
|
|
|
|
|
|
|
|
%{
|
2007-03-30 20:38:51 +00:00
|
|
|
void regexp_error(Node **, const char *, const char *);
|
2007-02-27 02:29:16 +00:00
|
|
|
# define YYLEX_PARAM &text
|
|
|
|
int regexp_lex(YYSTYPE *, const char **);
|
|
|
|
|
|
|
|
static inline Chars*
|
|
|
|
insert_char(Chars* cset, uchar a)
|
|
|
|
{
|
|
|
|
cset->insert(a);
|
|
|
|
return cset;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline Chars*
|
|
|
|
insert_char_range(Chars* cset, uchar a, uchar b)
|
|
|
|
{
|
|
|
|
if (a > b)
|
|
|
|
swap(a, b);
|
|
|
|
for (uchar i = a; i <= b; i++)
|
|
|
|
cset->insert(i);
|
|
|
|
return cset;
|
|
|
|
}
|
|
|
|
%}
|
|
|
|
|
|
|
|
%pure-parser
|
|
|
|
/* %error-verbose */
|
|
|
|
%parse-param {Node **root}
|
|
|
|
%parse-param {const char *text}
|
|
|
|
%name-prefix = "regexp_"
|
|
|
|
|
|
|
|
%token <c> CHAR
|
|
|
|
%type <c> regex_char cset_char1 cset_char cset_charN
|
|
|
|
%type <cset> charset cset_chars
|
|
|
|
%type <node> regexp expr terms0 terms qterm term
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Note: destroy all nodes upon failure, but *not* the start symbol once
|
|
|
|
* parsing succeeds!
|
|
|
|
*/
|
2007-03-30 14:13:56 +00:00
|
|
|
%destructor { $$->release(); } expr terms0 terms qterm term
|
2007-02-27 02:29:16 +00:00
|
|
|
|
|
|
|
%%
|
|
|
|
|
|
|
|
/* FIXME: Does not parse "[--]", "[---]", "[^^-x]". I don't actually know
|
|
|
|
which precise grammer Perl regexps use, and rediscovering that
|
|
|
|
is proving to be painful. */
|
|
|
|
|
2010-11-09 11:28:22 -08:00
|
|
|
regexp : /* empty */ { *root = $$ = &epsnode; }
|
2007-02-27 02:29:16 +00:00
|
|
|
| expr { *root = $$ = $1; }
|
|
|
|
;
|
|
|
|
|
|
|
|
expr : terms
|
|
|
|
| expr '|' terms0 { $$ = new AltNode($1, $3); }
|
2010-11-09 11:28:22 -08:00
|
|
|
| '|' terms0 { $$ = new AltNode(&epsnode, $2); }
|
2007-02-27 02:29:16 +00:00
|
|
|
;
|
|
|
|
|
2010-11-09 11:28:22 -08:00
|
|
|
terms0 : /* empty */ { $$ = &epsnode; }
|
2007-02-27 02:29:16 +00:00
|
|
|
| terms
|
|
|
|
;
|
|
|
|
|
|
|
|
terms : qterm
|
|
|
|
| terms qterm { $$ = new CatNode($1, $2); }
|
|
|
|
;
|
|
|
|
|
|
|
|
qterm : term
|
2007-03-30 20:38:51 +00:00
|
|
|
| term '*' { $$ = new StarNode($1); }
|
|
|
|
| term '+' { $$ = new PlusNode($1); }
|
2007-02-27 02:29:16 +00:00
|
|
|
;
|
|
|
|
|
2007-03-30 20:38:51 +00:00
|
|
|
term : '.' { $$ = new AnyCharNode; }
|
2007-02-27 02:29:16 +00:00
|
|
|
| regex_char { $$ = new CharNode($1); }
|
|
|
|
| '[' charset ']' { $$ = new CharSetNode(*$2);
|
2007-03-30 20:38:51 +00:00
|
|
|
delete $2; }
|
2007-02-27 02:29:16 +00:00
|
|
|
| '[' '^' charset ']'
|
|
|
|
{ $$ = new NotCharSetNode(*$3);
|
2007-03-30 20:38:51 +00:00
|
|
|
delete $3; }
|
2007-02-27 02:29:16 +00:00
|
|
|
| '[' '^' '^' cset_chars ']'
|
|
|
|
{ $4->insert('^');
|
|
|
|
$$ = new NotCharSetNode(*$4);
|
2007-03-30 20:38:51 +00:00
|
|
|
delete $4; }
|
2007-02-27 02:29:16 +00:00
|
|
|
| '(' regexp ')' { $$ = $2; }
|
|
|
|
;
|
|
|
|
|
|
|
|
regex_char : CHAR
|
|
|
|
| '^' { $$ = '^'; }
|
|
|
|
| '-' { $$ = '-'; }
|
|
|
|
| ']' { $$ = ']'; }
|
|
|
|
;
|
|
|
|
|
|
|
|
charset : cset_char1 cset_chars
|
|
|
|
{ $$ = insert_char($2, $1); }
|
|
|
|
| cset_char1 '-' cset_charN cset_chars
|
|
|
|
{ $$ = insert_char_range($4, $1, $3); }
|
|
|
|
;
|
|
|
|
|
|
|
|
cset_chars : /* nothing */ { $$ = new Chars; }
|
|
|
|
| cset_chars cset_charN
|
|
|
|
{ $$ = insert_char($1, $2); }
|
|
|
|
| cset_chars cset_charN '-' cset_charN
|
|
|
|
{ $$ = insert_char_range($1, $2, $4); }
|
|
|
|
;
|
|
|
|
|
|
|
|
cset_char1 : cset_char
|
|
|
|
| ']' { $$ = ']'; }
|
|
|
|
| '-' { $$ = '-'; }
|
|
|
|
;
|
|
|
|
|
|
|
|
cset_charN : cset_char
|
|
|
|
| '^' { $$ = '^'; }
|
|
|
|
;
|
|
|
|
|
|
|
|
cset_char : CHAR
|
|
|
|
| '[' { $$ = '['; }
|
|
|
|
| '*' { $$ = '*'; }
|
|
|
|
| '+' { $$ = '+'; }
|
|
|
|
| '.' { $$ = '.'; }
|
|
|
|
| '|' { $$ = '|'; }
|
|
|
|
| '(' { $$ = '('; }
|
|
|
|
| ')' { $$ = ')'; }
|
|
|
|
;
|
|
|
|
|
|
|
|
%%
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include <getopt.h>
|
|
|
|
#include <assert.h>
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include <fstream>
|
|
|
|
|
|
|
|
#include "../immunix.h"
|
|
|
|
|
|
|
|
/* Traverse the syntax tree depth-first in an iterator-like manner. */
|
|
|
|
class depth_first_traversal {
|
|
|
|
vector<Node *> stack;
|
|
|
|
vector<bool> visited;
|
|
|
|
public:
|
|
|
|
depth_first_traversal(Node *node) {
|
|
|
|
stack.push_back(node);
|
2008-11-07 13:00:05 +00:00
|
|
|
while (node->child[0]) {
|
2007-02-27 02:29:16 +00:00
|
|
|
visited.push_back(false);
|
2008-11-07 13:00:05 +00:00
|
|
|
stack.push_back(node->child[0]);
|
|
|
|
node = node->child[0];
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
Node *operator*()
|
|
|
|
{
|
|
|
|
return stack.back();
|
|
|
|
}
|
|
|
|
Node* operator->()
|
|
|
|
{
|
|
|
|
return stack.back();
|
|
|
|
}
|
|
|
|
operator bool()
|
|
|
|
{
|
|
|
|
return !stack.empty();
|
|
|
|
}
|
|
|
|
void operator++(int)
|
|
|
|
{
|
|
|
|
stack.pop_back();
|
|
|
|
if (!stack.empty()) {
|
2008-11-07 13:00:05 +00:00
|
|
|
if (!visited.back() && stack.back()->child[1]) {
|
2007-02-27 02:29:16 +00:00
|
|
|
visited.pop_back();
|
|
|
|
visited.push_back(true);
|
2008-11-07 13:00:05 +00:00
|
|
|
stack.push_back(stack.back()->child[1]);
|
|
|
|
while (stack.back()->child[0]) {
|
2007-02-27 02:29:16 +00:00
|
|
|
visited.push_back(false);
|
2008-11-07 13:00:05 +00:00
|
|
|
stack.push_back(stack.back()->child[0]);
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
} else
|
|
|
|
visited.pop_back();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
ostream& operator<<(ostream& os, Node& node)
|
|
|
|
{
|
|
|
|
node.dump(os);
|
|
|
|
return os;
|
|
|
|
}
|
|
|
|
|
|
|
|
ostream& operator<<(ostream& os, uchar c)
|
|
|
|
{
|
|
|
|
const char *search = "\a\033\f\n\r\t|*+[](). ",
|
|
|
|
*replace = "aefnrt|*+[](). ", *s;
|
|
|
|
|
|
|
|
if ((s = strchr(search, c)) && *s != '\0')
|
|
|
|
os << '\\' << replace[s - search];
|
|
|
|
else if (c < 32 || c >= 127)
|
|
|
|
os << '\\' << '0' << char('0' + (c >> 6))
|
|
|
|
<< char('0' + ((c >> 3) & 7)) << char('0' + (c & 7));
|
|
|
|
else
|
|
|
|
os << (char)c;
|
|
|
|
return os;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
octdigit(char c)
|
|
|
|
{
|
|
|
|
if (c >= '0' && c <= '7')
|
|
|
|
return c - '0';
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
hexdigit(char c)
|
|
|
|
{
|
|
|
|
if (c >= '0' && c <= '9')
|
|
|
|
return c - '0';
|
|
|
|
else if (c >= 'A' && c <= 'F')
|
|
|
|
return 10 + c - 'A';
|
|
|
|
else if (c >= 'a' && c <= 'f')
|
|
|
|
return 10 + c - 'A';
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
regexp_lex(YYSTYPE *val, const char **pos)
|
|
|
|
{
|
|
|
|
int c;
|
|
|
|
|
|
|
|
val->c = **pos;
|
|
|
|
switch(*(*pos)++) {
|
|
|
|
case '\0':
|
|
|
|
(*pos)--;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
case '*': case '+': case '.': case '|': case '^': case '-':
|
|
|
|
case '[': case ']': case '(' : case ')':
|
|
|
|
return *(*pos - 1);
|
|
|
|
|
|
|
|
case '\\':
|
|
|
|
val->c = **pos;
|
|
|
|
switch(*(*pos)++) {
|
|
|
|
case '\0':
|
|
|
|
(*pos)--;
|
2008-12-04 10:44:02 +00:00
|
|
|
/* fall through */
|
|
|
|
case '\\':
|
2007-02-27 02:29:16 +00:00
|
|
|
val->c = '\\';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '0':
|
|
|
|
val->c = 0;
|
|
|
|
if ((c = octdigit(**pos)) >= 0) {
|
|
|
|
val->c = c;
|
|
|
|
(*pos)++;
|
|
|
|
}
|
|
|
|
if ((c = octdigit(**pos)) >= 0) {
|
|
|
|
val->c = (val->c << 3) + c;
|
|
|
|
(*pos)++;
|
|
|
|
}
|
|
|
|
if ((c = octdigit(**pos)) >= 0) {
|
|
|
|
val->c = (val->c << 3) + c;
|
|
|
|
(*pos)++;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'x':
|
|
|
|
val->c = 0;
|
|
|
|
if ((c = hexdigit(**pos)) >= 0) {
|
|
|
|
val->c = c;
|
|
|
|
(*pos)++;
|
|
|
|
}
|
|
|
|
if ((c = hexdigit(**pos)) >= 0) {
|
|
|
|
val->c = (val->c << 4) + c;
|
|
|
|
(*pos)++;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'a':
|
|
|
|
val->c = '\a';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'e':
|
|
|
|
val->c = 033 /* ESC */;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'f':
|
|
|
|
val->c = '\f';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'n':
|
|
|
|
val->c = '\n';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'r':
|
|
|
|
val->c = '\r';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 't':
|
|
|
|
val->c = '\t';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return CHAR;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2007-03-30 20:38:51 +00:00
|
|
|
regexp_error(Node **, const char *text, const char *error)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
/* We don't want the library to print error messages. */
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Assign a consecutive number to each node. This is only needed for
|
|
|
|
* pretty-printing the debug output.
|
2010-08-04 09:53:46 -07:00
|
|
|
*
|
|
|
|
* The epsnode is labeled 0. Start labeling at 1
|
2007-02-27 02:29:16 +00:00
|
|
|
*/
|
|
|
|
void label_nodes(Node *root)
|
|
|
|
{
|
2010-08-04 09:53:46 -07:00
|
|
|
int nodes = 1;
|
2007-02-27 02:29:16 +00:00
|
|
|
for (depth_first_traversal i(root); i; i++)
|
2010-07-10 17:52:13 -07:00
|
|
|
i->label = nodes++;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Text-dump a state (for debugging).
|
|
|
|
*/
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
ostream& operator<<(ostream& os, const NodeSet& state)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
os << '{';
|
|
|
|
if (!state.empty()) {
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
NodeSet::iterator i = state.begin();
|
2007-02-27 02:29:16 +00:00
|
|
|
for(;;) {
|
2010-07-10 17:52:13 -07:00
|
|
|
os << (*i)->label;
|
2007-02-27 02:29:16 +00:00
|
|
|
if (++i == state.end())
|
|
|
|
break;
|
|
|
|
os << ',';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
os << '}';
|
|
|
|
return os;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Text-dump the syntax tree (for debugging).
|
|
|
|
*/
|
|
|
|
void dump_syntax_tree(ostream& os, Node *node) {
|
|
|
|
for (depth_first_traversal i(node); i; i++) {
|
2010-07-10 17:52:13 -07:00
|
|
|
os << i->label << '\t';
|
2008-11-07 13:00:05 +00:00
|
|
|
if ((*i)->child[0] == 0)
|
2007-02-27 02:29:16 +00:00
|
|
|
os << **i << '\t' << (*i)->followpos << endl;
|
|
|
|
else {
|
2008-11-07 13:00:05 +00:00
|
|
|
if ((*i)->child[1] == 0)
|
2010-07-10 17:52:13 -07:00
|
|
|
os << (*i)->child[0]->label << **i;
|
2007-02-27 02:29:16 +00:00
|
|
|
else
|
2010-07-10 17:52:13 -07:00
|
|
|
os << (*i)->child[0]->label << **i
|
|
|
|
<< (*i)->child[1]->label;
|
2007-02-27 02:29:16 +00:00
|
|
|
os << '\t' << (*i)->firstpos
|
|
|
|
<< (*i)->lastpos << endl;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
os << endl;
|
|
|
|
}
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
/* Comparison operator for sets of <NodeSet *>.
|
2010-11-09 11:20:08 -08:00
|
|
|
* Compare set hashes, and if the sets have the same hash
|
2010-11-09 11:18:46 -08:00
|
|
|
* do compare pointer comparison on set of <Node *>, the pointer comparison
|
2010-07-10 17:53:04 -07:00
|
|
|
* allows us to determine which Sets of <Node *> we have seen already from
|
|
|
|
* new ones when constructing the DFA.
|
|
|
|
*/
|
|
|
|
struct deref_less_than {
|
2010-11-09 11:20:08 -08:00
|
|
|
bool operator()(pair <unsigned long, NodeSet *> const & lhs, pair <unsigned long, NodeSet *> const & rhs) const
|
|
|
|
{
|
|
|
|
if (lhs.first == rhs.first)
|
|
|
|
return *(lhs.second) < *(rhs.second);
|
|
|
|
else
|
|
|
|
return lhs.first < rhs.first;
|
2010-11-09 11:18:46 -08:00
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
};
|
|
|
|
|
2010-11-09 11:20:08 -08:00
|
|
|
unsigned long hash_NodeSet(const NodeSet *ns)
|
|
|
|
{
|
|
|
|
unsigned long hash = 5381;
|
|
|
|
|
|
|
|
for (NodeSet::iterator i = ns->begin(); i != ns->end(); i++) {
|
|
|
|
hash = ((hash << 5) + hash) + (unsigned long) *i;
|
|
|
|
}
|
|
|
|
|
|
|
|
return hash;
|
|
|
|
}
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
class State;
|
|
|
|
/**
|
|
|
|
* State cases are identical to NodesCases except they map to State *
|
|
|
|
* instead of NodeSet.
|
|
|
|
* Out-edges from a state to another: we store the follow State
|
|
|
|
* for each input character that is not a default match in cases and
|
|
|
|
* default matches in otherwise as well as in all matching explicit cases
|
|
|
|
* This avoids enumerating all the explicit tranitions for default matches.
|
|
|
|
*/
|
|
|
|
typedef struct Cases {
|
|
|
|
typedef map<uchar, State *>::iterator iterator;
|
|
|
|
iterator begin() { return cases.begin(); }
|
|
|
|
iterator end() { return cases.end(); }
|
|
|
|
|
|
|
|
Cases() : otherwise(0) { }
|
|
|
|
map<uchar, State *> cases;
|
|
|
|
State *otherwise;
|
|
|
|
} Cases;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* State - DFA individual state information
|
|
|
|
* audit: the audit permission mask for the state
|
|
|
|
* accept: the accept permissions for the state
|
|
|
|
* cases: set of transitions from this state
|
|
|
|
*/
|
|
|
|
class State {
|
|
|
|
public:
|
|
|
|
State() : label (0), audit(0), accept(0), cases() { }
|
|
|
|
int label;
|
|
|
|
uint32_t audit, accept;
|
|
|
|
Cases cases;
|
|
|
|
};
|
|
|
|
|
|
|
|
ostream& operator<<(ostream& os, const State& state)
|
|
|
|
{
|
|
|
|
/* currently just dump the state ptr */
|
|
|
|
os << '{';
|
|
|
|
os << state.label;
|
|
|
|
os << '}';
|
|
|
|
return os;
|
|
|
|
}
|
|
|
|
|
2010-01-31 23:19:54 -08:00
|
|
|
typedef list<State *> Partition;
|
2010-11-09 11:20:08 -08:00
|
|
|
typedef map<pair<unsigned long, NodeSet *>, State *, deref_less_than > NodeMap;
|
2007-02-27 02:29:16 +00:00
|
|
|
/* Transitions in the DFA. */
|
|
|
|
|
|
|
|
class DFA {
|
|
|
|
public:
|
2010-01-08 02:17:45 -08:00
|
|
|
DFA(Node *root, dfaflags_t flags);
|
2007-02-27 02:29:16 +00:00
|
|
|
virtual ~DFA();
|
2010-01-20 03:32:34 -08:00
|
|
|
void remove_unreachable(dfaflags_t flags);
|
2010-01-31 23:18:14 -08:00
|
|
|
bool same_mappings(map <State *, Partition *> &partition_map, State *s1,
|
2010-01-20 03:32:34 -08:00
|
|
|
State *s2);
|
|
|
|
size_t hash_trans(State *s);
|
|
|
|
void minimize(dfaflags_t flags);
|
2007-02-27 02:29:16 +00:00
|
|
|
void dump(ostream& os);
|
|
|
|
void dump_dot_graph(ostream& os);
|
2010-01-08 02:17:45 -08:00
|
|
|
map<uchar, uchar> equivalence_classes(dfaflags_t flags);
|
2007-02-27 02:29:16 +00:00
|
|
|
void apply_equivalence_classes(map<uchar, uchar>& eq);
|
|
|
|
Node *root;
|
|
|
|
State *nonmatching, *start;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
Partition states;
|
2007-02-27 02:29:16 +00:00
|
|
|
};
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
uint32_t accept_perms(NodeSet *state, uint32_t *audit_ctl, int *error);
|
|
|
|
|
|
|
|
|
|
|
|
/* macro to help out with DFA creation, not done as inlined fn as nearly
|
|
|
|
* every line uses a different map or variable that would have to be passed
|
|
|
|
*/
|
2010-11-09 11:20:08 -08:00
|
|
|
#define update_for_nodes(NODES, TARGET) \
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
do { \
|
2010-11-09 11:20:08 -08:00
|
|
|
pair <unsigned long, NodeSet *> index = make_pair(hash_NodeSet(NODES), NODES); \
|
|
|
|
map<pair <unsigned long, NodeSet *>, State *, deref_less_than>::iterator x = nodemap.find(index); \
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if (x == nodemap.end()) { \
|
|
|
|
/* set of nodes isn't known so create new state, and nodes to \
|
|
|
|
* state mapping \
|
|
|
|
*/ \
|
|
|
|
nomatch_count++; \
|
|
|
|
TARGET = new State(); \
|
|
|
|
(TARGET)->label = nomatch_count; \
|
|
|
|
states.push_back(TARGET); \
|
2010-11-09 11:20:08 -08:00
|
|
|
nodemap.insert(make_pair(index, TARGET)); \
|
|
|
|
work_queue.push_back(NODES); \
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
} else { \
|
|
|
|
/* set of nodes already has a mapping so free this one */ \
|
|
|
|
match_count++; \
|
2010-11-09 11:20:08 -08:00
|
|
|
delete (NODES); \
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
TARGET = x->second; \
|
|
|
|
} \
|
|
|
|
} while (0)
|
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
/**
|
|
|
|
* Construct a DFA from a syntax tree.
|
|
|
|
*/
|
2010-01-08 02:17:45 -08:00
|
|
|
DFA::DFA(Node *root, dfaflags_t flags) : root(root)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
int i, match_count, nomatch_count;
|
|
|
|
i = match_count = nomatch_count = 0;
|
2010-01-08 02:17:45 -08:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if (flags & DFA_DUMP_PROGRESS)
|
|
|
|
fprintf(stderr, "Creating dfa:\r");
|
2010-01-08 02:17:45 -08:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (depth_first_traversal i(root); i; i++) {
|
|
|
|
(*i)->compute_nullable();
|
|
|
|
(*i)->compute_firstpos();
|
|
|
|
(*i)->compute_lastpos();
|
|
|
|
}
|
2010-01-08 02:17:45 -08:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if (flags & DFA_DUMP_PROGRESS)
|
|
|
|
fprintf(stderr, "Creating dfa: followpos\r");
|
|
|
|
for (depth_first_traversal i(root); i; i++) {
|
|
|
|
(*i)->compute_followpos();
|
|
|
|
}
|
|
|
|
|
|
|
|
NodeMap nodemap;
|
|
|
|
nonmatching = new State;
|
|
|
|
states.push_back(nonmatching);
|
|
|
|
NodeSet *emptynode = new NodeSet;
|
2010-11-09 11:20:08 -08:00
|
|
|
nodemap.insert(make_pair(make_pair(hash_NodeSet(emptynode), emptynode), nonmatching));
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
/* there is no nodemapping for the nonmatching state */
|
|
|
|
|
|
|
|
start = new State;
|
|
|
|
start->label = 1;
|
|
|
|
nomatch_count++;
|
|
|
|
states.push_back(start);
|
|
|
|
NodeSet *first = new NodeSet(root->firstpos);
|
2010-11-09 11:20:08 -08:00
|
|
|
nodemap.insert(make_pair(make_pair(hash_NodeSet(first), first), start));
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
|
|
|
|
/* the work_queue contains the proto-states (set of nodes that is
|
|
|
|
* the precurser of a state) that need to be computed
|
|
|
|
*
|
|
|
|
* TODO: currently the work_queue is treated in a breadth first
|
|
|
|
* search manner. Test using the work_queue in a depth first
|
|
|
|
* manner, this may help reduce the number of entries on the
|
|
|
|
* work_queue at any given time, thus reducing peak memory use.
|
|
|
|
*/
|
|
|
|
list<NodeSet *> work_queue;
|
|
|
|
work_queue.push_back(first);
|
2007-02-27 02:29:16 +00:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
while (!work_queue.empty()) {
|
|
|
|
if (i % 1000 == 0 && (flags & DFA_DUMP_PROGRESS))
|
|
|
|
fprintf(stderr, "\033[2KCreating dfa: queue %ld\tstates %ld\tmatching %d\tnonmatching %d\r", work_queue.size(), states.size(), match_count, nomatch_count);
|
|
|
|
i++;
|
2007-02-27 02:29:16 +00:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
int error;
|
|
|
|
NodeSet *nodes = work_queue.front();
|
|
|
|
work_queue.pop_front();
|
2010-11-09 11:20:08 -08:00
|
|
|
State *from = nodemap[make_pair(hash_NodeSet(nodes), nodes)];
|
2007-02-27 02:29:16 +00:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
/* Compute permissions associated with the State. */
|
|
|
|
from->accept = accept_perms(nodes, &from->audit, &error);
|
|
|
|
if (error) {
|
|
|
|
/* TODO!!!!!!!!!!!!!
|
|
|
|
* permission error checking here
|
|
|
|
*/
|
|
|
|
}
|
2010-01-08 02:17:45 -08:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
/* Compute possible transitions for `nodes`. This is done by
|
|
|
|
* iterating over all the nodes in nodes and combining the
|
|
|
|
* transitions.
|
|
|
|
*
|
|
|
|
* The resultant transition set is a mapping of characters to
|
|
|
|
* sets of nodes.
|
|
|
|
*/
|
|
|
|
NodeCases cases;
|
|
|
|
for (NodeSet::iterator i = nodes->begin(); i != nodes->end(); i++)
|
|
|
|
(*i)->follow(cases);
|
|
|
|
|
|
|
|
/* Now for each set of nodes in the computed transitions, make
|
|
|
|
* sure that there is a state that maps to it, and add the
|
|
|
|
* matching case to the state.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* check the default transition first */
|
|
|
|
if (cases.otherwise) {
|
|
|
|
State *target;
|
|
|
|
update_for_nodes(cases.otherwise, target);
|
|
|
|
from->cases.otherwise = target;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* For each transition from *from, check if the set of nodes it
|
|
|
|
* transitions to already has been mapped to a state
|
|
|
|
*/
|
|
|
|
for (NodeCases::iterator j = cases.begin(); j != cases.end();
|
|
|
|
j++) {
|
|
|
|
State *target;
|
|
|
|
update_for_nodes(j->second, target);
|
|
|
|
/* Don't insert transition that the default transition
|
|
|
|
* already covers
|
|
|
|
*/
|
|
|
|
if (target != from->cases.otherwise)
|
|
|
|
from->cases.cases[j->first] = target;
|
|
|
|
}
|
|
|
|
} /* for (NodeSet *nodes ... */
|
|
|
|
|
|
|
|
/* cleanup Sets of nodes used computing the DFA as they are no longer
|
|
|
|
* needed.
|
|
|
|
*/
|
|
|
|
for (depth_first_traversal i(root); i; i++) {
|
|
|
|
(*i)->firstpos.clear();
|
|
|
|
(*i)->lastpos.clear();
|
|
|
|
(*i)->followpos.clear();
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (NodeMap::iterator i = nodemap.begin(); i != nodemap.end(); i++)
|
2010-11-09 11:20:08 -08:00
|
|
|
delete i->first.second;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
nodemap.clear();
|
2010-07-10 17:47:25 -07:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if (flags & (DFA_DUMP_STATS))
|
|
|
|
fprintf(stderr, "\033[2KCreated dfa: states %ld\tmatching %d\tnonmatching %d\n", states.size(), match_count, nomatch_count);
|
2010-07-10 17:47:25 -07:00
|
|
|
|
2010-01-20 03:32:34 -08:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
/* TODO Dump dfa with NODE mapping - or node to dfa mapping */
|
|
|
|
// ??????
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
DFA::~DFA()
|
|
|
|
{
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++)
|
2007-02-27 02:29:16 +00:00
|
|
|
delete *i;
|
|
|
|
}
|
|
|
|
|
2007-03-30 20:38:51 +00:00
|
|
|
class MatchFlag : public AcceptNode {
|
|
|
|
public:
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
MatchFlag(uint32_t flag, uint32_t audit) : flag(flag), audit(audit) {}
|
2007-03-30 20:38:51 +00:00
|
|
|
ostream& dump(ostream& os)
|
|
|
|
{
|
|
|
|
return os << '<' << flag << '>';
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
2007-03-30 20:38:51 +00:00
|
|
|
|
|
|
|
uint32_t flag;
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
uint32_t audit;
|
2007-12-20 12:56:50 +00:00
|
|
|
};
|
2007-03-30 20:38:51 +00:00
|
|
|
|
|
|
|
class ExactMatchFlag : public MatchFlag {
|
|
|
|
public:
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
ExactMatchFlag(uint32_t flag, uint32_t audit) : MatchFlag(flag, audit) {}
|
2007-03-30 20:38:51 +00:00
|
|
|
};
|
|
|
|
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
class DenyMatchFlag : public MatchFlag {
|
|
|
|
public:
|
|
|
|
DenyMatchFlag(uint32_t flag, uint32_t quiet) : MatchFlag(flag, quiet) {}
|
|
|
|
};
|
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2010-01-20 03:32:34 -08:00
|
|
|
/* Remove dead or unreachable states */
|
|
|
|
void DFA::remove_unreachable(dfaflags_t flags)
|
|
|
|
{
|
|
|
|
set <State *> reachable;
|
|
|
|
list <State *> work_queue;
|
|
|
|
|
|
|
|
/* find the set of reachable states */
|
|
|
|
reachable.insert(nonmatching);
|
|
|
|
work_queue.push_back(start);
|
|
|
|
while (!work_queue.empty()) {
|
|
|
|
State *from = work_queue.front();
|
|
|
|
work_queue.pop_front();
|
|
|
|
reachable.insert(from);
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if (from->cases.otherwise &&
|
|
|
|
(reachable.find(from->cases.otherwise) == reachable.end()))
|
|
|
|
work_queue.push_back(from->cases.otherwise);
|
2010-01-20 03:32:34 -08:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Cases::iterator j = from->cases.begin();
|
|
|
|
j != from->cases.end(); j++) {
|
2010-01-20 03:32:34 -08:00
|
|
|
if (reachable.find(j->second) == reachable.end())
|
|
|
|
work_queue.push_back(j->second);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* walk the set of states and remove any that aren't reachable */
|
|
|
|
if (reachable.size() < states.size()) {
|
|
|
|
int count = 0;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
Partition::iterator i;
|
|
|
|
Partition::iterator next;
|
2010-01-20 03:32:34 -08:00
|
|
|
for (i = states.begin(); i != states.end(); i = next) {
|
|
|
|
next = i;
|
|
|
|
next++;
|
|
|
|
if (reachable.find(*i) == reachable.end()) {
|
|
|
|
if (flags & DFA_DUMP_UNREACHABLE) {
|
|
|
|
cerr << "unreachable: "<< **i;
|
|
|
|
if (*i == start)
|
|
|
|
cerr << " <==";
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if ((*i)->accept) {
|
|
|
|
cerr << " (0x" << hex << (*i)->accept
|
|
|
|
<< " " << (*i)->audit << dec << ')';
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
|
|
|
cerr << endl;
|
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
State *current = *i;
|
|
|
|
states.erase(i);
|
|
|
|
delete(current);
|
|
|
|
count++;
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (count && (flags & DFA_DUMP_STATS))
|
|
|
|
cerr << "DFA: states " << states.size() << " removed "
|
|
|
|
<< count << " unreachable states\n";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* test if two states have the same transitions under partition_map */
|
2010-01-31 23:18:14 -08:00
|
|
|
bool DFA::same_mappings(map <State *, Partition *> &partition_map, State *s1,
|
2010-01-20 03:32:34 -08:00
|
|
|
State *s2)
|
|
|
|
{
|
2010-11-09 11:25:44 -08:00
|
|
|
if (s1->cases.otherwise && s1->cases.otherwise != nonmatching) {
|
|
|
|
if (!s2->cases.otherwise || s2->cases.otherwise == nonmatching)
|
2010-01-20 03:32:34 -08:00
|
|
|
return false;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
Partition *p1 = partition_map.find(s1->cases.otherwise)->second;
|
|
|
|
Partition *p2 = partition_map.find(s2->cases.otherwise)->second;
|
2010-01-20 03:32:34 -08:00
|
|
|
if (p1 != p2)
|
|
|
|
return false;
|
2010-11-09 11:25:44 -08:00
|
|
|
} else if (s2->cases.otherwise && s2->cases.otherwise != nonmatching) {
|
2010-01-20 03:32:34 -08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if (s1->cases.cases.size() != s2->cases.cases.size())
|
2010-01-20 03:32:34 -08:00
|
|
|
return false;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Cases::iterator j1 = s1->cases.begin(); j1 != s1->cases.end();
|
2010-01-20 03:32:34 -08:00
|
|
|
j1++){
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
Cases::iterator j2 = s2->cases.cases.find(j1->first);
|
|
|
|
if (j2 == s2->cases.end())
|
2010-01-20 03:32:34 -08:00
|
|
|
return false;
|
2010-01-31 23:18:14 -08:00
|
|
|
Partition *p1 = partition_map.find(j1->second)->second;
|
|
|
|
Partition *p2 = partition_map.find(j2->second)->second;
|
2010-01-20 03:32:34 -08:00
|
|
|
if (p1 != p2)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Do simple djb2 hashing against a States transition cases
|
|
|
|
* this provides a rough initial guess at state equivalence as if a state
|
|
|
|
* has a different number of transitions or has transitions on different
|
|
|
|
* cases they will never be equivalent.
|
|
|
|
* Note: this only hashes based off of the alphabet (not destination)
|
|
|
|
* as different destinations could end up being equiv
|
|
|
|
*/
|
|
|
|
size_t DFA::hash_trans(State *s)
|
|
|
|
{
|
|
|
|
unsigned long hash = 5381;
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Cases::iterator j = s->cases.begin(); j != s->cases.end(); j++){
|
2010-01-20 03:32:34 -08:00
|
|
|
hash = ((hash << 5) + hash) + j->first;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
State *k = j->second;
|
|
|
|
hash = ((hash << 5) + hash) + k->cases.cases.size();
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if (s->cases.otherwise && s->cases.otherwise != nonmatching) {
|
2010-01-20 03:32:34 -08:00
|
|
|
hash = ((hash << 5) + hash) + 5381;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
State *k = s->cases.otherwise;
|
|
|
|
hash = ((hash << 5) + hash) + k->cases.cases.size();
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
2010-01-20 05:10:38 -08:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
hash = (hash << 8) | s->cases.cases.size();
|
2010-01-20 03:32:34 -08:00
|
|
|
return hash;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* minimize the number of dfa states */
|
|
|
|
void DFA::minimize(dfaflags_t flags)
|
|
|
|
{
|
2010-01-31 23:18:14 -08:00
|
|
|
map <pair <uint64_t, size_t>, Partition *> perm_map;
|
|
|
|
list <Partition *> partitions;
|
|
|
|
map <State *, Partition *> partition_map;
|
2010-01-20 03:32:34 -08:00
|
|
|
|
2010-11-09 11:22:54 -08:00
|
|
|
/* Set up the initial partitions
|
|
|
|
* minimium of - 1 non accepting, and 1 accepting
|
|
|
|
* if trans hashing is used the accepting and non-accepting partitions
|
|
|
|
* can be further split based on the number and type of transitions
|
|
|
|
* a state makes.
|
|
|
|
* If permission hashing is enabled the accepting partitions can
|
|
|
|
* be further divided by permissions. This can result in not
|
|
|
|
* obtaining a truely minimized dfa but comes close, and can speedup
|
|
|
|
* minimization.
|
2010-01-20 03:32:34 -08:00
|
|
|
*/
|
|
|
|
int accept_count = 0;
|
2010-11-09 11:26:50 -08:00
|
|
|
int final_accept = 0;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
2010-11-09 11:22:54 -08:00
|
|
|
uint64_t perm_hash = 0;
|
|
|
|
if (flags & DFA_CONTROL_MINIMIZE_HASH_PERMS) {
|
|
|
|
/* make every unique perm create a new partition */
|
|
|
|
perm_hash = ((uint64_t)(*i)->audit)<<32 |
|
|
|
|
(uint64_t)(*i)->accept;
|
|
|
|
} else if ((*i)->audit || (*i)->accept) {
|
|
|
|
/* combine all perms together into a single parition */
|
|
|
|
perm_hash = 1;
|
|
|
|
} /* else not an accept state so 0 for perm_hash */
|
|
|
|
|
|
|
|
size_t trans_hash = 0;
|
|
|
|
if (flags & DFA_CONTROL_MINIMIZE_HASH_TRANS)
|
|
|
|
trans_hash = hash_trans(*i);
|
|
|
|
pair <uint64_t, size_t> group = make_pair(perm_hash, trans_hash);
|
2010-01-31 23:18:14 -08:00
|
|
|
map <pair <uint64_t, size_t>, Partition *>::iterator p = perm_map.find(group);
|
2010-01-20 03:32:34 -08:00
|
|
|
if (p == perm_map.end()) {
|
2010-01-31 23:18:14 -08:00
|
|
|
Partition *part = new Partition();
|
2010-01-31 23:19:54 -08:00
|
|
|
part->push_back(*i);
|
2010-01-20 03:32:34 -08:00
|
|
|
perm_map.insert(make_pair(group, part));
|
|
|
|
partitions.push_back(part);
|
|
|
|
partition_map.insert(make_pair(*i, part));
|
2010-11-09 11:22:54 -08:00
|
|
|
if (perm_hash)
|
2010-01-20 03:32:34 -08:00
|
|
|
accept_count++;
|
|
|
|
} else {
|
|
|
|
partition_map.insert(make_pair(*i, p->second));
|
2010-01-31 23:19:54 -08:00
|
|
|
p->second->push_back(*i);
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
|
2010-01-20 03:32:34 -08:00
|
|
|
if ((flags & DFA_DUMP_PROGRESS) &&
|
|
|
|
(partitions.size() % 1000 == 0))
|
2010-11-09 11:26:50 -08:00
|
|
|
cerr << "\033[2KMinimize dfa: partitions " << partitions.size() << "\tinit " << partitions.size() << " (accept " << accept_count << ")\r";
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
|
|
|
|
2010-11-09 11:26:18 -08:00
|
|
|
/* perm_map is no longer needed so free the memory it is using.
|
|
|
|
* Don't remove - doing it manually here helps reduce peak memory usage.
|
|
|
|
*/
|
|
|
|
perm_map.clear();
|
|
|
|
|
2010-01-20 03:32:34 -08:00
|
|
|
int init_count = partitions.size();
|
|
|
|
if (flags & DFA_DUMP_PROGRESS)
|
2010-11-09 11:26:50 -08:00
|
|
|
cerr << "\033[2KMinimize dfa: partitions " << partitions.size() << "\tinit " << init_count << " (accept " << accept_count << ")\r";
|
2010-01-20 03:32:34 -08:00
|
|
|
|
|
|
|
/* Now do repartitioning until each partition contains the set of
|
|
|
|
* states that are the same. This will happen when the partition
|
|
|
|
* splitting stables. With a worse case of 1 state per partition
|
|
|
|
* ie. already minimized.
|
|
|
|
*/
|
2010-01-31 23:18:14 -08:00
|
|
|
Partition *new_part;
|
2010-01-20 03:32:34 -08:00
|
|
|
int new_part_count;
|
|
|
|
do {
|
|
|
|
new_part_count = 0;
|
2010-01-31 23:18:14 -08:00
|
|
|
for (list <Partition *>::iterator p = partitions.begin();
|
2010-01-20 03:32:34 -08:00
|
|
|
p != partitions.end(); p++) {
|
|
|
|
new_part = NULL;
|
|
|
|
State *rep = *((*p)->begin());
|
2010-01-31 23:18:14 -08:00
|
|
|
Partition::iterator next;
|
|
|
|
for (Partition::iterator s = ++(*p)->begin();
|
2010-01-31 23:21:00 -08:00
|
|
|
s != (*p)->end(); ) {
|
|
|
|
if (same_mappings(partition_map, rep, *s)) {
|
|
|
|
++s;
|
2010-01-20 03:32:34 -08:00
|
|
|
continue;
|
2010-01-31 23:21:00 -08:00
|
|
|
}
|
2010-01-20 03:32:34 -08:00
|
|
|
if (!new_part) {
|
2010-01-31 23:18:14 -08:00
|
|
|
new_part = new Partition;
|
2010-01-31 23:21:00 -08:00
|
|
|
list <Partition *>::iterator tmp = p;
|
|
|
|
partitions.insert(++tmp, new_part);
|
|
|
|
new_part_count++;
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
2010-01-31 23:19:54 -08:00
|
|
|
new_part->push_back(*s);
|
2010-01-31 23:21:00 -08:00
|
|
|
s = (*p)->erase(s);
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
2010-01-31 23:21:00 -08:00
|
|
|
/* remapping partition_map for new_part entries
|
|
|
|
* Do not do this above as it messes up same_mappings
|
|
|
|
*/
|
2010-01-20 03:32:34 -08:00
|
|
|
if (new_part) {
|
2010-01-31 23:18:14 -08:00
|
|
|
for (Partition::iterator m = new_part->begin();
|
2010-01-20 03:32:34 -08:00
|
|
|
m != new_part->end(); m++) {
|
2010-01-31 23:21:00 -08:00
|
|
|
partition_map.erase(*m);
|
|
|
|
partition_map.insert(make_pair(*m, new_part));
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if ((flags & DFA_DUMP_PROGRESS) &&
|
2010-01-31 23:12:33 -08:00
|
|
|
(partitions.size() % 100 == 0))
|
2010-11-09 11:26:50 -08:00
|
|
|
cerr << "\033[2KMinimize dfa: partitions " << partitions.size() << "\tinit " << init_count << " (accept " << accept_count << ")\r";
|
2010-01-31 23:12:33 -08:00
|
|
|
}
|
2010-01-20 03:32:34 -08:00
|
|
|
} while(new_part_count);
|
|
|
|
|
2010-11-09 11:26:50 -08:00
|
|
|
if (partitions.size() == states.size()) {
|
|
|
|
if (flags & DFA_DUMP_STATS)
|
|
|
|
cerr << "\033[2KDfa minimization no states removed: partitions " << partitions.size() << "\tinit " << init_count << " (accept " << accept_count << ")\n";
|
2010-01-20 03:32:34 -08:00
|
|
|
|
|
|
|
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Remap the dfa so it uses the representative states
|
|
|
|
* Use the first state of a partition as the representative state
|
|
|
|
* At this point all states with in a partion have transitions
|
2010-11-09 11:27:36 -08:00
|
|
|
* to states within the same partitions, however this can slow
|
|
|
|
* down compressed dfa compression as there are more states,
|
2010-01-20 03:32:34 -08:00
|
|
|
*/
|
2010-01-31 23:18:14 -08:00
|
|
|
for (list <Partition *>::iterator p = partitions.begin();
|
2010-01-20 03:32:34 -08:00
|
|
|
p != partitions.end(); p++) {
|
|
|
|
/* representative state for this partition */
|
|
|
|
State *rep = *((*p)->begin());
|
|
|
|
|
|
|
|
/* update representative state's transitions */
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if (rep->cases.otherwise) {
|
|
|
|
map <State *, Partition *>::iterator z = partition_map.find(rep->cases.otherwise);
|
|
|
|
Partition *partition = partition_map.find(rep->cases.otherwise)->second;
|
|
|
|
rep->cases.otherwise = *partition->begin();
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Cases::iterator c = rep->cases.begin();
|
|
|
|
c != rep->cases.end(); c++) {
|
|
|
|
Partition *partition = partition_map.find(c->second)->second;
|
|
|
|
c->second = *partition->begin();
|
|
|
|
}
|
|
|
|
|
|
|
|
//if ((*p)->size() > 1)
|
|
|
|
//cerr << rep->label << ": ";
|
2010-11-09 11:24:51 -08:00
|
|
|
/* clear the state label for all non representative states,
|
|
|
|
* and accumulate permissions */
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = ++(*p)->begin(); i != (*p)->end(); i++) {
|
|
|
|
//cerr << " " << (*i)->label;
|
|
|
|
(*i)->label = -1;
|
2010-11-09 11:24:51 -08:00
|
|
|
rep->accept |= (*i)->accept;
|
|
|
|
rep->audit |= (*i)->audit;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
}
|
2010-11-09 11:26:50 -08:00
|
|
|
if (rep->accept || rep->audit)
|
|
|
|
final_accept++;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
//if ((*p)->size() > 1)
|
|
|
|
//cerr << "\n";
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
2010-11-09 11:26:50 -08:00
|
|
|
if (flags & DFA_DUMP_STATS)
|
|
|
|
cerr << "\033[2KMinimized dfa: final partitions " << partitions.size() << " (accept " << final_accept << ")" << "\tinit " << init_count << " (accept " << accept_count << ")\n";
|
|
|
|
|
|
|
|
|
2010-01-20 03:32:34 -08:00
|
|
|
|
|
|
|
/* make sure nonmatching and start state are up to date with the
|
|
|
|
* mappings */
|
|
|
|
{
|
2010-01-31 23:18:14 -08:00
|
|
|
Partition *partition = partition_map.find(nonmatching)->second;
|
2010-01-20 03:32:34 -08:00
|
|
|
if (*partition->begin() != nonmatching) {
|
|
|
|
nonmatching = *partition->begin();
|
|
|
|
}
|
|
|
|
|
|
|
|
partition = partition_map.find(start)->second;
|
|
|
|
if (*partition->begin() != start) {
|
|
|
|
start = *partition->begin();
|
|
|
|
}
|
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
|
2010-01-20 03:32:34 -08:00
|
|
|
/* Now that the states have been remapped, remove all states
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
* that are not the representive states for their partition, they
|
|
|
|
* will have a label == -1
|
2010-01-20 03:32:34 -08:00
|
|
|
*/
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); ) {
|
|
|
|
if ((*i)->label == -1) {
|
2010-03-13 02:23:23 -08:00
|
|
|
State *s = *i;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
i = states.erase(i);
|
2010-03-13 02:23:23 -08:00
|
|
|
delete(s);
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
} else
|
|
|
|
i++;
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
/* Cleanup */
|
|
|
|
while (!partitions.empty()) {
|
2010-01-31 23:18:14 -08:00
|
|
|
Partition *p = partitions.front();
|
2010-01-20 03:32:34 -08:00
|
|
|
partitions.pop_front();
|
|
|
|
delete(p);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
/**
|
|
|
|
* text-dump the DFA (for debugging).
|
|
|
|
*/
|
|
|
|
void DFA::dump(ostream& os)
|
|
|
|
{
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
|
|
|
if (*i == start || (*i)->accept) {
|
2007-02-27 02:29:16 +00:00
|
|
|
os << **i;
|
|
|
|
if (*i == start)
|
|
|
|
os << " <==";
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if ((*i)->accept) {
|
|
|
|
os << " (0x" << hex << (*i)->accept << " " << (*i)->audit << dec << ')';
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
os << endl;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
os << endl;
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
|
|
|
if ((*i)->cases.otherwise)
|
|
|
|
os << **i << " -> " << (*i)->cases.otherwise << endl;
|
|
|
|
for (Cases::iterator j = (*i)->cases.begin(); j != (*i)->cases.end(); j++) {
|
|
|
|
os << **i << " -> " << j->second << ": " << j->first << endl;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
os << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Create a dot (graphviz) graph from the DFA (for debugging).
|
|
|
|
*/
|
|
|
|
void DFA::dump_dot_graph(ostream& os)
|
|
|
|
{
|
|
|
|
os << "digraph \"dfa\" {" << endl;
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
2007-02-27 02:29:16 +00:00
|
|
|
if (*i == nonmatching)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
os << "\t\"" << **i << "\" [" << endl;
|
|
|
|
if (*i == start) {
|
|
|
|
os << "\t\tstyle=bold" << endl;
|
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
uint32_t perms = (*i)->accept;
|
2007-02-27 02:29:16 +00:00
|
|
|
if (perms) {
|
|
|
|
os << "\t\tlabel=\"" << **i << "\\n("
|
|
|
|
<< perms << ")\"" << endl;
|
|
|
|
}
|
|
|
|
os << "\t]" << endl;
|
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
|
|
|
Cases& cases = (*i)->cases;
|
2007-02-27 02:29:16 +00:00
|
|
|
Chars excluded;
|
|
|
|
|
|
|
|
for (Cases::iterator j = cases.begin(); j != cases.end(); j++) {
|
|
|
|
if (j->second == nonmatching)
|
|
|
|
excluded.insert(j->first);
|
|
|
|
else {
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
os << "\t\"" << **i << "\" -> \"";
|
|
|
|
os << j->second << "\" [" << endl;
|
|
|
|
os << "\t\tlabel=\"" << j->first << "\"" << endl;
|
|
|
|
os << "\t]" << endl;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if (cases.otherwise && cases.otherwise != nonmatching) {
|
|
|
|
os << "\t\"" << **i << "\" -> \"" << cases.otherwise
|
2007-02-27 02:29:16 +00:00
|
|
|
<< "\" [" << endl;
|
|
|
|
if (!excluded.empty()) {
|
|
|
|
os << "\t\tlabel=\"[^";
|
|
|
|
for (Chars::iterator i = excluded.begin();
|
|
|
|
i != excluded.end();
|
|
|
|
i++) {
|
|
|
|
os << *i;
|
|
|
|
}
|
|
|
|
os << "]\"" << endl;
|
|
|
|
}
|
|
|
|
os << "\t]" << endl;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
os << '}' << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Compute character equivalence classes in the DFA to save space in the
|
|
|
|
* transition table.
|
|
|
|
*/
|
2010-01-08 02:17:45 -08:00
|
|
|
map<uchar, uchar> DFA::equivalence_classes(dfaflags_t flags)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
map<uchar, uchar> classes;
|
|
|
|
uchar next_class = 1;
|
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
|
|
|
Cases& cases = (*i)->cases;
|
2007-02-27 02:29:16 +00:00
|
|
|
|
|
|
|
/* Group edges to the same next state together */
|
|
|
|
map<const State *, Chars> node_sets;
|
|
|
|
for (Cases::iterator j = cases.begin(); j != cases.end(); j++)
|
|
|
|
node_sets[j->second].insert(j->first);
|
|
|
|
|
|
|
|
for (map<const State *, Chars>::iterator j = node_sets.begin();
|
|
|
|
j != node_sets.end();
|
|
|
|
j++) {
|
|
|
|
/* Group edges to the same next state together by class */
|
|
|
|
map<uchar, Chars> node_classes;
|
|
|
|
bool class_used = false;
|
|
|
|
for (Chars::iterator k = j->second.begin();
|
|
|
|
k != j->second.end();
|
|
|
|
k++) {
|
|
|
|
pair<map<uchar, uchar>::iterator, bool> x =
|
|
|
|
classes.insert(make_pair(*k, next_class));
|
|
|
|
if (x.second)
|
|
|
|
class_used = true;
|
|
|
|
pair<map<uchar, Chars>::iterator, bool> y =
|
|
|
|
node_classes.insert(make_pair(x.first->second, Chars()));
|
|
|
|
y.first->second.insert(*k);
|
|
|
|
}
|
|
|
|
if (class_used) {
|
|
|
|
next_class++;
|
|
|
|
class_used = false;
|
|
|
|
}
|
|
|
|
for (map<uchar, Chars>::iterator k = node_classes.begin();
|
|
|
|
k != node_classes.end();
|
|
|
|
k++) {
|
|
|
|
/**
|
|
|
|
* If any other characters are in the same class, move
|
|
|
|
* the characters in this class into their own new class
|
|
|
|
*/
|
|
|
|
map<uchar, uchar>::iterator l;
|
|
|
|
for (l = classes.begin(); l != classes.end(); l++) {
|
|
|
|
if (l->second == k->first &&
|
|
|
|
k->second.find(l->first) == k->second.end()) {
|
|
|
|
class_used = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (class_used) {
|
|
|
|
for (Chars::iterator l = k->second.begin();
|
|
|
|
l != k->second.end();
|
|
|
|
l++) {
|
|
|
|
classes[*l] = next_class;
|
|
|
|
}
|
|
|
|
next_class++;
|
|
|
|
class_used = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2010-01-08 02:17:45 -08:00
|
|
|
|
|
|
|
if (flags & DFA_DUMP_EQUIV_STATS)
|
|
|
|
fprintf(stderr, "Equiv class reduces to %d classes\n", next_class - 1);
|
2007-02-27 02:29:16 +00:00
|
|
|
return classes;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Text-dump the equivalence classes (for debugging).
|
|
|
|
*/
|
|
|
|
void dump_equivalence_classes(ostream& os, map<uchar, uchar>& eq)
|
|
|
|
{
|
|
|
|
map<uchar, Chars> rev;
|
|
|
|
|
|
|
|
for (map<uchar, uchar>::iterator i = eq.begin(); i != eq.end(); i++) {
|
|
|
|
Chars& chars = rev.insert(make_pair(i->second,
|
|
|
|
Chars())).first->second;
|
|
|
|
chars.insert(i->first);
|
|
|
|
}
|
|
|
|
os << "(eq):" << endl;
|
|
|
|
for (map<uchar, Chars>::iterator i = rev.begin(); i != rev.end(); i++) {
|
|
|
|
os << (int)i->first << ':';
|
|
|
|
Chars& chars = i->second;
|
|
|
|
for (Chars::iterator j = chars.begin(); j != chars.end(); j++) {
|
|
|
|
os << ' ' << *j;
|
|
|
|
}
|
|
|
|
os << endl;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Replace characters with classes (which are also represented as
|
|
|
|
* characters) in the DFA transition table.
|
|
|
|
*/
|
|
|
|
void DFA::apply_equivalence_classes(map<uchar, uchar>& eq)
|
|
|
|
{
|
|
|
|
/**
|
|
|
|
* Note: We only transform the transition table; the nodes continue to
|
|
|
|
* contain the original characters.
|
|
|
|
*/
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
2007-02-27 02:29:16 +00:00
|
|
|
map<uchar, State *> tmp;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
tmp.swap((*i)->cases.cases);
|
2007-02-27 02:29:16 +00:00
|
|
|
for (Cases::iterator j = tmp.begin(); j != tmp.end(); j++)
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
(*i)->cases.cases.insert(make_pair(eq[j->first], j->second));
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Flip the children of all cat nodes. This causes strings to be matched
|
|
|
|
* back-forth.
|
|
|
|
*/
|
|
|
|
void flip_tree(Node *node)
|
|
|
|
{
|
|
|
|
for (depth_first_traversal i(node); i; i++) {
|
|
|
|
if (CatNode *cat = dynamic_cast<CatNode *>(*i)) {
|
2008-11-07 13:00:05 +00:00
|
|
|
swap(cat->child[0], cat->child[1]);
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
class TransitionTable {
|
|
|
|
typedef vector<pair<const State *, size_t> > DefaultBase;
|
|
|
|
typedef vector<pair<const State *, const State *> > NextCheck;
|
|
|
|
public:
|
2010-01-08 02:17:45 -08:00
|
|
|
TransitionTable(DFA& dfa, map<uchar, uchar>& eq, dfaflags_t flags);
|
2007-02-27 02:29:16 +00:00
|
|
|
void dump(ostream& os);
|
|
|
|
void flex_table(ostream& os, const char *name);
|
2010-01-27 17:20:13 -08:00
|
|
|
void init_free_list(vector <pair<size_t, size_t> > &free_list, size_t prev, size_t start);
|
|
|
|
bool fits_in(vector <pair<size_t, size_t> > &free_list,
|
|
|
|
size_t base, Cases& cases);
|
|
|
|
void insert_state(vector <pair<size_t, size_t> > &free_list,
|
|
|
|
State *state, DFA& dfa);
|
2007-02-27 02:29:16 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
vector<uint32_t> accept;
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
vector<uint32_t> accept2;
|
2007-02-27 02:29:16 +00:00
|
|
|
DefaultBase default_base;
|
|
|
|
NextCheck next_check;
|
|
|
|
map<const State *, size_t> num;
|
|
|
|
map<uchar, uchar>& eq;
|
|
|
|
uchar max_eq;
|
2010-01-27 17:20:13 -08:00
|
|
|
size_t first_free;
|
2007-02-27 02:29:16 +00:00
|
|
|
};
|
|
|
|
|
2010-01-27 17:20:13 -08:00
|
|
|
|
|
|
|
void TransitionTable::init_free_list(vector <pair<size_t, size_t> > &free_list,
|
|
|
|
size_t prev, size_t start) {
|
|
|
|
for (size_t i = start; i < free_list.size(); i++) {
|
|
|
|
if (prev)
|
|
|
|
free_list[prev].second = i;
|
|
|
|
free_list[i].first = prev;
|
|
|
|
prev = i;
|
|
|
|
}
|
|
|
|
free_list[free_list.size() -1].second = 0;
|
|
|
|
}
|
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
/**
|
2010-01-27 17:20:13 -08:00
|
|
|
* new Construct the transition table.
|
2007-02-27 02:29:16 +00:00
|
|
|
*/
|
2010-01-08 02:17:45 -08:00
|
|
|
TransitionTable::TransitionTable(DFA& dfa, map<uchar, uchar>& eq,
|
|
|
|
dfaflags_t flags)
|
2010-01-27 17:20:13 -08:00
|
|
|
: eq(eq)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
2010-01-08 02:17:45 -08:00
|
|
|
|
2010-01-27 17:20:13 -08:00
|
|
|
if (flags & DFA_DUMP_TRANS_PROGRESS)
|
|
|
|
fprintf(stderr, "Creating trans table:\r");
|
2010-01-08 02:17:45 -08:00
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2010-01-27 17:20:13 -08:00
|
|
|
if (eq.empty())
|
|
|
|
max_eq = 255;
|
|
|
|
else {
|
|
|
|
max_eq = 0;
|
|
|
|
for(map<uchar, uchar>::iterator i = eq.begin(); i != eq.end(); i++) {
|
|
|
|
if (i->second > max_eq)
|
|
|
|
max_eq = i->second;
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
2010-01-27 17:20:13 -08:00
|
|
|
/* Do initial setup adding up all the transitions and sorting by
|
|
|
|
* transition count.
|
|
|
|
*/
|
|
|
|
size_t optimal = 2;
|
|
|
|
multimap <size_t, State *> order;
|
|
|
|
vector <pair<size_t, size_t> > free_list;
|
2007-02-27 02:29:16 +00:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = dfa.states.begin(); i != dfa.states.end(); i++) {
|
|
|
|
if (*i == dfa.start || *i == dfa.nonmatching)
|
2010-01-27 17:20:13 -08:00
|
|
|
continue;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
optimal += (*i)->cases.cases.size();
|
2010-01-27 17:20:13 -08:00
|
|
|
if (flags & DFA_CONTROL_TRANS_HIGH) {
|
|
|
|
size_t range = 0;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if ((*i)->cases.cases.size())
|
|
|
|
range = (*i)->cases.cases.rbegin()->first - (*i)->cases.begin()->first;
|
|
|
|
size_t ord = ((256 - (*i)->cases.cases.size()) << 8) |
|
2010-01-27 17:20:13 -08:00
|
|
|
(256 - range);
|
|
|
|
/* reverse sort by entry count, most entries first */
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
order.insert(make_pair(ord, *i));
|
2010-01-27 17:20:13 -08:00
|
|
|
}
|
2010-01-08 02:17:45 -08:00
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2010-01-27 17:20:13 -08:00
|
|
|
/* Insert the dummy nonmatching transition by hand */
|
|
|
|
next_check.push_back(make_pair(dfa.nonmatching, dfa.nonmatching));
|
|
|
|
default_base.push_back(make_pair(dfa.nonmatching, 0));
|
|
|
|
num.insert(make_pair(dfa.nonmatching, num.size()));
|
2010-01-08 02:17:45 -08:00
|
|
|
|
2010-01-27 17:20:13 -08:00
|
|
|
accept.resize(dfa.states.size());
|
|
|
|
accept2.resize(dfa.states.size());
|
|
|
|
next_check.resize(optimal);
|
|
|
|
free_list.resize(optimal);
|
2010-01-08 05:29:25 -08:00
|
|
|
|
2010-01-27 17:20:13 -08:00
|
|
|
accept[0] = 0;
|
|
|
|
accept2[0] = 0;
|
|
|
|
first_free = 1;
|
|
|
|
init_free_list(free_list, 0, 1);
|
|
|
|
|
|
|
|
insert_state(free_list, dfa.start, dfa);
|
|
|
|
accept[1] = 0;
|
|
|
|
accept2[1] = 0;
|
|
|
|
num.insert(make_pair(dfa.start, num.size()));
|
|
|
|
|
|
|
|
int count = 2;
|
|
|
|
|
|
|
|
if (!(flags & DFA_CONTROL_TRANS_HIGH)) {
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = dfa.states.begin(); i != dfa.states.end();
|
2010-01-27 17:20:13 -08:00
|
|
|
i++) {
|
|
|
|
if (*i != dfa.nonmatching && *i != dfa.start) {
|
|
|
|
insert_state(free_list, *i, dfa);
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
accept[num.size()] = (*i)->accept;
|
|
|
|
accept2[num.size()] = (*i)->audit;
|
2010-01-27 17:20:13 -08:00
|
|
|
num.insert(make_pair(*i, num.size()));
|
|
|
|
}
|
|
|
|
if (flags & (DFA_DUMP_TRANS_PROGRESS)) {
|
|
|
|
count++;
|
|
|
|
if (count % 100 == 0)
|
|
|
|
fprintf(stderr, "\033[2KCreating trans table: insert state: %d/%ld\r", count, dfa.states.size());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (multimap <size_t, State *>::iterator i = order.begin();
|
|
|
|
i != order.end(); i++) {
|
|
|
|
if (i->second != dfa.nonmatching && i->second != dfa.start) {
|
|
|
|
insert_state(free_list, i->second, dfa);
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
accept[num.size()] = i->second->accept;
|
|
|
|
accept2[num.size()] = i->second->audit;
|
2010-01-27 17:20:13 -08:00
|
|
|
num.insert(make_pair(i->second, num.size()));
|
|
|
|
}
|
|
|
|
if (flags & (DFA_DUMP_TRANS_PROGRESS)) {
|
|
|
|
count++;
|
|
|
|
if (count % 100 == 0)
|
|
|
|
fprintf(stderr, "\033[2KCreating trans table: insert state: %d/%ld\r", count, dfa.states.size());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (flags & (DFA_DUMP_TRANS_STATS | DFA_DUMP_TRANS_PROGRESS)) {
|
|
|
|
ssize_t size = 4 * next_check.size() + 6 * dfa.states.size();
|
|
|
|
fprintf(stderr, "\033[2KCreated trans table: states %ld, next/check %ld, optimal next/check %ld avg/state %.2f, compression %ld/%ld = %.2f %%\n", dfa.states.size(), next_check.size(), optimal, (float)next_check.size()/(float)dfa.states.size(), size, 512 * dfa.states.size(), 100.0 - ((float) size * 100.0 / (float)(512 * dfa.states.size())));
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
2010-01-27 17:20:13 -08:00
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
/**
|
|
|
|
* Does <cases> fit into position <base> of the transition table?
|
|
|
|
*/
|
2010-01-27 17:20:13 -08:00
|
|
|
bool TransitionTable::fits_in(vector <pair<size_t, size_t> > &free_list,
|
|
|
|
size_t pos, Cases& cases)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
2010-01-27 17:20:13 -08:00
|
|
|
size_t c, base = pos - cases.begin()->first;
|
|
|
|
for (Cases::iterator i = cases.begin(); i != cases.end(); i++) {
|
|
|
|
c = base + i->first;
|
|
|
|
/* if it overflows the next_check array it fits in as we will
|
|
|
|
* resize */
|
|
|
|
if (c >= next_check.size())
|
2010-07-23 04:29:54 +02:00
|
|
|
return true;
|
2010-01-27 17:20:13 -08:00
|
|
|
if (next_check[c].second)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Insert <state> of <dfa> into the transition table.
|
|
|
|
*/
|
2010-01-27 17:20:13 -08:00
|
|
|
void TransitionTable::insert_state(vector <pair<size_t, size_t> > &free_list,
|
|
|
|
State *from, DFA& dfa)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
2010-01-27 17:20:13 -08:00
|
|
|
State *default_state = dfa.nonmatching;
|
|
|
|
size_t base = 0;
|
2010-07-23 04:29:54 +02:00
|
|
|
int resize;
|
2007-02-27 02:29:16 +00:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
Cases& cases = from->cases;
|
2010-01-27 17:20:13 -08:00
|
|
|
size_t c = cases.begin()->first;
|
|
|
|
size_t prev = 0;
|
|
|
|
size_t x = first_free;
|
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
if (cases.otherwise)
|
2010-01-27 17:20:13 -08:00
|
|
|
default_state = cases.otherwise;
|
2007-02-27 02:29:16 +00:00
|
|
|
if (cases.cases.empty())
|
2010-01-27 17:20:13 -08:00
|
|
|
goto do_insert;
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2010-01-27 17:20:13 -08:00
|
|
|
repeat:
|
2010-07-23 04:29:54 +02:00
|
|
|
resize = 0;
|
2010-01-27 17:20:13 -08:00
|
|
|
/* get the first free entry that won't underflow */
|
|
|
|
while (x && (x < c)) {
|
|
|
|
prev = x;
|
|
|
|
x = free_list[x].second;
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2010-01-27 17:20:13 -08:00
|
|
|
/* try inserting until we succeed. */
|
|
|
|
while (x && !fits_in(free_list, x, cases)) {
|
|
|
|
prev = x;
|
|
|
|
x = free_list[x].second;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
2010-01-27 17:20:13 -08:00
|
|
|
if (!x) {
|
2010-07-23 04:30:31 +02:00
|
|
|
resize = 256 - cases.begin()->first;
|
2010-01-27 17:20:13 -08:00
|
|
|
x = free_list.size();
|
2010-07-23 04:29:54 +02:00
|
|
|
/* set prev to last free */
|
2010-07-23 04:30:31 +02:00
|
|
|
} else if (x + 255 - cases.begin()->first >= next_check.size()) {
|
|
|
|
resize = (255 - cases.begin()->first - (next_check.size() - 1 - x));
|
2010-07-23 04:29:54 +02:00
|
|
|
for (size_t y = x; y; y = free_list[y].second)
|
|
|
|
prev = y;
|
|
|
|
}
|
|
|
|
if (resize) {
|
|
|
|
/* expand next_check and free_list */
|
|
|
|
size_t old_size = free_list.size();
|
|
|
|
next_check.resize(next_check.size() + resize);
|
|
|
|
free_list.resize(free_list.size() + resize);
|
|
|
|
init_free_list(free_list, prev, old_size);
|
2010-01-27 17:20:13 -08:00
|
|
|
if (!first_free)
|
2010-07-23 04:29:54 +02:00
|
|
|
first_free = old_size;;
|
|
|
|
if (x == old_size)
|
|
|
|
goto repeat;
|
2010-01-27 17:20:13 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
base = x - c;
|
|
|
|
for (Cases::iterator j = cases.begin(); j != cases.end(); j++) {
|
|
|
|
next_check[base + j->first] = make_pair(j->second, from);
|
|
|
|
size_t prev = free_list[base + j->first].first;
|
|
|
|
size_t next = free_list[base + j->first].second;
|
|
|
|
if (prev)
|
|
|
|
free_list[prev].second = next;
|
|
|
|
if (next)
|
|
|
|
free_list[next].first = prev;
|
|
|
|
if (base + j->first == first_free)
|
|
|
|
first_free = next;
|
|
|
|
}
|
|
|
|
|
|
|
|
do_insert:
|
|
|
|
default_base.push_back(make_pair(default_state, base));
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Text-dump the transition table (for debugging).
|
|
|
|
*/
|
|
|
|
void TransitionTable::dump(ostream& os)
|
|
|
|
{
|
|
|
|
map<size_t, const State *> st;
|
|
|
|
for (map<const State *, size_t>::iterator i = num.begin();
|
|
|
|
i != num.end();
|
|
|
|
i++) {
|
|
|
|
st.insert(make_pair(i->second, i->first));
|
|
|
|
}
|
|
|
|
|
2010-07-23 04:29:29 +02:00
|
|
|
os << "size=" << default_base.size() << " (accept, default, base): {state} -> {default state}" << endl;
|
2007-02-27 02:29:16 +00:00
|
|
|
for (size_t i = 0; i < default_base.size(); i++) {
|
2010-07-10 17:49:32 -07:00
|
|
|
os << i << ": ";
|
2007-02-27 02:29:16 +00:00
|
|
|
os << "(" << accept[i] << ", "
|
|
|
|
<< num[default_base[i].first] << ", "
|
|
|
|
<< default_base[i].second << ")";
|
|
|
|
if (st[i])
|
|
|
|
os << " " << *st[i];
|
|
|
|
if (default_base[i].first)
|
|
|
|
os << " -> " << *default_base[i].first;
|
|
|
|
os << endl;
|
|
|
|
}
|
|
|
|
|
2010-07-23 04:29:29 +02:00
|
|
|
os << "size=" << next_check.size() << " (next, check): {check state} -> {next state} : offset from base" << endl;
|
2007-02-27 02:29:16 +00:00
|
|
|
for (size_t i = 0; i < next_check.size(); i++) {
|
|
|
|
if (!next_check[i].second)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
os << i << ": ";
|
|
|
|
if (next_check[i].second) {
|
|
|
|
os << "(" << num[next_check[i].first] << ", "
|
|
|
|
<< num[next_check[i].second] << ")" << " "
|
|
|
|
<< *next_check[i].second << " -> "
|
|
|
|
<< *next_check[i].first << ": ";
|
|
|
|
|
|
|
|
size_t offs = i - default_base[num[next_check[i].second]].second;
|
|
|
|
if (eq.size())
|
|
|
|
os << offs;
|
|
|
|
else
|
|
|
|
os << (uchar)offs;
|
|
|
|
}
|
|
|
|
os << endl;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
template<class Iter>
|
|
|
|
class FirstIterator {
|
|
|
|
public:
|
|
|
|
FirstIterator(Iter pos) : pos(pos) { }
|
|
|
|
typename Iter::value_type::first_type operator*() { return pos->first; }
|
|
|
|
bool operator!=(FirstIterator<Iter>& i) { return pos != i.pos; }
|
|
|
|
void operator++() { ++pos; }
|
|
|
|
ssize_t operator-(FirstIterator<Iter> i) { return pos - i.pos; }
|
|
|
|
private:
|
|
|
|
Iter pos;
|
|
|
|
};
|
|
|
|
|
|
|
|
template<class Iter>
|
|
|
|
FirstIterator<Iter> first_iterator(Iter iter)
|
|
|
|
{
|
|
|
|
return FirstIterator<Iter>(iter);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<class Iter>
|
|
|
|
class SecondIterator {
|
|
|
|
public:
|
|
|
|
SecondIterator(Iter pos) : pos(pos) { }
|
|
|
|
typename Iter::value_type::second_type operator*() { return pos->second; }
|
|
|
|
bool operator!=(SecondIterator<Iter>& i) { return pos != i.pos; }
|
|
|
|
void operator++() { ++pos; }
|
|
|
|
ssize_t operator-(SecondIterator<Iter> i) { return pos - i.pos; }
|
|
|
|
private:
|
|
|
|
Iter pos;
|
|
|
|
};
|
|
|
|
|
|
|
|
template<class Iter>
|
|
|
|
SecondIterator<Iter> second_iterator(Iter iter)
|
|
|
|
{
|
|
|
|
return SecondIterator<Iter>(iter);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Create a flex-style binary dump of the DFA tables. The table format
|
|
|
|
* was partly reverse engineered from the flex sources and from
|
|
|
|
* examining the tables that flex creates with its --tables-file option.
|
|
|
|
* (Only the -Cf and -Ce formats are currently supported.)
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "flex-tables.h"
|
|
|
|
#include "regexp.h"
|
|
|
|
|
|
|
|
static inline size_t pad64(size_t i)
|
|
|
|
{
|
|
|
|
return (i + (size_t)7) & ~(size_t)7;
|
|
|
|
}
|
|
|
|
|
|
|
|
string fill64(size_t i)
|
|
|
|
{
|
|
|
|
const char zeroes[8] = { };
|
|
|
|
string fill(zeroes, (i & 7) ? 8 - (i & 7) : 0);
|
|
|
|
return fill;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<class Iter>
|
|
|
|
size_t flex_table_size(Iter pos, Iter end)
|
|
|
|
{
|
|
|
|
return pad64(sizeof(struct table_header) + sizeof(*pos) * (end - pos));
|
|
|
|
}
|
|
|
|
|
|
|
|
template<class Iter>
|
|
|
|
void write_flex_table(ostream& os, int id, Iter pos, Iter end)
|
|
|
|
{
|
|
|
|
struct table_header td = { };
|
|
|
|
size_t size = end - pos;
|
|
|
|
|
|
|
|
td.td_id = htons(id);
|
|
|
|
td.td_flags = htons(sizeof(*pos));
|
|
|
|
td.td_lolen = htonl(size);
|
|
|
|
os.write((char *)&td, sizeof(td));
|
|
|
|
|
|
|
|
for (; pos != end; ++pos) {
|
|
|
|
switch(sizeof(*pos)) {
|
|
|
|
case 4:
|
|
|
|
os.put((char)(*pos >> 24));
|
|
|
|
os.put((char)(*pos >> 16));
|
|
|
|
case 2:
|
|
|
|
os.put((char)(*pos >> 8));
|
|
|
|
case 1:
|
|
|
|
os.put((char)*pos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
os << fill64(sizeof(td) + sizeof(*pos) * size);
|
|
|
|
}
|
|
|
|
|
|
|
|
void TransitionTable::flex_table(ostream& os, const char *name)
|
|
|
|
{
|
|
|
|
const char th_version[] = "notflex";
|
|
|
|
struct table_set_header th = { };
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Change the following two data types to adjust the maximum flex
|
|
|
|
* table size.
|
|
|
|
*/
|
|
|
|
typedef uint16_t state_t;
|
|
|
|
typedef uint32_t trans_t;
|
|
|
|
|
|
|
|
if (default_base.size() >= (state_t)-1) {
|
|
|
|
cerr << "Too many states (" << default_base.size() << ") for "
|
|
|
|
"type state_t" << endl;
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
if (next_check.size() >= (trans_t)-1) {
|
|
|
|
cerr << "Too many transitions (" << next_check.size() << ") for "
|
|
|
|
"type trans_t" << endl;
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Create copies of the data structures so that we can dump the tables
|
|
|
|
* using the generic write_flex_table() routine.
|
|
|
|
*/
|
|
|
|
vector<uint8_t> equiv_vec;
|
|
|
|
if (eq.size()) {
|
|
|
|
equiv_vec.resize(256);
|
|
|
|
for (map<uchar, uchar>::iterator i = eq.begin(); i != eq.end(); i++) {
|
|
|
|
equiv_vec[i->first] = i->second;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
vector<state_t> default_vec;
|
|
|
|
vector<trans_t> base_vec;
|
|
|
|
for (DefaultBase::iterator i = default_base.begin();
|
|
|
|
i != default_base.end();
|
|
|
|
i++) {
|
|
|
|
default_vec.push_back(num[i->first]);
|
|
|
|
base_vec.push_back(i->second);
|
|
|
|
}
|
|
|
|
|
|
|
|
vector<state_t> next_vec;
|
|
|
|
vector<state_t> check_vec;
|
|
|
|
for (NextCheck::iterator i = next_check.begin();
|
|
|
|
i != next_check.end();
|
|
|
|
i++) {
|
|
|
|
next_vec.push_back(num[i->first]);
|
|
|
|
check_vec.push_back(num[i->second]);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Write the actual flex parser table. */
|
|
|
|
|
|
|
|
size_t hsize = pad64(sizeof(th) + sizeof(th_version) + strlen(name) + 1);
|
|
|
|
th.th_magic = htonl(YYTH_REGEXP_MAGIC);
|
|
|
|
th.th_hsize = htonl(hsize);
|
|
|
|
th.th_ssize = htonl(hsize +
|
|
|
|
flex_table_size(accept.begin(), accept.end()) +
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
flex_table_size(accept2.begin(), accept2.end()) +
|
2007-02-27 02:29:16 +00:00
|
|
|
(eq.size() ?
|
|
|
|
flex_table_size(equiv_vec.begin(), equiv_vec.end()) : 0) +
|
|
|
|
flex_table_size(base_vec.begin(), base_vec.end()) +
|
|
|
|
flex_table_size(default_vec.begin(), default_vec.end()) +
|
|
|
|
flex_table_size(next_vec.begin(), next_vec.end()) +
|
|
|
|
flex_table_size(check_vec.begin(), check_vec.end()));
|
|
|
|
os.write((char *)&th, sizeof(th));
|
|
|
|
os << th_version << (char)0 << name << (char)0;
|
|
|
|
os << fill64(sizeof(th) + sizeof(th_version) + strlen(name) + 1);
|
|
|
|
|
|
|
|
|
|
|
|
write_flex_table(os, YYTD_ID_ACCEPT, accept.begin(), accept.end());
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
write_flex_table(os, YYTD_ID_ACCEPT2, accept2.begin(), accept2.end());
|
2007-02-27 02:29:16 +00:00
|
|
|
if (eq.size())
|
|
|
|
write_flex_table(os, YYTD_ID_EC, equiv_vec.begin(), equiv_vec.end());
|
|
|
|
write_flex_table(os, YYTD_ID_BASE, base_vec.begin(), base_vec.end());
|
|
|
|
write_flex_table(os, YYTD_ID_DEF, default_vec.begin(), default_vec.end());
|
|
|
|
write_flex_table(os, YYTD_ID_NXT, next_vec.begin(), next_vec.end());
|
|
|
|
write_flex_table(os, YYTD_ID_CHK, check_vec.begin(), check_vec.end());
|
|
|
|
}
|
|
|
|
|
2007-03-30 15:20:57 +00:00
|
|
|
#if 0
|
2007-02-27 02:29:16 +00:00
|
|
|
typedef set<ImportantNode *> AcceptNodes;
|
|
|
|
map<ImportantNode *, AcceptNodes> dominance(DFA& dfa)
|
|
|
|
{
|
|
|
|
map<ImportantNode *, AcceptNodes> is_dominated;
|
|
|
|
|
|
|
|
for (States::iterator i = dfa.states.begin(); i != dfa.states.end(); i++) {
|
|
|
|
AcceptNodes set1;
|
|
|
|
for (State::iterator j = (*i)->begin(); j != (*i)->end(); j++) {
|
|
|
|
if (AcceptNode *accept = dynamic_cast<AcceptNode *>(*j))
|
|
|
|
set1.insert(accept);
|
|
|
|
}
|
|
|
|
for (AcceptNodes::iterator j = set1.begin(); j != set1.end(); j++) {
|
|
|
|
pair<map<ImportantNode *, AcceptNodes>::iterator, bool> x =
|
|
|
|
is_dominated.insert(make_pair(*j, set1));
|
|
|
|
if (!x.second) {
|
|
|
|
AcceptNodes &set2(x.first->second), set3;
|
|
|
|
for (AcceptNodes::iterator l = set2.begin();
|
|
|
|
l != set2.end();
|
|
|
|
l++) {
|
|
|
|
if (set1.find(*l) != set1.end())
|
|
|
|
set3.insert(*l);
|
|
|
|
}
|
|
|
|
set3.swap(set2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return is_dominated;
|
|
|
|
}
|
2007-03-30 15:20:57 +00:00
|
|
|
#endif
|
2007-02-27 02:29:16 +00:00
|
|
|
|
|
|
|
void dump_regexp_rec(ostream& os, Node *tree)
|
|
|
|
{
|
2008-11-07 13:00:05 +00:00
|
|
|
if (tree->child[0])
|
|
|
|
dump_regexp_rec(os, tree->child[0]);
|
2007-02-27 02:29:16 +00:00
|
|
|
os << *tree;
|
2008-11-07 13:00:05 +00:00
|
|
|
if (tree->child[1])
|
|
|
|
dump_regexp_rec(os, tree->child[1]);
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void dump_regexp(ostream& os, Node *tree)
|
|
|
|
{
|
|
|
|
dump_regexp_rec(os, tree);
|
|
|
|
os << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
#include <sstream>
|
|
|
|
#include <ext/stdio_filebuf.h>
|
|
|
|
|
|
|
|
struct aare_ruleset {
|
|
|
|
int reverse;
|
|
|
|
Node *root;
|
|
|
|
};
|
|
|
|
|
2007-03-30 14:13:56 +00:00
|
|
|
extern "C" aare_ruleset_t *aare_new_ruleset(int reverse)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
aare_ruleset_t *container = (aare_ruleset_t *) malloc(sizeof(aare_ruleset_t));
|
|
|
|
if (!container)
|
|
|
|
return NULL;
|
|
|
|
|
2008-12-03 03:47:31 +00:00
|
|
|
container->root = NULL;
|
2007-02-27 02:29:16 +00:00
|
|
|
container->reverse = reverse;
|
|
|
|
|
|
|
|
return container;
|
|
|
|
}
|
|
|
|
|
2007-03-30 14:13:56 +00:00
|
|
|
extern "C" void aare_delete_ruleset(aare_ruleset_t *rules)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
if (rules) {
|
2008-12-03 03:47:31 +00:00
|
|
|
if (rules->root)
|
|
|
|
rules->root->release();
|
2007-02-27 02:29:16 +00:00
|
|
|
free(rules);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-11-16 09:27:34 +00:00
|
|
|
static inline int diff_qualifiers(uint32_t perm1, uint32_t perm2)
|
|
|
|
{
|
2008-04-16 04:44:21 +00:00
|
|
|
return ((perm1 & AA_EXEC_TYPE) && (perm2 & AA_EXEC_TYPE) &&
|
|
|
|
(perm1 & AA_EXEC_TYPE) != (perm2 & AA_EXEC_TYPE));
|
2007-11-16 09:27:34 +00:00
|
|
|
}
|
2007-03-30 20:38:51 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Compute the permission flags that this state corresponds to. If we
|
|
|
|
* have any exact matches, then they override the execute and safe
|
|
|
|
* execute flags.
|
|
|
|
*/
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
uint32_t accept_perms(NodeSet *state, uint32_t *audit_ctl, int *error)
|
2007-03-30 20:38:51 +00:00
|
|
|
{
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
uint32_t perms = 0, exact_match_perms = 0, audit = 0, exact_audit = 0,
|
|
|
|
quiet = 0, deny = 0;
|
2007-03-30 20:38:51 +00:00
|
|
|
|
2010-01-20 03:32:34 -08:00
|
|
|
if (error)
|
|
|
|
*error = 0;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (NodeSet::iterator i = state->begin(); i != state->end(); i++) {
|
2007-11-16 09:27:34 +00:00
|
|
|
MatchFlag *match;
|
|
|
|
if (!(match= dynamic_cast<MatchFlag *>(*i)))
|
|
|
|
continue;
|
|
|
|
if (dynamic_cast<ExactMatchFlag *>(match)) {
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
/* exact match only ever happens with x */
|
2007-11-16 09:35:57 +00:00
|
|
|
if (!is_merged_x_consistent(exact_match_perms,
|
2010-01-20 03:32:34 -08:00
|
|
|
match->flag) && error)
|
2008-04-09 09:04:08 +00:00
|
|
|
*error = 1;;
|
2007-11-16 09:27:34 +00:00
|
|
|
exact_match_perms |= match->flag;
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
exact_audit |= match->audit;
|
|
|
|
} else if (dynamic_cast<DenyMatchFlag *>(match)) {
|
|
|
|
deny |= match->flag;
|
|
|
|
quiet |= match->audit;
|
2007-11-16 09:27:34 +00:00
|
|
|
} else {
|
2010-01-20 03:32:34 -08:00
|
|
|
if (!is_merged_x_consistent(perms, match->flag) && error)
|
2008-04-09 09:04:08 +00:00
|
|
|
*error = 1;
|
2007-11-16 09:27:34 +00:00
|
|
|
perms |= match->flag;
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
audit |= match->audit;
|
2007-11-16 09:27:34 +00:00
|
|
|
}
|
2007-03-30 20:38:51 +00:00
|
|
|
}
|
|
|
|
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
//if (audit || quiet)
|
|
|
|
//fprintf(stderr, "perms: 0x%x, audit: 0x%x exact: 0x%x eaud: 0x%x deny: 0x%x quiet: 0x%x\n", perms, audit, exact_match_perms, exact_audit, deny, quiet);
|
|
|
|
|
2007-11-16 09:35:57 +00:00
|
|
|
perms |= exact_match_perms &
|
2007-11-29 18:06:53 +00:00
|
|
|
~(AA_USER_EXEC_TYPE | AA_OTHER_EXEC_TYPE);
|
2007-11-16 09:35:57 +00:00
|
|
|
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
if (exact_match_perms & AA_USER_EXEC_TYPE) {
|
2007-11-16 09:35:57 +00:00
|
|
|
perms = (exact_match_perms & AA_USER_EXEC_TYPE) |
|
|
|
|
(perms & ~AA_USER_EXEC_TYPE);
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
audit = (exact_audit & AA_USER_EXEC_TYPE) |
|
|
|
|
(audit & ~ AA_USER_EXEC_TYPE);
|
|
|
|
}
|
|
|
|
if (exact_match_perms & AA_OTHER_EXEC_TYPE) {
|
2007-11-16 09:35:57 +00:00
|
|
|
perms = (exact_match_perms & AA_OTHER_EXEC_TYPE) |
|
|
|
|
(perms & ~AA_OTHER_EXEC_TYPE);
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
audit = (exact_audit & AA_OTHER_EXEC_TYPE) |
|
|
|
|
(audit & ~AA_OTHER_EXEC_TYPE);
|
|
|
|
}
|
|
|
|
if (perms & AA_USER_EXEC & deny)
|
|
|
|
perms &= ~AA_USER_EXEC_TYPE;
|
2007-11-16 09:35:57 +00:00
|
|
|
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
if (perms & AA_OTHER_EXEC & deny)
|
|
|
|
perms &= ~AA_OTHER_EXEC_TYPE;
|
|
|
|
|
|
|
|
perms &= ~deny;
|
|
|
|
|
|
|
|
if (audit_ctl)
|
|
|
|
*audit_ctl = PACK_AUDIT_CTL(audit, quiet & deny);
|
|
|
|
|
|
|
|
// if (perms & AA_ERROR_BIT) {
|
|
|
|
// fprintf(stderr, "error bit 0x%x\n", perms);
|
|
|
|
// exit(255);
|
|
|
|
//}
|
2008-03-13 16:46:53 +00:00
|
|
|
|
|
|
|
//if (perms & AA_EXEC_BITS)
|
|
|
|
//fprintf(stderr, "accept perm: 0x%x\n", perms);
|
2007-11-16 09:27:34 +00:00
|
|
|
/*
|
|
|
|
if (perms & ~AA_VALID_PERMS)
|
|
|
|
yyerror(_("Internal error accumulated invalid perm 0x%llx\n"), perms);
|
|
|
|
*/
|
2008-04-06 18:52:47 +00:00
|
|
|
|
|
|
|
//if (perms & AA_CHANGE_HAT)
|
|
|
|
// fprintf(stderr, "change_hat 0x%x\n", perms);
|
|
|
|
|
2007-03-30 20:38:51 +00:00
|
|
|
return perms;
|
|
|
|
}
|
|
|
|
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
extern "C" int aare_add_rule(aare_ruleset_t *rules, char *rule, int deny,
|
2010-07-23 13:29:35 +02:00
|
|
|
uint32_t perms, uint32_t audit, dfaflags_t flags)
|
2008-03-13 16:46:19 +00:00
|
|
|
{
|
2010-07-23 13:29:35 +02:00
|
|
|
return aare_add_rule_vec(rules, deny, perms, audit, 1, &rule, flags);
|
2008-03-13 16:46:19 +00:00
|
|
|
}
|
|
|
|
|
2009-07-24 07:33:09 +00:00
|
|
|
#define FLAGS_WIDTH 2
|
|
|
|
#define MATCH_FLAGS_SIZE (sizeof(uint32_t) * 8 - 1)
|
|
|
|
MatchFlag *match_flags[FLAGS_WIDTH][MATCH_FLAGS_SIZE];
|
|
|
|
DenyMatchFlag *deny_flags[FLAGS_WIDTH][MATCH_FLAGS_SIZE];
|
|
|
|
#define EXEC_MATCH_FLAGS_SIZE ((AA_EXEC_COUNT << 2) * 2)
|
|
|
|
MatchFlag *exec_match_flags[FLAGS_WIDTH][EXEC_MATCH_FLAGS_SIZE]; /* mods + unsafe + ix *u::o*/
|
|
|
|
ExactMatchFlag *exact_match_flags[FLAGS_WIDTH][EXEC_MATCH_FLAGS_SIZE];/* mods + unsafe +ix *u::o*/
|
|
|
|
|
|
|
|
extern "C" void aare_reset_matchflags(void)
|
|
|
|
{
|
|
|
|
uint32_t i, j;
|
|
|
|
#define RESET_FLAGS(group, size) { \
|
|
|
|
for (i = 0; i < FLAGS_WIDTH; i++) { \
|
|
|
|
for (j = 0; j < size; j++) { \
|
2010-11-09 11:28:22 -08:00
|
|
|
if ((group)[i][j]) delete (group)[i][j]; \
|
2009-07-24 07:33:09 +00:00
|
|
|
(group)[i][j] = NULL; \
|
|
|
|
} \
|
|
|
|
} \
|
|
|
|
}
|
|
|
|
RESET_FLAGS(match_flags,MATCH_FLAGS_SIZE);
|
|
|
|
RESET_FLAGS(deny_flags,MATCH_FLAGS_SIZE);
|
|
|
|
RESET_FLAGS(exec_match_flags,EXEC_MATCH_FLAGS_SIZE);
|
|
|
|
RESET_FLAGS(exact_match_flags,EXEC_MATCH_FLAGS_SIZE);
|
|
|
|
#undef RESET_FLAGS
|
|
|
|
}
|
|
|
|
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
extern "C" int aare_add_rule_vec(aare_ruleset_t *rules, int deny,
|
|
|
|
uint32_t perms, uint32_t audit,
|
2010-07-23 13:29:35 +02:00
|
|
|
int count, char **rulev,
|
|
|
|
dfaflags_t flags)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
2008-03-13 16:46:19 +00:00
|
|
|
Node *tree = NULL, *accept;
|
2007-03-30 20:38:51 +00:00
|
|
|
int exact_match;
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2007-03-30 20:38:51 +00:00
|
|
|
assert(perms != 0);
|
|
|
|
|
2008-03-13 16:46:19 +00:00
|
|
|
if (regexp_parse(&tree, rulev[0]))
|
2007-02-27 02:29:16 +00:00
|
|
|
return 0;
|
2008-03-13 16:46:19 +00:00
|
|
|
for (int i = 1; i < count; i++) {
|
|
|
|
Node *subtree = NULL;
|
|
|
|
Node *node = new CharNode(0);
|
|
|
|
if (!node)
|
|
|
|
return 0;
|
|
|
|
tree = new CatNode(tree, node);
|
|
|
|
if (regexp_parse(&subtree, rulev[i]))
|
|
|
|
return 0;
|
|
|
|
tree = new CatNode(tree, subtree);
|
|
|
|
}
|
2007-03-30 20:38:51 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Check if we have an expression with or without wildcards. This
|
|
|
|
* determines how exec modifiers are merged in accept_perms() based
|
|
|
|
* on how we split permission bitmasks here.
|
|
|
|
*/
|
|
|
|
exact_match = 1;
|
|
|
|
for (depth_first_traversal i(tree); i; i++) {
|
|
|
|
if (dynamic_cast<StarNode *>(*i) ||
|
|
|
|
dynamic_cast<PlusNode *>(*i) ||
|
|
|
|
dynamic_cast<AnyCharNode *>(*i) ||
|
|
|
|
dynamic_cast<CharSetNode *>(*i) ||
|
|
|
|
dynamic_cast<NotCharSetNode *>(*i))
|
|
|
|
exact_match = 0;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (rules->reverse)
|
|
|
|
flip_tree(tree);
|
2007-03-30 20:38:51 +00:00
|
|
|
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
|
2008-04-16 04:44:21 +00:00
|
|
|
/* 0x3f == 4 bits x mods + 1 bit unsafe mask + 1 bit ix, after shift */
|
2008-03-13 16:46:53 +00:00
|
|
|
#define EXTRACT_X_INDEX(perm, shift) (((perm) >> (shift + 8)) & 0x3f)
|
2007-11-16 09:35:57 +00:00
|
|
|
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
//if (perms & ALL_AA_EXEC_TYPE && (!perms & AA_EXEC_BITS))
|
|
|
|
// fprintf(stderr, "adding X rule without MAY_EXEC: 0x%x %s\n", perms, rulev[0]);
|
2007-12-20 12:56:50 +00:00
|
|
|
|
2008-03-13 16:46:53 +00:00
|
|
|
//if (perms & ALL_EXEC_TYPE)
|
|
|
|
// fprintf(stderr, "adding X rule %s 0x%x\n", rulev[0], perms);
|
|
|
|
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
//if (audit)
|
|
|
|
//fprintf(stderr, "adding rule with audit bits set: 0x%x %s\n", audit, rulev[0]);
|
|
|
|
|
2008-04-06 18:52:47 +00:00
|
|
|
//if (perms & AA_CHANGE_HAT)
|
|
|
|
// fprintf(stderr, "adding change_hat rule %s\n", rulev[0]);
|
|
|
|
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
/* the permissions set is assumed to be non-empty if any audit
|
|
|
|
* bits are specified */
|
2007-03-30 20:38:51 +00:00
|
|
|
accept = NULL;
|
2008-04-09 09:04:08 +00:00
|
|
|
for (unsigned int n = 0; perms && n < (sizeof(perms) * 8) ; n++) {
|
2007-03-30 20:38:51 +00:00
|
|
|
uint32_t mask = 1 << n;
|
|
|
|
|
|
|
|
if (perms & mask) {
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
int ai = audit & mask ? 1 : 0;
|
2007-03-30 20:38:51 +00:00
|
|
|
perms &= ~mask;
|
|
|
|
|
|
|
|
Node *flag;
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
if (mask & ALL_AA_EXEC_TYPE)
|
|
|
|
/* these cases are covered by EXEC_BITS */
|
|
|
|
continue;
|
|
|
|
if (deny) {
|
|
|
|
if (deny_flags[ai][n]) {
|
2010-11-09 11:28:22 -08:00
|
|
|
flag = deny_flags[ai][n];
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
} else {
|
|
|
|
//fprintf(stderr, "Adding deny ai %d mask 0x%x audit 0x%x\n", ai, mask, audit & mask);
|
|
|
|
deny_flags[ai][n] = new DenyMatchFlag(mask, audit&mask);
|
2010-11-09 11:28:22 -08:00
|
|
|
flag = deny_flags[ai][n];
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
}
|
|
|
|
} else if (mask & AA_EXEC_BITS) {
|
2007-11-16 09:35:57 +00:00
|
|
|
uint32_t eperm = 0;
|
|
|
|
uint32_t index = 0;
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
if (mask & AA_USER_EXEC) {
|
2007-12-20 12:56:50 +00:00
|
|
|
eperm = mask | (perms & AA_USER_EXEC_TYPE);
|
|
|
|
index = EXTRACT_X_INDEX(eperm, AA_USER_SHIFT);
|
2007-11-16 09:35:57 +00:00
|
|
|
} else {
|
2007-12-20 12:56:50 +00:00
|
|
|
eperm = mask | (perms & AA_OTHER_EXEC_TYPE);
|
2008-04-16 04:44:21 +00:00
|
|
|
index = EXTRACT_X_INDEX(eperm, AA_OTHER_SHIFT) + (AA_EXEC_COUNT << 2);
|
2007-11-16 09:35:57 +00:00
|
|
|
}
|
2008-03-13 16:46:53 +00:00
|
|
|
//fprintf(stderr, "index %d eperm 0x%x\n", index, eperm);
|
2007-11-16 09:27:34 +00:00
|
|
|
if (exact_match) {
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
if (exact_match_flags[ai][index]) {
|
2010-11-09 11:28:22 -08:00
|
|
|
flag = exact_match_flags[ai][index];
|
2007-12-20 12:56:50 +00:00
|
|
|
} else {
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
exact_match_flags[ai][index] = new ExactMatchFlag(eperm, audit&mask);
|
2010-11-09 11:28:22 -08:00
|
|
|
flag = exact_match_flags[ai][index];
|
2007-11-16 09:27:34 +00:00
|
|
|
}
|
|
|
|
} else {
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
if (exec_match_flags[ai][index]) {
|
2010-11-09 11:28:22 -08:00
|
|
|
flag = exec_match_flags[ai][index];
|
2007-12-20 12:56:50 +00:00
|
|
|
} else {
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
exec_match_flags[ai][index] = new MatchFlag(eperm, audit&mask);
|
2010-11-09 11:28:22 -08:00
|
|
|
flag = exec_match_flags[ai][index];
|
2007-11-16 09:27:34 +00:00
|
|
|
}
|
2007-03-30 20:38:51 +00:00
|
|
|
}
|
|
|
|
} else {
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
if (match_flags[ai][n]) {
|
2010-11-09 11:28:22 -08:00
|
|
|
flag = match_flags[ai][n];
|
2007-12-20 12:56:50 +00:00
|
|
|
} else {
|
Add Audit control to AppArmor through, the use of audit and deny
key words. Deny is also used to subtract permissions from the
profiles permission set.
the audit key word can be prepended to any file, network, or capability
rule, to force a selective audit when that rule is matched. Audit
permissions accumulate just like standard permissions.
eg.
audit /bin/foo rw,
will force an audit message when the file /bin/foo is opened for
read or write.
audit /etc/shadow w,
/etc/shadow r,
will force an audit message when /etc/shadow is opened for writing.
The audit message is per permission bit so only opening the file
for read access will not, force an audit message.
audit can also be used in block form instead of prepending audit
to every rule.
audit {
/bin/foo rw,
/etc/shadow w,
}
/etc/shadow r, # don't audit r access to /etc/shadow
the deny key word can be prepended to file, network and capability
rules, to result in a denial of permissions when matching that rule.
The deny rule specifically does 3 things
- it gives AppArmor the ability to remember what has been denied
so that the tools don't prompt for what has been denied in
previous profiling sessions.
- it subtracts globally from the allowed permissions. Deny permissions
accumulate in the the deny set just as allow permissions accumulate
then, the deny set is subtracted from the allow set.
- it quiets known rejects. The default audit behavior of deny rules
is to quiet known rejects so that audit logs are not flooded
with already known rejects. To have known rejects logged prepend
the audit keyword to the deny rule. Deny rules do not have a
block form.
eg.
deny /foo/bar rw,
audit deny /etc/shadow w,
audit {
deny owner /blah w,
deny other /foo w,
deny /etc/shadow w,
}
2008-03-13 17:39:03 +00:00
|
|
|
match_flags[ai][n] = new MatchFlag(mask, audit&mask);
|
2010-11-09 11:28:22 -08:00
|
|
|
flag = match_flags[ai][n];
|
2007-03-30 20:38:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (accept)
|
|
|
|
accept = new AltNode(accept, flag);
|
|
|
|
else
|
|
|
|
accept = flag;
|
|
|
|
}
|
|
|
|
}
|
2007-12-20 12:56:50 +00:00
|
|
|
|
2010-07-23 13:29:35 +02:00
|
|
|
if (flags & DFA_DUMP_RULE_EXPR) {
|
|
|
|
cerr << "rule: ";
|
|
|
|
cerr << rulev[0];
|
|
|
|
for (int i = 1; i < count; i++) {
|
|
|
|
cerr << "\\x00";
|
|
|
|
cerr << rulev[i];
|
|
|
|
}
|
|
|
|
cerr << " -> ";
|
|
|
|
tree->dump(cerr);
|
|
|
|
cerr << "\n\n";
|
|
|
|
}
|
|
|
|
|
2008-12-03 03:47:31 +00:00
|
|
|
if (rules->root)
|
|
|
|
rules->root = new AltNode(rules->root, new CatNode(tree, accept));
|
|
|
|
else
|
|
|
|
rules->root = new CatNode(tree, accept);
|
2007-02-27 02:29:16 +00:00
|
|
|
|
|
|
|
return 1;
|
|
|
|
|
2007-11-16 09:35:31 +00:00
|
|
|
}
|
2007-03-30 20:38:51 +00:00
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
/* create a dfa from the ruleset
|
|
|
|
* returns: buffer contain dfa tables, @size set to the size of the tables
|
|
|
|
* else NULL on failure
|
|
|
|
*/
|
2010-01-08 04:30:56 -08:00
|
|
|
extern "C" void *aare_create_dfa(aare_ruleset_t *rules, size_t *size, dfaflags_t flags)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
char *buffer = NULL;
|
|
|
|
|
|
|
|
label_nodes(rules->root);
|
2010-01-07 16:21:02 -08:00
|
|
|
if (flags & DFA_DUMP_TREE) {
|
|
|
|
cerr << "\nDFA: Expression Tree\n";
|
|
|
|
rules->root->dump(cerr);
|
|
|
|
cerr << "\n\n";
|
|
|
|
}
|
|
|
|
|
2010-11-09 11:23:45 -08:00
|
|
|
if (flags & DFA_CONTROL_TREE_SIMPLE) {
|
2010-01-08 04:30:56 -08:00
|
|
|
rules->root = simplify_tree(rules->root, flags);
|
2010-01-07 16:21:02 -08:00
|
|
|
|
2010-01-08 04:30:56 -08:00
|
|
|
if (flags & DFA_DUMP_SIMPLE_TREE) {
|
|
|
|
cerr << "\nDFA: Simplified Expression Tree\n";
|
|
|
|
rules->root->dump(cerr);
|
|
|
|
cerr << "\n\n";
|
|
|
|
}
|
2010-01-07 16:21:02 -08:00
|
|
|
}
|
|
|
|
|
2010-01-08 02:17:45 -08:00
|
|
|
DFA dfa(rules->root, flags);
|
|
|
|
|
2010-11-09 11:28:56 -08:00
|
|
|
if (flags & DFA_CONTROL_MINIMIZE)
|
|
|
|
dfa.minimize(flags);
|
|
|
|
|
|
|
|
//if (flags & DFA_CONTROL_REMOVE_UNREACHABLE)
|
|
|
|
// remove_unreachable(flags);
|
|
|
|
|
2010-01-08 02:17:45 -08:00
|
|
|
if (flags & DFA_DUMP_STATES)
|
|
|
|
dfa.dump(cerr);
|
|
|
|
|
|
|
|
if (flags & DFA_DUMP_GRAPH)
|
|
|
|
dfa.dump_dot_graph(cerr);
|
2007-12-20 12:56:50 +00:00
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
map<uchar, uchar> eq;
|
2010-01-08 04:30:56 -08:00
|
|
|
if (flags & DFA_CONTROL_EQUIV) {
|
2010-01-08 02:17:45 -08:00
|
|
|
eq = dfa.equivalence_classes(flags);
|
2007-02-27 02:29:16 +00:00
|
|
|
dfa.apply_equivalence_classes(eq);
|
2010-01-08 02:17:45 -08:00
|
|
|
|
2010-01-08 04:30:56 -08:00
|
|
|
if (flags & DFA_DUMP_EQUIV) {
|
2010-01-08 02:17:45 -08:00
|
|
|
cerr << "\nDFA equivalence class\n";
|
|
|
|
dump_equivalence_classes(cerr, eq);
|
2010-01-08 04:30:56 -08:00
|
|
|
}
|
2010-01-08 02:17:45 -08:00
|
|
|
} else if (flags & DFA_DUMP_EQUIV)
|
|
|
|
cerr << "\nDFA did not generate an equivalence class\n";
|
2007-02-27 02:29:16 +00:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
// TODO: perm verification needs to be moved into dfa creation
|
|
|
|
// if (dfa.verify_perms()) {
|
|
|
|
// *size = 0;
|
|
|
|
// return NULL;
|
|
|
|
// }
|
2007-02-27 02:29:16 +00:00
|
|
|
|
|
|
|
stringstream stream;
|
2010-01-08 02:17:45 -08:00
|
|
|
TransitionTable transition_table(dfa, eq, flags);
|
|
|
|
if (flags & DFA_DUMP_TRANS_TABLE)
|
|
|
|
transition_table.dump(cerr);
|
2007-02-27 02:29:16 +00:00
|
|
|
transition_table.flex_table(stream, "");
|
|
|
|
|
|
|
|
stringbuf *buf = stream.rdbuf();
|
|
|
|
|
|
|
|
buf->pubseekpos(0);
|
|
|
|
*size = buf->in_avail();
|
|
|
|
|
|
|
|
buffer = (char *)malloc(*size);
|
|
|
|
if (!buffer)
|
|
|
|
return NULL;
|
|
|
|
buf->sgetn(buffer, *size);
|
|
|
|
return buffer;
|
|
|
|
}
|