2007-02-27 02:29:16 +00:00
|
|
|
/*
|
2007-04-11 08:12:51 +00:00
|
|
|
* (C) 2006, 2007 Andreas Gruenbacher <agruen@suse.de>
|
2011-03-13 05:46:29 -07:00
|
|
|
* Copyright (c) 2003-2008 Novell, Inc. (All rights reserved)
|
2012-02-24 04:21:59 -08:00
|
|
|
* Copyright 2009-2012 Canonical Ltd.
|
2007-02-27 02:29:16 +00:00
|
|
|
*
|
2011-03-13 05:46:29 -07:00
|
|
|
* The libapparmor library is licensed under the terms of the GNU
|
|
|
|
* Lesser General Public License, version 2.1. Please see the file
|
|
|
|
* COPYING.LGPL.
|
2010-08-04 10:23:22 -07:00
|
|
|
*
|
2011-03-13 05:46:29 -07:00
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU Lesser General Public License for more details.
|
2010-08-04 10:23:22 -07:00
|
|
|
*
|
2011-03-13 05:46:29 -07:00
|
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2010-08-04 10:23:22 -07:00
|
|
|
*
|
|
|
|
*
|
2011-03-13 05:46:29 -07:00
|
|
|
* Base of implementation based on the Lexical Analysis chapter of:
|
|
|
|
* Alfred V. Aho, Ravi Sethi, Jeffrey D. Ullman:
|
|
|
|
* Compilers: Principles, Techniques, and Tools (The "Dragon Book"),
|
|
|
|
* Addison-Wesley, 1986.
|
2008-11-07 13:00:05 +00:00
|
|
|
*/
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2011-03-13 05:46:29 -07:00
|
|
|
#include <list>
|
|
|
|
#include <vector>
|
|
|
|
#include <stack>
|
|
|
|
#include <set>
|
|
|
|
#include <map>
|
|
|
|
#include <ostream>
|
|
|
|
#include <iostream>
|
|
|
|
#include <fstream>
|
2012-01-06 09:03:20 -08:00
|
|
|
#include <string.h>
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2011-03-13 05:46:29 -07:00
|
|
|
#include "expr-tree.h"
|
2011-03-13 05:50:34 -07:00
|
|
|
#include "hfa.h"
|
2007-02-27 02:29:16 +00:00
|
|
|
#include "../immunix.h"
|
|
|
|
|
2011-12-15 05:14:37 -08:00
|
|
|
|
|
|
|
/**
 * Write a one-line summary of a cache's statistics to @os.
 *
 * Emits the cache's size(), dup and max members, followed by the integer
 * quotient sum / size() when the cache is not empty.
 *
 * Returns @os so that calls can be chained.
 */
ostream &operator<<(ostream &os, const CacheStats &cache)
{
	os << "cache: size=" << cache.size()
	   << " dups=" << cache.dup
	   << " longest=" << cache.max;

	/* only report an average when there is something to divide by */
	if (cache.size())
		os << " avg=" << cache.sum / cache.size();

	return os;
}
|
|
|
|
|
2011-12-15 05:12:30 -08:00
|
|
|
/**
 * Write a proto state to @os as "{<nnodes>,<anodes>}".
 *
 * The nnodes and anodes members are streamed as-is, i.e. rendered by
 * whichever operator<< applies to their types.
 *
 * Returns @os so that calls can be chained.
 */
ostream &operator<<(ostream &os, const ProtoState &proto)
{
	os << '{' << proto.nnodes << ',' << proto.anodes << '}';
	return os;
}
|
|
|
|
|
2011-03-13 05:55:25 -07:00
|
|
|
/**
 * Write a state to @os as its label wrapped in braces: "{<label>}".
 *
 * Returns @os so that calls can be chained.
 */
ostream &operator<<(ostream &os, const State &state)
{
	os << '{' << state.label << '}';
	return os;
}
|
|
|
|
|
2011-12-15 05:14:37 -08:00
|
|
|
/**
 * Separate the accept nodes of @nodes from the remaining nodes.
 *
 * @nodes:  set to split; every accept node is erased from it in place
 * @anodes: out - newly allocated set holding the accept nodes, or NULL
 *          when @nodes contained none
 * @nnodes: out - @nodes itself, which by then holds only the nodes for
 *          which is_accept() was false
 */
static void split_node_types(NodeSet *nodes, NodeSet **anodes, NodeSet **nnodes)
{
	*nnodes = NULL;
	*anodes = NULL;

	NodeSet::iterator pos = nodes->begin();
	while (pos != nodes->end()) {
		if (!(*pos)->is_accept()) {
			pos++;
			continue;
		}

		/* the accept set is only allocated once it is needed */
		if (*anodes == NULL)
			*anodes = new NodeSet;
		(*anodes)->insert(*pos);

		/* advance before erasing so the iterator we keep walking
		 * with never refers to the removed element
		 */
		NodeSet::iterator doomed = pos++;
		nodes->erase(doomed);
	}

	*nnodes = nodes;
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structurally reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
|
2011-12-15 05:14:37 -08:00
|
|
|
/**
 * Return the State that node_map holds for the proto state described by
 * @nodes, creating and registering that State when none exists yet.
 *
 * @nodes: node set to map to a state; it is split in place and the two
 *         resulting sets are handed to nnodes_cache and anodes_cache
 * @other: forwarded to the State constructor
 *
 * A state that was not yet in node_map is appended to both states and
 * work_queue; otherwise the speculatively built State is deleted again.
 */
State *DFA::add_new_state(NodeSet *nodes, State *other)
{
	/* The splitting of nodes should probably get pushed down into
	 * follow(), ie. put in separate lists from the start
	 */
	NodeSet *accept_nodes, *plain_nodes;
	split_node_types(nodes, &accept_nodes, &plain_nodes);

	/* from here on use whatever the caches hand back for each set */
	hashedNodeVec *nnodev = nnodes_cache.insert(plain_nodes);
	accept_nodes = anodes_cache.insert(accept_nodes);

	ProtoState proto(nnodev, accept_nodes);
	State *candidate = new State(node_map.size(), proto, other);
	pair<NodeMap::iterator, bool> slot = node_map.insert(proto, candidate);

	if (slot.second) {
		/* new proto state: it still needs its transitions computed */
		states.push_back(candidate);
		work_queue.push_back(candidate);
	} else {
		/* proto state already mapped: drop the speculative state */
		delete candidate;
	}

	return slot.first->second;
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structurally reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
|
2011-12-15 05:14:37 -08:00
|
|
|
/**
 * Compute the outgoing transitions of @state.
 *
 * The possible transitions are gathered by calling follow() on every
 * node in state->proto.nnodes; the result maps characters to sets of
 * nodes. Each such set is then resolved to a State via add_new_state()
 * and recorded in state->otherwise / state->trans.
 *
 * Note: the follow set for accept nodes is always empty so we don't
 * need to compute follow for the accept nodes in a protostate
 */
void DFA::update_state_transitions(State *state)
{
	Cases cases;

	hashedNodeVec::iterator node = state->proto.nnodes->begin();
	while (node != state->proto.nnodes->end()) {
		(*node)->follow(cases);
		node++;
	}

	/* Now for each set of nodes in the computed transitions, make
	 * sure that there is a state that maps to it, and add the
	 * matching case to the state.
	 */

	/* the default transition comes first; without one the state falls
	 * back to nonmatching
	 */
	state->otherwise = cases.otherwise ?
		add_new_state(cases.otherwise, nonmatching) : nonmatching;

	/* For each transition out of state, look up (or create) the State
	 * that its target set of nodes maps to
	 */
	for (Cases::iterator c = cases.begin(); c != cases.end(); c++) {
		State *target = add_new_state(c->second, nonmatching);

		/* Don't insert transition that the otherwise transition
		 * already covers
		 */
		if (target == state->otherwise)
			continue;

		state->trans[c->first] = target;
	}
}
|
|
|
|
|
2010-11-11 16:08:02 -08:00
|
|
|
/* WARNING: This routine can only be called from within DFA creation as
|
|
|
|
* the nodes value is only valid during dfa construction.
|
|
|
|
*/
|
|
|
|
void DFA::dump_node_to_dfa(void)
|
2010-11-09 11:55:40 -08:00
|
|
|
{
|
|
|
|
cerr << "Mapping of States to expr nodes\n"
|
|
|
|
" State <= Nodes\n"
|
|
|
|
"-------------------\n";
|
2010-11-11 16:08:02 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++)
|
2011-12-15 05:12:30 -08:00
|
|
|
cerr << " " << (*i)->label << " <= " << (*i)->proto << "\n";
|
2010-11-09 11:55:40 -08:00
|
|
|
}
|
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
/**
|
|
|
|
* Construct a DFA from a syntax tree.
|
|
|
|
*/
|
2011-03-13 05:55:25 -07:00
|
|
|
DFA::DFA(Node *root, dfaflags_t flags): root(root)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
2010-11-11 16:12:50 -08:00
|
|
|
int i = 0;
|
2010-01-08 02:17:45 -08:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if (flags & DFA_DUMP_PROGRESS)
|
|
|
|
fprintf(stderr, "Creating dfa:\r");
|
2010-01-08 02:17:45 -08:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (depth_first_traversal i(root); i; i++) {
|
|
|
|
(*i)->compute_nullable();
|
|
|
|
(*i)->compute_firstpos();
|
|
|
|
(*i)->compute_lastpos();
|
|
|
|
}
|
2010-01-08 02:17:45 -08:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if (flags & DFA_DUMP_PROGRESS)
|
|
|
|
fprintf(stderr, "Creating dfa: followpos\r");
|
|
|
|
for (depth_first_traversal i(root); i; i++) {
|
|
|
|
(*i)->compute_followpos();
|
|
|
|
}
|
|
|
|
|
2011-12-15 05:14:37 -08:00
|
|
|
nonmatching = add_new_state(new NodeSet, NULL);
|
|
|
|
start = add_new_state(new NodeSet(root->firstpos), nonmatching);
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
|
2010-11-11 16:19:47 -08:00
|
|
|
/* the work_queue contains the states that need to have their
|
|
|
|
* transitions computed. This could be done with a recursive
|
|
|
|
* algorithm instead of a work_queue, but it would be slightly slower
|
|
|
|
* and consume more memory.
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
*
|
|
|
|
* TODO: currently the work_queue is treated in a breadth first
|
|
|
|
* search manner. Test using the work_queue in a depth first
|
|
|
|
* manner, this may help reduce the number of entries on the
|
|
|
|
* work_queue at any given time, thus reducing peak memory use.
|
|
|
|
*/
|
2010-11-11 16:19:47 -08:00
|
|
|
work_queue.push_back(start);
|
2007-02-27 02:29:16 +00:00
|
|
|
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
while (!work_queue.empty()) {
|
2011-12-15 05:14:37 -08:00
|
|
|
if (i % 1000 == 0 && (flags & DFA_DUMP_PROGRESS)) {
|
|
|
|
cerr << "\033[2KCreating dfa: queue "
|
|
|
|
<< work_queue.size()
|
|
|
|
<< "\tstates "
|
|
|
|
<< states.size()
|
|
|
|
<< "\teliminated duplicates "
|
|
|
|
<< node_map.dup
|
|
|
|
<< "\r";
|
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
i++;
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2010-11-11 16:19:47 -08:00
|
|
|
State *from = work_queue.front();
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
work_queue.pop_front();
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2010-11-11 16:18:48 -08:00
|
|
|
/* Update 'from's transitions, and if it transitions to any
|
|
|
|
* unknown State create it and add it to the work_queue
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
*/
|
2011-12-15 05:14:37 -08:00
|
|
|
update_state_transitions(from);
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
|
2011-03-13 05:55:25 -07:00
|
|
|
} /* while (!work_queue.empty()) */
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
|
|
|
|
/* cleanup Sets of nodes used computing the DFA as they are no longer
|
|
|
|
* needed.
|
|
|
|
*/
|
|
|
|
for (depth_first_traversal i(root); i; i++) {
|
|
|
|
(*i)->firstpos.clear();
|
|
|
|
(*i)->lastpos.clear();
|
|
|
|
(*i)->followpos.clear();
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
2010-11-09 11:55:40 -08:00
|
|
|
|
|
|
|
if (flags & DFA_DUMP_NODE_TO_DFA)
|
2010-11-11 16:08:02 -08:00
|
|
|
dump_node_to_dfa();
|
2010-11-09 11:55:40 -08:00
|
|
|
|
2011-12-15 05:14:37 -08:00
|
|
|
if (flags & (DFA_DUMP_STATS)) {
|
|
|
|
cerr << "\033[2KCreated dfa: states "
|
|
|
|
<< states.size()
|
|
|
|
<< " proto { "
|
|
|
|
<< node_map
|
|
|
|
<< " }, nnodes { "
|
|
|
|
<< nnodes_cache
|
|
|
|
<< " }, anodes { "
|
|
|
|
<< anodes_cache
|
|
|
|
<< " }\n";
|
|
|
|
}
|
2010-07-10 17:47:25 -07:00
|
|
|
|
2011-12-15 05:14:37 -08:00
|
|
|
/* Clear out uniq_nnodes as they are no longer needed.
|
|
|
|
* Do not clear out uniq_anodes, as we need them for minimizations
|
|
|
|
* diffs, unions, ...
|
|
|
|
*/
|
|
|
|
nnodes_cache.clear();
|
|
|
|
node_map.clear();
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Tear down the DFA: clear both node caches, then delete every State
 * that is listed in states.
 */
DFA::~DFA()
{
	anodes_cache.clear();
	nnodes_cache.clear();

	Partition::iterator it = states.begin();
	while (it != states.end()) {
		delete *it;
		it++;
	}
}
|
|
|
|
|
2012-01-06 09:03:20 -08:00
|
|
|
/**
 * Advance @state over exactly @len characters of @str.
 *
 * @state: state to start matching from
 * @str:   input buffer; exactly @len characters are read from it
 * @len:   number of characters to consume
 *
 * Returns the state reached after the last character, which is @state
 * itself when @len is 0.
 */
State *DFA::match_len(State *state, const char *str, size_t len)
{
	for (size_t pos = 0; pos < len; pos++)
		state = state->next(str[pos]);

	return state;
}
|
|
|
|
|
2012-01-06 09:04:36 -08:00
|
|
|
/**
 * Advance @state over the characters of @str that precede the first
 * occurrence of @term.
 *
 * @state: state to start matching from
 * @str:   input; read up to, but not including, the first @term
 * @term:  terminating character; it is not fed to the DFA
 *
 * Returns the state reached when @term is encountered.
 *
 * NOTE(review): the loop stops only on @term, so @str must contain it;
 * nothing else bounds the read - confirm that every caller guarantees
 * this.
 */
State *DFA::match_until(State *state, const char *str, const char term)
{
	for (; *str != term; str++)
		state = state->next(*str);

	return state;
}
|
|
|
|
|
2012-01-06 09:03:20 -08:00
|
|
|
/**
 * Match the NUL terminated string @str, beginning at the start state.
 *
 * Returns the state reached once the terminating NUL is encountered.
 */
State *DFA::match(const char *str)
{
	State *reached = match_until(start, str, 0);

	return reached;
}
|
|
|
|
|
2010-11-09 11:56:28 -08:00
|
|
|
void DFA::dump_uniq_perms(const char *s)
|
|
|
|
{
|
2012-02-16 07:40:21 -08:00
|
|
|
set<perms_t> uniq;
|
2010-11-09 11:56:28 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++)
|
2012-02-16 07:40:21 -08:00
|
|
|
uniq.insert((*i)->perms);
|
2010-11-09 11:56:28 -08:00
|
|
|
|
|
|
|
cerr << "Unique Permission sets: " << s << " (" << uniq.size() << ")\n";
|
|
|
|
cerr << "----------------------\n";
|
2012-02-16 07:40:21 -08:00
|
|
|
for (set<perms_t >::iterator i = uniq.begin(); i != uniq.end(); i++) {
|
|
|
|
cerr << " allow:" << hex << i->allow << " deny:"
|
|
|
|
<< i->deny << " audit:" << i->audit
|
|
|
|
<< " quiet:" << i->quiet << dec << "\n";
|
2010-11-09 11:56:28 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-01-20 03:32:34 -08:00
|
|
|
/* Remove dead or unreachable states */
|
|
|
|
void DFA::remove_unreachable(dfaflags_t flags)
|
|
|
|
{
|
2011-03-13 05:55:25 -07:00
|
|
|
set<State *> reachable;
|
2010-01-20 03:32:34 -08:00
|
|
|
|
|
|
|
/* find the set of reachable states */
|
|
|
|
reachable.insert(nonmatching);
|
|
|
|
work_queue.push_back(start);
|
|
|
|
while (!work_queue.empty()) {
|
|
|
|
State *from = work_queue.front();
|
|
|
|
work_queue.pop_front();
|
|
|
|
reachable.insert(from);
|
|
|
|
|
2011-12-15 05:01:35 -08:00
|
|
|
if (from->otherwise != nonmatching &&
|
|
|
|
reachable.find(from->otherwise) == reachable.end())
|
2011-12-15 04:58:33 -08:00
|
|
|
work_queue.push_back(from->otherwise);
|
2010-01-20 03:32:34 -08:00
|
|
|
|
2011-12-15 04:58:33 -08:00
|
|
|
for (StateTrans::iterator j = from->trans.begin(); j != from->trans.end(); j++) {
|
2010-01-20 03:32:34 -08:00
|
|
|
if (reachable.find(j->second) == reachable.end())
|
|
|
|
work_queue.push_back(j->second);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* walk the set of states and remove any that aren't reachable */
|
|
|
|
if (reachable.size() < states.size()) {
|
|
|
|
int count = 0;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
Partition::iterator i;
|
|
|
|
Partition::iterator next;
|
2010-01-20 03:32:34 -08:00
|
|
|
for (i = states.begin(); i != states.end(); i = next) {
|
|
|
|
next = i;
|
|
|
|
next++;
|
|
|
|
if (reachable.find(*i) == reachable.end()) {
|
|
|
|
if (flags & DFA_DUMP_UNREACHABLE) {
|
2011-03-13 05:55:25 -07:00
|
|
|
cerr << "unreachable: " << **i;
|
2010-01-20 03:32:34 -08:00
|
|
|
if (*i == start)
|
|
|
|
cerr << " <==";
|
2012-02-16 07:40:21 -08:00
|
|
|
if (!(*i)->perms.is_null()) {
|
2011-03-13 05:55:25 -07:00
|
|
|
cerr << " (0x" << hex
|
2012-02-16 07:40:21 -08:00
|
|
|
<< (*i)->perms.allow << " "
|
|
|
|
<< (*i)->perms.deny << " "
|
|
|
|
<< (*i)->perms.audit << " "
|
|
|
|
<< (*i)->perms.quiet << dec
|
2011-03-13 05:55:25 -07:00
|
|
|
<< ')';
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
2011-03-13 05:55:25 -07:00
|
|
|
cerr << "\n";
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
State *current = *i;
|
|
|
|
states.erase(i);
|
|
|
|
delete(current);
|
|
|
|
count++;
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (count && (flags & DFA_DUMP_STATS))
|
|
|
|
cerr << "DFA: states " << states.size() << " removed "
|
|
|
|
<< count << " unreachable states\n";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* test if two states have the same transitions under partition_map */
|
2010-11-09 11:57:43 -08:00
|
|
|
bool DFA::same_mappings(State *s1, State *s2)
|
2010-01-20 03:32:34 -08:00
|
|
|
{
|
2011-12-15 05:01:35 -08:00
|
|
|
if (s1->otherwise != nonmatching) {
|
|
|
|
if (s2->otherwise == nonmatching)
|
2010-01-20 03:32:34 -08:00
|
|
|
return false;
|
2011-12-15 04:58:33 -08:00
|
|
|
Partition *p1 = s1->otherwise->partition;
|
|
|
|
Partition *p2 = s2->otherwise->partition;
|
2010-01-20 03:32:34 -08:00
|
|
|
if (p1 != p2)
|
|
|
|
return false;
|
2011-12-15 05:01:35 -08:00
|
|
|
} else if (s2->otherwise != nonmatching) {
|
2010-01-20 03:32:34 -08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-12-15 04:58:33 -08:00
|
|
|
if (s1->trans.size() != s2->trans.size())
|
2010-01-20 03:32:34 -08:00
|
|
|
return false;
|
2011-12-15 04:58:33 -08:00
|
|
|
for (StateTrans::iterator j1 = s1->trans.begin(); j1 != s1->trans.end(); j1++) {
|
|
|
|
StateTrans::iterator j2 = s2->trans.find(j1->first);
|
|
|
|
if (j2 == s2->trans.end())
|
2010-01-20 03:32:34 -08:00
|
|
|
return false;
|
2010-11-09 11:57:43 -08:00
|
|
|
Partition *p1 = j1->second->partition;
|
|
|
|
Partition *p2 = j2->second->partition;
|
2010-01-20 03:32:34 -08:00
|
|
|
if (p1 != p2)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Do simple djb2 hashing against a States transition cases
|
|
|
|
* this provides a rough initial guess at state equivalence as if a state
|
|
|
|
* has a different number of transitions or has transitions on different
|
2011-12-15 04:58:33 -08:00
|
|
|
* trans they will never be equivalent.
|
2010-01-20 03:32:34 -08:00
|
|
|
* Note: this only hashes based off of the alphabet (not destination)
|
|
|
|
* as different destinations could end up being equiv
|
|
|
|
*/
|
|
|
|
size_t DFA::hash_trans(State *s)
|
|
|
|
{
|
2011-03-13 05:55:25 -07:00
|
|
|
unsigned long hash = 5381;
|
2010-01-20 03:32:34 -08:00
|
|
|
|
2011-12-15 04:58:33 -08:00
|
|
|
for (StateTrans::iterator j = s->trans.begin(); j != s->trans.end(); j++) {
|
2010-01-20 03:32:34 -08:00
|
|
|
hash = ((hash << 5) + hash) + j->first;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
State *k = j->second;
|
2011-12-15 04:58:33 -08:00
|
|
|
hash = ((hash << 5) + hash) + k->trans.size();
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
|
|
|
|
2011-12-15 05:01:35 -08:00
|
|
|
if (s->otherwise != nonmatching) {
|
2010-01-20 03:32:34 -08:00
|
|
|
hash = ((hash << 5) + hash) + 5381;
|
2011-12-15 04:58:33 -08:00
|
|
|
State *k = s->otherwise;
|
|
|
|
hash = ((hash << 5) + hash) + k->trans.size();
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
2010-01-20 05:10:38 -08:00
|
|
|
|
2011-12-15 04:58:33 -08:00
|
|
|
hash = (hash << 8) | s->trans.size();
|
2011-03-13 05:55:25 -07:00
|
|
|
return hash;
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
|
|
|
|
2012-02-16 07:41:40 -08:00
|
|
|
int DFA::apply_and_clear_deny(void)
|
|
|
|
{
|
|
|
|
int c = 0;
|
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++)
|
|
|
|
c += (*i)->apply_and_clear_deny();
|
|
|
|
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
2010-01-20 03:32:34 -08:00
|
|
|
/* minimize the number of dfa states */
|
|
|
|
void DFA::minimize(dfaflags_t flags)
|
|
|
|
{
|
2012-02-16 07:41:40 -08:00
|
|
|
map<size_t, Partition *> perm_map;
|
2011-03-13 05:55:25 -07:00
|
|
|
list<Partition *> partitions;
|
|
|
|
|
2010-11-09 11:22:54 -08:00
|
|
|
/* Set up the initial partitions
|
|
|
|
* minimum of - 1 non accepting, and 1 accepting
|
|
|
|
* if trans hashing is used the accepting and non-accepting partitions
|
|
|
|
* can be further split based on the number and type of transitions
|
|
|
|
* a state makes.
|
|
|
|
* If permission hashing is enabled the accepting partitions can
|
|
|
|
* be further divided by permissions. This can result in not
|
|
|
|
* obtaining a truly minimized dfa but comes close, and can speedup
|
|
|
|
* minimization.
|
2010-01-20 03:32:34 -08:00
|
|
|
*/
|
|
|
|
int accept_count = 0;
|
2010-11-09 11:26:50 -08:00
|
|
|
int final_accept = 0;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
2012-02-16 07:41:40 -08:00
|
|
|
size_t hash = 0;
|
|
|
|
if (!(*i)->perms.is_null())
|
|
|
|
/* combine all states carrying accept info together
|
|
|
|
into a single initial partition */
|
|
|
|
hash = 1;
|
2010-11-09 11:22:54 -08:00
|
|
|
if (flags & DFA_CONTROL_MINIMIZE_HASH_TRANS)
|
2012-02-16 07:41:40 -08:00
|
|
|
hash |= hash_trans(*i) << 1;
|
|
|
|
map<size_t, Partition *>::iterator p = perm_map.find(hash);
|
2010-01-20 03:32:34 -08:00
|
|
|
if (p == perm_map.end()) {
|
2010-01-31 23:18:14 -08:00
|
|
|
Partition *part = new Partition();
|
2010-01-31 23:19:54 -08:00
|
|
|
part->push_back(*i);
|
2012-02-16 07:41:40 -08:00
|
|
|
perm_map.insert(make_pair(hash, part));
|
2010-01-20 03:32:34 -08:00
|
|
|
partitions.push_back(part);
|
2010-11-09 11:57:43 -08:00
|
|
|
(*i)->partition = part;
|
2012-02-16 07:41:40 -08:00
|
|
|
if (hash & 1)
|
2010-01-20 03:32:34 -08:00
|
|
|
accept_count++;
|
|
|
|
} else {
|
2010-11-09 11:57:43 -08:00
|
|
|
(*i)->partition = p->second;
|
2010-01-31 23:19:54 -08:00
|
|
|
p->second->push_back(*i);
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
|
2011-03-13 05:55:25 -07:00
|
|
|
if ((flags & DFA_DUMP_PROGRESS) && (partitions.size() % 1000 == 0))
|
|
|
|
cerr << "\033[2KMinimize dfa: partitions "
|
|
|
|
<< partitions.size() << "\tinit " << partitions.size()
|
|
|
|
<< " (accept " << accept_count << ")\r";
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
2012-02-16 07:41:40 -08:00
|
|
|
|
2010-11-09 11:26:18 -08:00
|
|
|
/* perm_map is no longer needed so free the memory it is using.
|
|
|
|
* Don't remove - doing it manually here helps reduce peak memory usage.
|
|
|
|
*/
|
|
|
|
perm_map.clear();
|
|
|
|
|
2010-01-20 03:32:34 -08:00
|
|
|
int init_count = partitions.size();
|
|
|
|
if (flags & DFA_DUMP_PROGRESS)
|
2011-03-13 05:55:25 -07:00
|
|
|
cerr << "\033[2KMinimize dfa: partitions " << partitions.size()
|
|
|
|
<< "\tinit " << init_count << " (accept "
|
|
|
|
<< accept_count << ")\r";
|
2010-01-20 03:32:34 -08:00
|
|
|
|
|
|
|
/* Now do repartitioning until each partition contains the set of
|
|
|
|
* states that are the same. This will happen when the partition
|
|
|
|
* splitting stabilizes. With a worst case of 1 state per partition
|
|
|
|
* ie. already minimized.
|
|
|
|
*/
|
2010-01-31 23:18:14 -08:00
|
|
|
Partition *new_part;
|
2010-01-20 03:32:34 -08:00
|
|
|
int new_part_count;
|
|
|
|
do {
|
|
|
|
new_part_count = 0;
|
2011-03-13 05:55:25 -07:00
|
|
|
for (list<Partition *>::iterator p = partitions.begin();
|
2010-01-20 03:32:34 -08:00
|
|
|
p != partitions.end(); p++) {
|
|
|
|
new_part = NULL;
|
|
|
|
State *rep = *((*p)->begin());
|
2010-01-31 23:18:14 -08:00
|
|
|
Partition::iterator next;
|
2011-03-13 05:55:25 -07:00
|
|
|
for (Partition::iterator s = ++(*p)->begin(); s != (*p)->end();) {
|
2010-11-09 11:57:43 -08:00
|
|
|
if (same_mappings(rep, *s)) {
|
2010-01-31 23:21:00 -08:00
|
|
|
++s;
|
2010-01-20 03:32:34 -08:00
|
|
|
continue;
|
2010-01-31 23:21:00 -08:00
|
|
|
}
|
2010-01-20 03:32:34 -08:00
|
|
|
if (!new_part) {
|
2010-01-31 23:18:14 -08:00
|
|
|
new_part = new Partition;
|
2011-03-13 05:55:25 -07:00
|
|
|
list<Partition *>::iterator tmp = p;
|
2010-01-31 23:21:00 -08:00
|
|
|
partitions.insert(++tmp, new_part);
|
|
|
|
new_part_count++;
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
2010-01-31 23:19:54 -08:00
|
|
|
new_part->push_back(*s);
|
2010-01-31 23:21:00 -08:00
|
|
|
s = (*p)->erase(s);
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
2010-01-31 23:21:00 -08:00
|
|
|
/* remapping partition_map for new_part entries
|
|
|
|
* Do not do this above as it messes up same_mappings
|
|
|
|
*/
|
2010-01-20 03:32:34 -08:00
|
|
|
if (new_part) {
|
2010-01-31 23:18:14 -08:00
|
|
|
for (Partition::iterator m = new_part->begin();
|
2010-01-20 03:32:34 -08:00
|
|
|
m != new_part->end(); m++) {
|
2010-11-09 11:57:43 -08:00
|
|
|
(*m)->partition = new_part;
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
|
|
|
}
|
2011-03-13 05:55:25 -07:00
|
|
|
if ((flags & DFA_DUMP_PROGRESS) && (partitions.size() % 100 == 0))
|
|
|
|
cerr << "\033[2KMinimize dfa: partitions "
|
|
|
|
<< partitions.size() << "\tinit "
|
|
|
|
<< init_count << " (accept "
|
|
|
|
<< accept_count << ")\r";
|
2010-01-31 23:12:33 -08:00
|
|
|
}
|
2011-03-13 05:55:25 -07:00
|
|
|
} while (new_part_count);
|
2010-01-20 03:32:34 -08:00
|
|
|
|
2010-11-09 11:26:50 -08:00
|
|
|
if (partitions.size() == states.size()) {
|
|
|
|
if (flags & DFA_DUMP_STATS)
|
2011-03-13 05:55:25 -07:00
|
|
|
cerr << "\033[2KDfa minimization no states removed: partitions "
|
|
|
|
<< partitions.size() << "\tinit " << init_count
|
|
|
|
<< " (accept " << accept_count << ")\n";
|
2010-01-20 03:32:34 -08:00
|
|
|
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Remap the dfa so it uses the representative states
|
|
|
|
* Use the first state of a partition as the representative state
|
|
|
|
* At this point all states within a partition have transitions
|
2010-11-09 11:27:36 -08:00
|
|
|
* to states within the same partitions, however this can slow
|
|
|
|
* down compressed dfa compression as there are more states,
|
2010-01-20 03:32:34 -08:00
|
|
|
*/
|
2011-05-20 09:26:44 -07:00
|
|
|
if (flags & DFA_DUMP_MIN_PARTS)
|
|
|
|
cerr << "Partitions after minimization\n";
|
2011-03-13 05:55:25 -07:00
|
|
|
for (list<Partition *>::iterator p = partitions.begin();
|
2010-01-20 03:32:34 -08:00
|
|
|
p != partitions.end(); p++) {
|
|
|
|
/* representative state for this partition */
|
|
|
|
State *rep = *((*p)->begin());
|
2011-05-20 09:26:44 -07:00
|
|
|
if (flags & DFA_DUMP_MIN_PARTS)
|
|
|
|
cerr << *rep << " : ";
|
2010-01-20 03:32:34 -08:00
|
|
|
|
|
|
|
/* update representative state's transitions */
|
2011-12-15 05:01:35 -08:00
|
|
|
if (rep->otherwise != nonmatching) {
|
2011-12-15 04:58:33 -08:00
|
|
|
Partition *partition = rep->otherwise->partition;
|
|
|
|
rep->otherwise = *partition->begin();
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
2011-12-15 04:58:33 -08:00
|
|
|
for (StateTrans::iterator c = rep->trans.begin(); c != rep->trans.end(); c++) {
|
2010-11-09 11:57:43 -08:00
|
|
|
Partition *partition = c->second->partition;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
c->second = *partition->begin();
|
|
|
|
}
|
|
|
|
|
|
|
|
//if ((*p)->size() > 1)
|
|
|
|
//cerr << rep->label << ": ";
|
2010-11-09 11:24:51 -08:00
|
|
|
/* clear the state label for all non representative states,
|
|
|
|
* and accumulate permissions */
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
for (Partition::iterator i = ++(*p)->begin(); i != (*p)->end(); i++) {
|
|
|
|
//cerr << " " << (*i)->label;
|
2011-05-20 09:26:44 -07:00
|
|
|
if (flags & DFA_DUMP_MIN_PARTS)
|
|
|
|
cerr << **i << ", ";
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
(*i)->label = -1;
|
2012-02-16 07:41:40 -08:00
|
|
|
rep->perms.add((*i)->perms);
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
}
|
2012-02-16 07:41:40 -08:00
|
|
|
if (!rep->perms.is_null())
|
2010-11-09 11:26:50 -08:00
|
|
|
final_accept++;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
//if ((*p)->size() > 1)
|
|
|
|
//cerr << "\n";
|
2011-05-20 09:26:44 -07:00
|
|
|
if (flags & DFA_DUMP_MIN_PARTS)
|
|
|
|
cerr << "\n";
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
2010-11-09 11:26:50 -08:00
|
|
|
if (flags & DFA_DUMP_STATS)
|
2011-03-13 05:55:25 -07:00
|
|
|
cerr << "\033[2KMinimized dfa: final partitions "
|
|
|
|
<< partitions.size() << " (accept " << final_accept
|
|
|
|
<< ")" << "\tinit " << init_count << " (accept "
|
|
|
|
<< accept_count << ")\n";
|
2010-01-20 03:32:34 -08:00
|
|
|
|
|
|
|
/* make sure nonmatching and start state are up to date with the
|
|
|
|
* mappings */
|
|
|
|
{
|
2010-11-09 11:57:43 -08:00
|
|
|
Partition *partition = nonmatching->partition;
|
2010-01-20 03:32:34 -08:00
|
|
|
if (*partition->begin() != nonmatching) {
|
|
|
|
nonmatching = *partition->begin();
|
|
|
|
}
|
|
|
|
|
2010-11-09 11:57:43 -08:00
|
|
|
partition = start->partition;
|
2010-01-20 03:32:34 -08:00
|
|
|
if (*partition->begin() != start) {
|
|
|
|
start = *partition->begin();
|
|
|
|
}
|
|
|
|
}
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
|
2010-01-20 03:32:34 -08:00
|
|
|
/* Now that the states have been remapped, remove all states
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
* that are not the representative states for their partition, they
|
|
|
|
* will have a label == -1
|
2010-01-20 03:32:34 -08:00
|
|
|
*/
|
2011-03-13 05:55:25 -07:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end();) {
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
if ((*i)->label == -1) {
|
2010-03-13 02:23:23 -08:00
|
|
|
State *s = *i;
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
i = states.erase(i);
|
2010-03-13 02:23:23 -08:00
|
|
|
delete(s);
|
This patch reworks the internal structures used to compute the dfa. It is on
the large side, and I experimented with different ways to split this up but in
the end, anything I could do would result in a series of dependent patches
that would require all of them to be applied to get meaningful functional
changes.
The patch structural reworks the dfa so that
- there is a new State class, it takes the place of sets of nodes in the
dfa, and allows storing state information within the state
- removes the dfa transition table, which mapped sets of nodes to a
transition table, by moving the transition into the new state class
- computes dfa state permissions once (stored in the state)
- expression tree nodes are independent from a created dfa. This allows
computed expression trees, and sets of Nodes (used as protostates when
computing the dfa). To be managed independent of the dfa life time.
This will allow reducing the amount of memory used, in the future,
and will also allow separating the expression tree logic out into
its own file.
The patch has some effect on reducing peak memory usage, and computation
time. The actual amount of reduction is dependent on the number of states
in the dfa with larger saving being achieved on larger dfas. Eg. for
the test evince profile I was using it makes the parser about 7% faster with a
peak memory usage about 12% less.
This patch changes the initial partition hashing of minimization resulting
in slightly smaller dfas.
2010-11-09 11:14:55 -08:00
|
|
|
} else
|
|
|
|
i++;
|
2010-01-20 03:32:34 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
/* Cleanup */
|
|
|
|
while (!partitions.empty()) {
|
2010-01-31 23:18:14 -08:00
|
|
|
Partition *p = partitions.front();
|
2010-01-20 03:32:34 -08:00
|
|
|
partitions.pop_front();
|
|
|
|
delete(p);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-02-27 02:29:16 +00:00
|
|
|
/**
|
|
|
|
* text-dump the DFA (for debugging).
|
|
|
|
*/
|
2011-03-13 05:55:25 -07:00
|
|
|
void DFA::dump(ostream & os)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
2011-03-13 05:55:25 -07:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
2012-02-16 07:40:21 -08:00
|
|
|
if (*i == start || !(*i)->perms.is_null()) {
|
2011-03-13 05:55:25 -07:00
|
|
|
os << **i;
|
|
|
|
if (*i == start)
|
2012-03-09 04:17:47 -08:00
|
|
|
os << " <== (allow/deny/audit/quiet)";
|
|
|
|
if (!(*i)->perms.is_null()) {
|
|
|
|
os << " (0x " << hex << (*i)->perms.allow << "/"
|
|
|
|
<< (*i)->perms.deny << "/"
|
|
|
|
<< (*i)->perms.audit << "/"
|
|
|
|
<< (*i)->perms.quiet << ')';
|
2011-03-13 05:55:25 -07:00
|
|
|
}
|
|
|
|
os << "\n";
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
2011-03-13 05:55:25 -07:00
|
|
|
os << "\n";
|
|
|
|
|
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
2011-12-15 05:01:35 -08:00
|
|
|
if ((*i)->otherwise != nonmatching)
|
2012-03-09 04:14:34 -08:00
|
|
|
os << **i << " -> " << *(*i)->otherwise << "\n";
|
2011-12-15 04:58:33 -08:00
|
|
|
for (StateTrans::iterator j = (*i)->trans.begin();
|
|
|
|
j != (*i)->trans.end(); j++) {
|
2012-03-09 04:14:34 -08:00
|
|
|
os << **i << " -> " << *(j)->second << ": "
|
2011-03-13 05:55:25 -07:00
|
|
|
<< j->first << "\n";
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
2011-03-13 05:55:25 -07:00
|
|
|
os << "\n";
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Create a dot (graphviz) graph from the DFA (for debugging).
|
|
|
|
*/
|
2011-03-13 05:55:25 -07:00
|
|
|
void DFA::dump_dot_graph(ostream & os)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
2011-03-13 05:55:25 -07:00
|
|
|
os << "digraph \"dfa\" {" << "\n";
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2011-03-13 05:55:25 -07:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
|
|
|
if (*i == nonmatching)
|
|
|
|
continue;
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2011-03-13 05:55:25 -07:00
|
|
|
os << "\t\"" << **i << "\" [" << "\n";
|
|
|
|
if (*i == start) {
|
|
|
|
os << "\t\tstyle=bold" << "\n";
|
|
|
|
}
|
2012-03-09 04:17:47 -08:00
|
|
|
if (!(*i)->perms.is_null()) {
|
|
|
|
os << "\t\tlabel=\"" << **i << "\\n(0x " << hex
|
|
|
|
<< (*i)->perms.allow << "/"
|
|
|
|
<< (*i)->perms.deny << "/"
|
|
|
|
<< (*i)->perms.audit << "/"
|
|
|
|
<< (*i)->perms.quiet << ")\"\n";
|
2011-03-13 05:55:25 -07:00
|
|
|
}
|
|
|
|
os << "\t]" << "\n";
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
2011-03-13 05:55:25 -07:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
|
|
|
Chars excluded;
|
|
|
|
|
2011-12-15 04:58:33 -08:00
|
|
|
for (StateTrans::iterator j = (*i)->trans.begin(); j != (*i)->trans.end(); j++) {
|
2011-03-13 05:55:25 -07:00
|
|
|
if (j->second == nonmatching)
|
|
|
|
excluded.insert(j->first);
|
|
|
|
else {
|
2011-05-20 09:24:40 -07:00
|
|
|
os << "\t\"" << **i << "\" -> \"" << *j->second
|
2011-03-13 05:55:25 -07:00
|
|
|
<< "\" [" << "\n";
|
2011-05-20 09:24:40 -07:00
|
|
|
os << "\t\tlabel=\"" << j->first << "\"\n";
|
2011-03-13 05:55:25 -07:00
|
|
|
os << "\t]" << "\n";
|
|
|
|
}
|
|
|
|
}
|
2011-12-15 05:01:35 -08:00
|
|
|
if ((*i)->otherwise != nonmatching) {
|
2011-12-15 04:58:33 -08:00
|
|
|
os << "\t\"" << **i << "\" -> \"" << *(*i)->otherwise
|
2011-03-13 05:55:25 -07:00
|
|
|
<< "\" [" << "\n";
|
|
|
|
if (!excluded.empty()) {
|
|
|
|
os << "\t\tlabel=\"[^";
|
|
|
|
for (Chars::iterator i = excluded.begin();
|
|
|
|
i != excluded.end(); i++) {
|
|
|
|
os << *i;
|
|
|
|
}
|
|
|
|
os << "]\"" << "\n";
|
|
|
|
}
|
|
|
|
os << "\t]" << "\n";
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
}
|
2011-03-13 05:55:25 -07:00
|
|
|
os << '}' << "\n";
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Compute character equivalence classes in the DFA to save space in the
|
|
|
|
* transition table.
|
|
|
|
*/
|
2010-01-08 02:17:45 -08:00
|
|
|
map<uchar, uchar> DFA::equivalence_classes(dfaflags_t flags)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
2011-03-13 05:55:25 -07:00
|
|
|
map<uchar, uchar> classes;
|
|
|
|
uchar next_class = 1;
|
|
|
|
|
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
|
|
|
/* Group edges to the same next state together */
|
|
|
|
map<const State *, Chars> node_sets;
|
2011-12-15 04:58:33 -08:00
|
|
|
for (StateTrans::iterator j = (*i)->trans.begin(); j != (*i)->trans.end(); j++)
|
2011-03-13 05:55:25 -07:00
|
|
|
node_sets[j->second].insert(j->first);
|
|
|
|
|
|
|
|
for (map<const State *, Chars>::iterator j = node_sets.begin();
|
|
|
|
j != node_sets.end(); j++) {
|
|
|
|
/* Group edges to the same next state together by class */
|
|
|
|
map<uchar, Chars> node_classes;
|
|
|
|
bool class_used = false;
|
|
|
|
for (Chars::iterator k = j->second.begin();
|
|
|
|
k != j->second.end(); k++) {
|
|
|
|
pair<map<uchar, uchar>::iterator, bool> x = classes.insert(make_pair(*k, next_class));
|
|
|
|
if (x.second)
|
|
|
|
class_used = true;
|
|
|
|
pair<map<uchar, Chars>::iterator, bool> y = node_classes.insert(make_pair(x.first->second, Chars()));
|
|
|
|
y.first->second.insert(*k);
|
|
|
|
}
|
|
|
|
if (class_used) {
|
|
|
|
next_class++;
|
|
|
|
class_used = false;
|
|
|
|
}
|
|
|
|
for (map<uchar, Chars>::iterator k = node_classes.begin();
|
|
|
|
k != node_classes.end(); k++) {
|
|
|
|
/**
|
|
|
|
* If any other characters are in the same class, move
|
|
|
|
* the characters in this class into their own new
|
|
|
|
* class
|
|
|
|
*/
|
|
|
|
map<uchar, uchar>::iterator l;
|
|
|
|
for (l = classes.begin(); l != classes.end(); l++) {
|
|
|
|
if (l->second == k->first &&
|
|
|
|
k->second.find(l->first) == k->second.end()) {
|
|
|
|
class_used = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (class_used) {
|
|
|
|
for (Chars::iterator l = k->second.begin();
|
|
|
|
l != k->second.end(); l++) {
|
|
|
|
classes[*l] = next_class;
|
|
|
|
}
|
|
|
|
next_class++;
|
|
|
|
class_used = false;
|
|
|
|
}
|
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
}
|
2010-01-08 02:17:45 -08:00
|
|
|
|
2011-03-13 05:55:25 -07:00
|
|
|
if (flags & DFA_DUMP_EQUIV_STATS)
|
|
|
|
fprintf(stderr, "Equiv class reduces to %d classes\n",
|
|
|
|
next_class - 1);
|
|
|
|
return classes;
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Text-dump the equivalence classes (for debugging).
|
|
|
|
*/
|
2011-03-13 05:55:25 -07:00
|
|
|
void dump_equivalence_classes(ostream &os, map<uchar, uchar> &eq)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
2011-03-13 05:55:25 -07:00
|
|
|
map<uchar, Chars> rev;
|
|
|
|
|
|
|
|
for (map<uchar, uchar>::iterator i = eq.begin(); i != eq.end(); i++) {
|
|
|
|
Chars &chars = rev.insert(make_pair(i->second, Chars())).first->second;
|
|
|
|
chars.insert(i->first);
|
|
|
|
}
|
|
|
|
os << "(eq):" << "\n";
|
|
|
|
for (map<uchar, Chars>::iterator i = rev.begin(); i != rev.end(); i++) {
|
|
|
|
os << (int)i->first << ':';
|
|
|
|
Chars &chars = i->second;
|
|
|
|
for (Chars::iterator j = chars.begin(); j != chars.end(); j++) {
|
|
|
|
os << ' ' << *j;
|
|
|
|
}
|
|
|
|
os << "\n";
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Replace characters with classes (which are also represented as
|
|
|
|
* characters) in the DFA transition table.
|
|
|
|
*/
|
2011-03-13 05:55:25 -07:00
|
|
|
void DFA::apply_equivalence_classes(map<uchar, uchar> &eq)
|
2007-02-27 02:29:16 +00:00
|
|
|
{
|
|
|
|
/**
|
|
|
|
* Note: We only transform the transition table; the nodes continue to
|
|
|
|
* contain the original characters.
|
|
|
|
*/
|
2011-03-13 05:55:25 -07:00
|
|
|
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
|
|
|
|
map<uchar, State *> tmp;
|
2011-12-15 04:58:33 -08:00
|
|
|
tmp.swap((*i)->trans);
|
|
|
|
for (StateTrans::iterator j = tmp.begin(); j != tmp.end(); j++)
|
|
|
|
(*i)->trans.insert(make_pair(eq[j->first], j->second));
|
2011-03-13 05:55:25 -07:00
|
|
|
}
|
2007-02-27 02:29:16 +00:00
|
|
|
}
|
|
|
|
|
2007-03-30 15:20:57 +00:00
|
|
|
#if 0
/*
 * Compiled out.  For every accept node, compute the intersection of the
 * accept-node sets of all states that contain it.
 *
 * NOTE(review): this uses States/State iterators over nodes, which does
 * not look like the Partition/State usage in the rest of this file -
 * confirm whether it can still build before re-enabling it.
 */
typedef set <ImportantNode *>AcceptNodes;
map<ImportantNode *, AcceptNodes> dominance(DFA & dfa)
{
	map<ImportantNode *, AcceptNodes> is_dominated;

	for (States::iterator st = dfa.states.begin(); st != dfa.states.end(); ++st) {
		/* accept nodes that are part of this state */
		AcceptNodes here;

		for (State::iterator n = (*st)->begin(); n != (*st)->end(); ++n) {
			AcceptNode *accept = dynamic_cast<AcceptNode *>(*n);

			if (accept)
				here.insert(accept);
		}

		for (AcceptNodes::iterator a = here.begin(); a != here.end(); ++a) {
			pair<map<ImportantNode *, AcceptNodes>::iterator, bool> slot =
				is_dominated.insert(make_pair(*a, here));

			/* first sighting: the set just stored is the answer so far */
			if (slot.second)
				continue;

			/* otherwise narrow the stored set to what is also here */
			AcceptNodes &known = slot.first->second;
			AcceptNodes common;

			for (AcceptNodes::iterator k = known.begin();
			     k != known.end(); ++k) {
				if (here.find(*k) != here.end())
					common.insert(*k);
			}
			common.swap(known);
		}
	}
	return is_dominated;
}
#endif
|
2007-02-27 02:29:16 +00:00
|
|
|
|
2007-11-16 09:27:34 +00:00
|
|
|
static inline int diff_qualifiers(uint32_t perm1, uint32_t perm2)
|
|
|
|
{
|
2008-04-16 04:44:21 +00:00
|
|
|
return ((perm1 & AA_EXEC_TYPE) && (perm2 & AA_EXEC_TYPE) &&
|
|
|
|
(perm1 & AA_EXEC_TYPE) != (perm2 & AA_EXEC_TYPE));
|
2007-11-16 09:27:34 +00:00
|
|
|
}
|
2007-03-30 20:38:51 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Compute the permission flags that this state corresponds to. If we
|
|
|
|
* have any exact matches, then they override the execute and safe
|
|
|
|
* execute flags.
|
|
|
|
*/
|
2012-02-16 07:40:21 -08:00
|
|
|
int accept_perms(NodeSet *state, perms_t &perms)
|
2007-03-30 20:38:51 +00:00
|
|
|
{
|
2012-02-16 07:40:21 -08:00
|
|
|
int error = 0;
|
2012-02-16 07:41:40 -08:00
|
|
|
uint32_t exact_match_allow = 0;
|
|
|
|
uint32_t exact_audit = 0;
|
2011-03-13 05:55:25 -07:00
|
|
|
|
2012-02-16 07:40:21 -08:00
|
|
|
perms.clear();
|
|
|
|
|
|
|
|
if (!state)
|
|
|
|
return error;
|
2011-12-15 05:14:37 -08:00
|
|
|
|
2011-03-13 05:55:25 -07:00
|
|
|
for (NodeSet::iterator i = state->begin(); i != state->end(); i++) {
|
|
|
|
MatchFlag *match;
|
|
|
|
if (!(match = dynamic_cast<MatchFlag *>(*i)))
|
|
|
|
continue;
|
|
|
|
if (dynamic_cast<ExactMatchFlag *>(match)) {
|
|
|
|
/* exact match only ever happens with x */
|
2012-02-16 07:40:21 -08:00
|
|
|
if (!is_merged_x_consistent(exact_match_allow,
|
|
|
|
match->flag))
|
|
|
|
error = 1;;
|
|
|
|
exact_match_allow |= match->flag;
|
2011-03-13 05:55:25 -07:00
|
|
|
exact_audit |= match->audit;
|
|
|
|
} else if (dynamic_cast<DenyMatchFlag *>(match)) {
|
2012-02-16 07:41:40 -08:00
|
|
|
perms.deny |= match->flag;
|
|
|
|
perms.quiet |= match->audit;
|
2011-03-13 05:55:25 -07:00
|
|
|
} else {
|
2012-02-16 07:41:40 -08:00
|
|
|
if (!is_merged_x_consistent(perms.allow, match->flag))
|
2012-02-16 07:40:21 -08:00
|
|
|
error = 1;
|
2012-02-16 07:41:40 -08:00
|
|
|
perms.allow |= match->flag;
|
|
|
|
perms.audit |= match->audit;
|
2011-03-13 05:55:25 -07:00
|
|
|
}
|
|
|
|
}
|
2007-03-30 20:38:51 +00:00
|
|
|
|
2012-02-16 07:41:40 -08:00
|
|
|
perms.allow |= exact_match_allow & ~(ALL_AA_EXEC_TYPE);
|
2007-11-16 09:35:57 +00:00
|
|
|
|
2012-02-16 07:40:21 -08:00
|
|
|
if (exact_match_allow & AA_USER_EXEC_TYPE) {
|
2012-02-16 07:41:40 -08:00
|
|
|
perms.allow = (exact_match_allow & AA_USER_EXEC_TYPE) |
|
|
|
|
(perms.allow & ~AA_USER_EXEC_TYPE);
|
|
|
|
perms.audit = (exact_audit & AA_USER_EXEC_TYPE) |
|
|
|
|
(perms.audit & ~AA_USER_EXEC_TYPE);
|
|
|
|
perms.exact = AA_USER_EXEC_TYPE;
|
2011-03-13 05:55:25 -07:00
|
|
|
}
|
2012-02-16 07:40:21 -08:00
|
|
|
if (exact_match_allow & AA_OTHER_EXEC_TYPE) {
|
2012-02-16 07:41:40 -08:00
|
|
|
perms.allow = (exact_match_allow & AA_OTHER_EXEC_TYPE) |
|
|
|
|
(perms.allow & ~AA_OTHER_EXEC_TYPE);
|
|
|
|
perms.audit = (exact_audit & AA_OTHER_EXEC_TYPE) |
|
|
|
|
(perms.audit & ~AA_OTHER_EXEC_TYPE);
|
|
|
|
perms.exact |= AA_OTHER_EXEC_TYPE;
|
|
|
|
}
|
|
|
|
if (AA_USER_EXEC & perms.deny)
|
|
|
|
perms.deny |= AA_USER_EXEC_TYPE;
|
|
|
|
|
|
|
|
if (AA_OTHER_EXEC & perms.deny)
|
|
|
|
perms.deny |= AA_OTHER_EXEC_TYPE;
|
2008-04-06 18:52:47 +00:00
|
|
|
|
2012-02-16 07:41:40 -08:00
|
|
|
perms.allow &= ~perms.deny;
|
|
|
|
perms.quiet &= perms.deny;
|
2008-04-06 18:52:47 +00:00
|
|
|
|
2012-02-16 07:40:21 -08:00
|
|
|
if (error)
|
2011-03-13 05:55:25 -07:00
|
|
|
fprintf(stderr, "profile has merged rule with conflicting x modifiers\n");
|
2011-02-22 03:47:03 -08:00
|
|
|
|
2012-02-16 07:40:21 -08:00
|
|
|
return error;
|
2007-03-30 20:38:51 +00:00
|
|
|
}
|