Merge parser: improve dfa generation

Speed up dfa generation and reduce its memory usage

A variety of changes to improve dfa generation
- By switching to NodeVec instead of NodeSet, we reduce memory usage slightly and reduce the amount of code.
- By using charsets for chars, we reduce the amount of code and increase the chances of node merging/reduction, which reduces memory usage slightly.
- By merging charsets, we reduce the number of nodes.

Signed-off-by: John Johansen <john.johansen@canonical.com>

MR: https://gitlab.com/apparmor/apparmor/-/merge_requests/1066
Approved-by: John Johansen <john@jjmx.net>
Merged-by: John Johansen <john@jjmx.net>
This commit is contained in:
John Johansen 2023-07-11 02:26:10 +00:00
commit 74b101faa8
4 changed files with 22 additions and 88 deletions

View file

@ -100,7 +100,6 @@ bool aare_rules::add_rule_vec(int deny, uint32_t perms, uint32_t audit,
if ((*i)->is_type(NODE_TYPE_STAR) ||
(*i)->is_type(NODE_TYPE_PLUS) ||
(*i)->is_type(NODE_TYPE_ANYCHAR) ||
(*i)->is_type(NODE_TYPE_CHARSET) ||
(*i)->is_type(NODE_TYPE_NOTCHARSET))
exact_match = 0;
}

View file

@ -964,35 +964,7 @@ unsigned long hash_NodeSet(NodeSet *ns);
void flip_tree(Node *node);
/*
* hashedNodes - for efficient set comparison
*/
/* Pairs a NodeSet pointer with its hash_NodeSet() value so that ordering
 * can be decided by the hash before the set contents are compared.
 * This class has no destructor; it never frees the NodeSet it points to.
 */
class hashedNodeSet {
public:
/* hash of *nodes, computed once in the constructor */
unsigned long hash;
/* the wrapped set; stored by pointer, not copied */
NodeSet *nodes;
/* Wrap n and cache its hash_NodeSet() value. */
hashedNodeSet(NodeSet *n): nodes(n)
{
hash = hash_NodeSet(n);
}
/* Ordering: by hash first; on equal hashes by set size; on equal
 * sizes by NodeSet's own operator<.
 */
bool operator<(hashedNodeSet const &rhs)const
{
if (hash == rhs.hash) {
if (nodes->size() == rhs.nodes->size())
return *nodes < *(rhs.nodes);
else
return nodes->size() < rhs.nodes->size();
} else {
return hash < rhs.hash;
}
}
};
class hashedNodeVec {
class NodeVec {
public:
typedef ImportantNode ** iterator;
iterator begin() { return nodes; }
@ -1002,7 +974,7 @@ public:
unsigned long len;
ImportantNode **nodes;
hashedNodeVec(NodeSet *n)
NodeVec(NodeSet *n)
{
hash = hash_NodeSet(n);
len = n->size();
@ -1014,7 +986,7 @@ public:
}
}
hashedNodeVec(NodeSet *n, unsigned long h): hash(h)
NodeVec(NodeSet *n, unsigned long h): hash(h)
{
len = n->size();
nodes = new ImportantNode *[n->size()];
@ -1024,14 +996,14 @@ public:
}
}
~hashedNodeVec()
~NodeVec()
{
delete [] nodes;
}
unsigned long size()const { return len; }
bool operator<(hashedNodeVec const &rhs)const
bool operator<(NodeVec const &rhs)const
{
if (hash == rhs.hash) {
if (len == rhs.size()) {
@ -1057,45 +1029,8 @@ public:
virtual unsigned long size(void) const = 0;
};
/* Cache of unique NodeSets, stored as hashedNodeSet entries in a std::set.
 * The cache deletes the NodeSets it holds in clear(), which the
 * destructor also calls.
 */
class NodeCache: public CacheStats {
public:
set<hashedNodeSet> cache;
NodeCache(void): cache() { };
~NodeCache() { clear(); };
/* Number of distinct sets currently held in the cache. */
virtual unsigned long size(void) const { return cache.size(); }
/* Delete every cached NodeSet, empty the cache, and then call
 * CacheStats::clear().
 */
void clear()
{
for (set<hashedNodeSet>::iterator i = cache.begin();
i != cache.end(); i++) {
delete i->nodes;
}
cache.clear();
CacheStats::clear();
}
/* Insert nodes into the cache and return the cached set.
 * Returns NULL if nodes is NULL.  If an equivalent entry is already
 * cached, the passed-in set is deleted and the existing cached pointer
 * is returned, so callers must use the return value rather than the
 * pointer they passed in.
 */
NodeSet *insert(NodeSet *nodes)
{
if (!nodes)
return NULL;
pair<set<hashedNodeSet>::iterator,bool> uniq;
uniq = cache.insert(hashedNodeSet(nodes));
if (uniq.second == false) {
/* duplicate: free the caller's copy and count it */
delete(nodes);
dup++;
} else {
/* new entry: update the size statistics (dup, sum and max are
 * presumably CacheStats members -- not visible here)
 */
sum += nodes->size();
if (nodes->size() > max)
max = nodes->size();
}
return uniq.first->nodes;
}
};
struct deref_less_than {
bool operator()(hashedNodeVec * const &lhs, hashedNodeVec * const &rhs)const
bool operator()(NodeVec * const &lhs, NodeVec * const &rhs)const
{
return *lhs < *rhs;
}
@ -1103,7 +1038,7 @@ struct deref_less_than {
class NodeVecCache: public CacheStats {
public:
set<hashedNodeVec *, deref_less_than> cache;
set<NodeVec *, deref_less_than> cache;
NodeVecCache(void): cache() { };
~NodeVecCache() { clear(); };
@ -1112,7 +1047,7 @@ public:
void clear()
{
for (set<hashedNodeVec *>::iterator i = cache.begin();
for (set<NodeVec *>::iterator i = cache.begin();
i != cache.end(); i++) {
delete *i;
}
@ -1120,12 +1055,12 @@ public:
CacheStats::clear();
}
hashedNodeVec *insert(NodeSet *nodes)
NodeVec *insert(NodeSet *nodes)
{
if (!nodes)
return NULL;
pair<set<hashedNodeVec *>::iterator,bool> uniq;
hashedNodeVec *nv = new hashedNodeVec(nodes);
pair<set<NodeVec *>::iterator,bool> uniq;
NodeVec *nv = new NodeVec(nodes);
uniq = cache.insert(nv);
if (uniq.second == false) {
delete nv;

View file

@ -303,12 +303,12 @@ static void split_node_types(NodeSet *nodes, NodeSet **anodes, NodeSet **nnodes
State *DFA::add_new_state(NodeSet *anodes, NodeSet *nnodes, State *other)
{
hashedNodeVec *nnodev;
NodeVec *nnodev, *anodev;
nnodev = nnodes_cache.insert(nnodes);
anodes = anodes_cache.insert(anodes);
anodev = anodes_cache.insert(anodes);
ProtoState proto;
proto.init(nnodev, anodes);
proto.init(nnodev, anodev);
State *state = new State(node_map.size(), proto, other, filedfa);
pair<NodeMap::iterator,bool> x = node_map.insert(proto, state);
if (x.second == false) {
@ -347,7 +347,7 @@ void DFA::update_state_transitions(State *state)
* need to compute follow for the accept nodes in a protostate
*/
Cases cases;
for (hashedNodeVec::iterator i = state->proto.nnodes->begin(); i != state->proto.nnodes->end(); i++)
for (NodeVec::iterator i = state->proto.nnodes->begin(); i != state->proto.nnodes->end(); i++)
(*i)->follow(cases);
/* Now for each set of nodes in the computed transitions, make
@ -1340,7 +1340,7 @@ static inline int diff_qualifiers(uint32_t perm1, uint32_t perm2)
* have any exact matches, then they override the execute and safe
* execute flags.
*/
int accept_perms(NodeSet *state, perms_t &perms, bool filedfa)
int accept_perms(NodeVec *state, perms_t &perms, bool filedfa)
{
int error = 0;
uint32_t exact_match_allow = 0;
@ -1351,7 +1351,7 @@ int accept_perms(NodeSet *state, perms_t &perms, bool filedfa)
if (!state)
return error;
for (NodeSet::iterator i = state->begin(); i != state->end(); i++) {
for (NodeVec::iterator i = state->begin(); i != state->end(); i++) {
if (!(*i)->is_type(NODE_TYPE_MATCHFLAG))
continue;

View file

@ -133,20 +133,20 @@ public:
uint32_t allow, deny, audit, quiet, exact;
};
int accept_perms(NodeSet *state, perms_t &perms, bool filedfa);
int accept_perms(NodeVec *state, perms_t &perms, bool filedfa);
/*
* ProtoState - NodeSet and ancillary information used to create a state
*/
class ProtoState {
public:
hashedNodeVec *nnodes;
NodeSet *anodes;
NodeVec *nnodes;
NodeVec *anodes;
/* init is used instead of a constructor because ProtoState is used
* in a union
*/
void init(hashedNodeVec *n, NodeSet *a = NULL)
void init(NodeVec *n, NodeVec *a = NULL)
{
nnodes = n;
anodes = a;
@ -312,7 +312,7 @@ class DFA {
unsigned int &max);
/* temporary values used during computations */
NodeCache anodes_cache;
NodeVecCache anodes_cache;
NodeVecCache nnodes_cache;
NodeMap node_map;
list<State *> work_queue;