Merge parser: improve dfa generation

Speed up and reduce memory usage of dfa generation.

A variety of changes to improve dfa generation:
- By switching to NodeVec instead of Node sets we can reduce memory usage slightly and reduce code.
- By using charsets for chars we reduce code and increase the chances of node merging/reduction, which reduces memory usage slightly.
- By merging charsets we reduce the number of nodes.

Signed-off-by: John Johansen <john.johansen@canonical.com>
MR: https://gitlab.com/apparmor/apparmor/-/merge_requests/1066
Approved-by: John Johansen <john@jjmx.net>
Merged-by: John Johansen <john@jjmx.net>
2025-03-04 08:24:42 +01:00 · 2023-07-11 02:26:10 +00:00 · 2023-07-11 02:26:10 +00:00 · 74b101faa8
commit 74b101faa8
parent 1dfd26aea7 24f834b128
4 changed files with 22 additions and 88 deletions
--- a/parser/libapparmor_re/aare_rules.cc
+++ b/parser/libapparmor_re/aare_rules.cc
@ -100,7 +100,6 @@ bool aare_rules::add_rule_vec(int deny, uint32_t perms, uint32_t audit,
 		if ((*i)->is_type(NODE_TYPE_STAR) ||
 		    (*i)->is_type(NODE_TYPE_PLUS) ||
 		    (*i)->is_type(NODE_TYPE_ANYCHAR) ||
-		    (*i)->is_type(NODE_TYPE_CHARSET) ||
 		    (*i)->is_type(NODE_TYPE_NOTCHARSET))
 			exact_match = 0;
 	}
--- a/parser/libapparmor_re/expr-tree.h
+++ b/parser/libapparmor_re/expr-tree.h
@ -964,35 +964,7 @@ unsigned long hash_NodeSet(NodeSet *ns);
 void flip_tree(Node *node);


-
-/*
- * hashedNodes - for efficient set comparison
- */
-class hashedNodeSet {
-public:
-	unsigned long hash;
-	NodeSet *nodes;
-
-	hashedNodeSet(NodeSet *n): nodes(n)
-	{
-		hash = hash_NodeSet(n);
-	}
-
-	bool operator<(hashedNodeSet const &rhs)const
-	{
-		if (hash == rhs.hash) {
-			if (nodes->size() == rhs.nodes->size())
-				return *nodes < *(rhs.nodes);
-			else
-				return nodes->size() < rhs.nodes->size();
-		} else {
-			return hash < rhs.hash;
-		}
-	}
-};
-
-
-class hashedNodeVec {
+class NodeVec {
 public:
 	typedef ImportantNode ** iterator;
 	iterator begin() { return nodes; }
@ -1002,7 +974,7 @@ public:
 	unsigned long len;
 	ImportantNode **nodes;

-	hashedNodeVec(NodeSet *n)
+	NodeVec(NodeSet *n)
 	{
 		hash = hash_NodeSet(n);
 		len = n->size();
@ -1014,7 +986,7 @@ public:
 		}
 	}

-	hashedNodeVec(NodeSet *n, unsigned long h): hash(h)
+	NodeVec(NodeSet *n, unsigned long h): hash(h)
 	{
 		len = n->size();
 		nodes = new ImportantNode *[n->size()];
@ -1024,14 +996,14 @@ public:
 		}
 	}

-	~hashedNodeVec()
+	~NodeVec()
 	{
 		delete [] nodes;
 	}

 	unsigned long size()const { return len; }

-	bool operator<(hashedNodeVec const &rhs)const
+	bool operator<(NodeVec const &rhs)const
 	{
 		if (hash == rhs.hash) {
 			if (len == rhs.size()) {
@ -1057,45 +1029,8 @@ public:
 	virtual unsigned long size(void) const = 0;
 };

-class NodeCache: public CacheStats {
-public:
-	set<hashedNodeSet> cache;
-
-	NodeCache(void): cache() { };
-	~NodeCache() { clear(); };
-
-	virtual unsigned long size(void) const { return cache.size(); }
-
-	void clear()
-	{
-		for (set<hashedNodeSet>::iterator i = cache.begin();
-		     i != cache.end(); i++) {
-			delete i->nodes;
-		}
-		cache.clear();
-		CacheStats::clear();
-	}
-
-	NodeSet *insert(NodeSet *nodes)
-	{
-		if (!nodes)
-			return NULL;
-		pair<set<hashedNodeSet>::iterator,bool> uniq;
-		uniq = cache.insert(hashedNodeSet(nodes));
-		if (uniq.second == false) {
-			delete(nodes);
-			dup++;
-		} else {
-			sum += nodes->size();
-			if (nodes->size() > max)
-				max = nodes->size();
-		}
-		return uniq.first->nodes;
-	}
-};
-
 struct deref_less_than {
-       bool operator()(hashedNodeVec * const &lhs, hashedNodeVec * const &rhs)const
+       bool operator()(NodeVec * const &lhs, NodeVec * const &rhs)const
 		{
 			return *lhs < *rhs;
 		}
@ -1103,7 +1038,7 @@ struct deref_less_than {

 class NodeVecCache: public CacheStats {
 public:
-	set<hashedNodeVec *, deref_less_than> cache;
+	set<NodeVec *, deref_less_than> cache;

 	NodeVecCache(void): cache() { };
 	~NodeVecCache() { clear(); };
@ -1112,7 +1047,7 @@ public:

 	void clear()
 	{
-		for (set<hashedNodeVec *>::iterator i = cache.begin();
+		for (set<NodeVec *>::iterator i = cache.begin();
 		     i != cache.end(); i++) {
 			delete *i;
 		}
@ -1120,12 +1055,12 @@ public:
 		CacheStats::clear();
 	}

-	hashedNodeVec *insert(NodeSet *nodes)
+	NodeVec *insert(NodeSet *nodes)
 	{
 		if (!nodes)
 			return NULL;
-		pair<set<hashedNodeVec *>::iterator,bool> uniq;
-		hashedNodeVec *nv = new hashedNodeVec(nodes);
+		pair<set<NodeVec *>::iterator,bool> uniq;
+		NodeVec *nv = new NodeVec(nodes);
 		uniq = cache.insert(nv);
 		if (uniq.second == false) {
 			delete nv;
--- a/parser/libapparmor_re/hfa.cc
+++ b/parser/libapparmor_re/hfa.cc
@ -303,12 +303,12 @@ static void split_node_types(NodeSet *nodes, NodeSet **anodes, NodeSet **nnodes

 State *DFA::add_new_state(NodeSet *anodes, NodeSet *nnodes, State *other)
 {
-	hashedNodeVec *nnodev;
+	NodeVec *nnodev, *anodev;
 	nnodev = nnodes_cache.insert(nnodes);
-	anodes = anodes_cache.insert(anodes);
+	anodev = anodes_cache.insert(anodes);

 	ProtoState proto;
-	proto.init(nnodev, anodes);
+	proto.init(nnodev, anodev);
 	State *state = new State(node_map.size(), proto, other, filedfa);
 	pair<NodeMap::iterator,bool> x = node_map.insert(proto, state);
 	if (x.second == false) {
@ -347,7 +347,7 @@ void DFA::update_state_transitions(State *state)
 	 * need to compute follow for the accept nodes in a protostate
 	 */
 	Cases cases;
-	for (hashedNodeVec::iterator i = state->proto.nnodes->begin(); i != state->proto.nnodes->end(); i++)
+	for (NodeVec::iterator i = state->proto.nnodes->begin(); i != state->proto.nnodes->end(); i++)
 		(*i)->follow(cases);

 	/* Now for each set of nodes in the computed transitions, make
@ -1340,7 +1340,7 @@ static inline int diff_qualifiers(uint32_t perm1, uint32_t perm2)
 * have any exact matches, then they override the execute and safe
 * execute flags.
 */
-int accept_perms(NodeSet *state, perms_t &perms, bool filedfa)
+int accept_perms(NodeVec *state, perms_t &perms, bool filedfa)
 {
 	int error = 0;
 	uint32_t exact_match_allow = 0;
@ -1351,7 +1351,7 @@ int accept_perms(NodeSet *state, perms_t &perms, bool filedfa)
 	if (!state)
 		return error;

-	for (NodeSet::iterator i = state->begin(); i != state->end(); i++) {
+	for (NodeVec::iterator i = state->begin(); i != state->end(); i++) {
 		if (!(*i)->is_type(NODE_TYPE_MATCHFLAG))
 			continue;

--- a/parser/libapparmor_re/hfa.h
+++ b/parser/libapparmor_re/hfa.h
@ -133,20 +133,20 @@ public:
 	uint32_t allow, deny, audit, quiet, exact;
 };

-int accept_perms(NodeSet *state, perms_t &perms, bool filedfa);
+int accept_perms(NodeVec *state, perms_t &perms, bool filedfa);

 /*
 * ProtoState - NodeSet and ancillery information used to create a state
 */
 class ProtoState {
 public:
-	hashedNodeVec *nnodes;
-	NodeSet *anodes;
+	NodeVec *nnodes;
+	NodeVec *anodes;

 	/* init is used instead of a constructor because ProtoState is used
 	 * in a union
 	 */
-	void init(hashedNodeVec *n, NodeSet *a = NULL)
+	void init(NodeVec *n, NodeVec *a = NULL)
 	{
 		nnodes = n;
 		anodes = a;
@ -312,7 +312,7 @@ class DFA {
 			     unsigned int &max);

 	/* temporary values used during computations */
-	NodeCache anodes_cache;
+	NodeVecCache anodes_cache;
 	NodeVecCache nnodes_cache;
 	NodeMap node_map;
 	list<State *> work_queue;