apparmor/parser/libapparmor_re/hfa.cc
John Johansen 53d00b4d2b rename hashedNodeVec to NodeVec
Shorten the name length by dropping the leading "hashed".

Signed-off-by: John Johansen <john.johansen@canonical.com>
2023-07-03 00:49:56 -07:00

1407 lines
39 KiB
C++

/*
* (C) 2006, 2007 Andreas Gruenbacher <agruen@suse.de>
* Copyright (c) 2003-2008 Novell, Inc. (All rights reserved)
* Copyright 2009-2012 Canonical Ltd.
*
* The libapparmor library is licensed under the terms of the GNU
* Lesser General Public License, version 2.1. Please see the file
* COPYING.LGPL.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*
* Base of implementation based on the Lexical Analysis chapter of:
* Alfred V. Aho, Ravi Sethi, Jeffrey D. Ullman:
* Compilers: Principles, Techniques, and Tools (The "Dragon Book"),
* Addison-Wesley, 1986.
*/
#include <list>
#include <vector>
#include <stack>
#include <set>
#include <map>
#include <ostream>
#include <iostream>
#include <fstream>
#include <string.h>
#include "expr-tree.h"
#include "hfa.h"
#include "../immunix.h"
/**
 * Stream a one-line summary of @cache: its size, the dup counter, the max
 * value (labelled "longest") and, for a non-empty cache, sum / size
 * (labelled "avg").
 */
ostream &operator<<(ostream &os, const CacheStats &cache)
{
	os << "cache: size=" << cache.size()
	   << " dups=" << cache.dup
	   << " longest=" << cache.max;

	/* the average is only emitted when there is something to divide by */
	if (cache.size() != 0)
		os << " avg=" << cache.sum / cache.size();

	return os;
}
/**
 * Stream the label of a ProtoState: its nnodes and anodes members,
 * comma separated and wrapped in braces.
 */
ostream &operator<<(ostream &os, const ProtoState &proto)
{
	return os << '{' << proto.nnodes << ',' << proto.anodes << '}';
}
/**
 * Stream the label of a (const) State wrapped in braces, e.g. "{label}".
 */
ostream &operator<<(ostream &os, const State &state)
{
	return os << '{' << state.label << '}';
}
/**
 * Stream the label of a non-const State wrapped in braces; produces the
 * same text as the const State overload.
 */
ostream &operator<<(ostream &os, State &state)
{
	return os << '{' << state.label << '}';
}
/**
 * diff_weight - Find differential compression distance between @rel and @this
 * @rel: State to compare to
 * @max_range: total number of input values a state can transition on; used
 *             to count the default transitions of @rel that must be masked
 *             when the two states have different defaults
 * @upper_bound: exclusive upper limit of the input values scanned when @rel
 *               is itself differentially encoded
 * Returns: An integer indicating how good rel is as a base, larger == better
 *
 * Find the relative weighted difference for differential state compression
 * with queried state being compressed against @rel
 *
 * +1 for each transition that matches (char and dest - saves a transition)
 *  0 for each transition that doesn't match and exists in both states
 *  0 for transition that self has and @rel doesn't (no extra required)
 * -1 for each transition that is in @rel and not in @this (have to override)
 *
 * @rel should not be a state that has already been made differential or it may
 * introduce extra transitions as it does not recurse to find all transitions
 *
 * Should be applied after state minimization
 */
int State::diff_weight(State *rel, int max_range, int upper_bound)
{
	int weight = 0;
	int first = 0;

	if (this == rel)
		return 0;

	if (rel->diff->rel) {
		/* Can only be diff encoded against states that are relative
		 * to a state of a lower depth. ie, at most one sibling in
		 * the chain
		 */
		if (rel->diff->rel->diff->depth >= this->diff->depth)
			return 0;
	} else if (rel->diff->depth >= this->diff->depth)
		return 0;

	/* Start the scan below 0 when @rel has a negative (out of band)
	 * transition. Only look at begin() when @rel actually has explicit
	 * transitions: dereferencing begin() of an empty container is
	 * undefined behavior.
	 */
	if (!rel->trans.empty() && rel->trans.begin()->first.c < first)
		first = rel->trans.begin()->first.c;

	if (rel->flags & DiffEncodeFlag) {
		/* @rel is already differential, so its effective transition
		 * for every input has to be resolved through next()
		 */
		for (int i = first; i < upper_bound; i++) {
			State *state = rel->next(i);
			StateTrans::iterator j = trans.find(i);
			if (j != trans.end()) {
				if (state == j->second)
					weight++;
				/* else
				   0 - keep transition to mask
				*/
			} else if (state == otherwise) {
				/* 0 - match of default against @rel
				 * We don't save a transition but don't have
				 * to mask either
				 */
			} else {
				/* @rel has transition not covered by @this.
				 * Need to add a transition to mask it
				 */
				weight--;
			}
		}
		return weight;
	}

	/* number of @rel transitions whose input also appears in @this */
	unsigned int count = 0;
	for (StateTrans::iterator i = rel->trans.begin(); i != rel->trans.end();
	     i++) {
		StateTrans::iterator j = trans.find(i->first);
		if (j != trans.end()) {
			if (i->second == j->second)
				weight++;
			/* else
			   0 - keep transition to mask
			*/
			count++;
		} else if (i->second == otherwise) {
			/* 0 - match of default against @rel
			 * We don't save a transition but don't have to
			 * mask either
			 */
		} else {
			/* rel has transition not covered by @this. Need to
			 * add a transition to mask
			 */
			weight--;
		}
	}

	/* cover transitions in @this but not in @rel */
	unsigned int this_count = 0;
	if (count < trans.size()) {
		for (StateTrans::iterator i = trans.begin(); i != trans.end(); i++) {
			StateTrans::iterator j = rel->trans.find(i->first);
			if (j == rel->trans.end()) {
				this_count++;
				if (i->second == rel->otherwise)
					/* replaced by rel->otherwise */
					weight++;
			}
		}
	}

	if (rel->otherwise != otherwise) {
		/* rel default transitions have to be masked with transitions
		 * This covers all transitions not covered above
		 */
		weight -= (max_range) - (rel->trans.size() + this_count);
	}

	return weight;
}
/**
 * make_relative - Make this state relative to @rel
 * @rel: state to make this state relative to
 * @upper_bound: the largest value for an input transition (256 for a byte).
 *
 * @rel can be a relative (differentially compressed state)
 *
 * Returns: net number of transitions saved (removed minus masking
 * transitions added); 0 when nothing is done because @rel is NULL, is this
 * state, or this state is already differentially encoded.
 */
int State::make_relative(State *rel, int upper_bound)
{
	int weight = 0;
	int first = 0;

	if (this == rel || !rel)
		return 0;

	if (flags & DiffEncodeFlag)
		return 0;

	/* Start below 0 when @rel has a negative (out of band) transition.
	 * Guard against a @rel with no explicit transitions: dereferencing
	 * begin() of an empty container is undefined behavior.
	 */
	if (!rel->trans.empty() && rel->trans.begin()->first.c < 0)
		first = rel->trans.begin()->first.c;

	flags |= DiffEncodeFlag;

	for (int i = first; i < upper_bound; i++) {
		State *next = rel->next(i);

		StateTrans::iterator j = trans.find(i);
		if (j != trans.end()) {
			if (j->second == next) {
				/* @rel supplies this transition, drop ours */
				trans.erase(j);
				weight++;
			}
			/* else keep transition to mask */
		} else if (otherwise == next) {
			/* do nothing, otherwise transition disappears when
			 * reassigned
			 */
		} else {
			/* need a new transition to mask those in lower state */
			trans[i] = otherwise;
			weight--;
		}
	}

	/* the default now chains to @rel instead of a real default state */
	otherwise = rel;

	return weight;
}
/**
 * flatten_relative - remove differential encode from this state
 * @nonmatching: the nonmatching state for the state machine
 * @upper_bound: the largest value for an input transition (256 for a byte).
 *
 * Resolves the effective target of every input in the scanned range via
 * next(), picks the most frequent target as the new default (otherwise),
 * stores explicit transitions only for inputs that differ from that default
 * and clears DiffEncodeFlag. Does nothing for a state that is not
 * differentially encoded.
 */
void State::flatten_relative(State *nonmatching, int upper_bound)
{
	if (!(flags & DiffEncodeFlag))
		return;

	/* how many inputs lead to each target state */
	map<State *, int> count;

	/* include input -1 in the scan only if it leads somewhere useful */
	int first = 0;
	if (next(-1) != nonmatching)
		first = -1;

	for (int i = first; i < upper_bound; i++)
		count[next(i)] += 1;

	/* the most common target becomes the new default transition */
	int j = first;
	State *def = next(first);
	for (int i = first + 1; i < upper_bound; i++) {
		if (count[next(i)] > count[next(j)]) {
			j = i;
			def = next(i);
		}
	}

	for (int i = first; i < upper_bound; i++) {
		/* Resolve the effective target BEFORE touching trans: testing
		 * trans[i] for a missing key would default-insert a NULL
		 * entry which a later next(i) lookup could then return.
		 */
		State *target = next(i);
		StateTrans::iterator t = trans.find(i);

		if (t != trans.end()) {
			/* explicit transition now covered by the default */
			if (t->second == def)
				trans.erase(t);
		} else if (target != def) {
			/* inherited transition must become explicit */
			trans[i] = target;
		}
	}

	otherwise = def;
	flags = flags & ~DiffEncodeFlag;
}
/**
 * Split @nodes into accepting and non-accepting nodes.
 *
 * Accepting nodes are moved out of @nodes into a newly allocated set that
 * is returned through @anodes (left NULL when there are none). @nodes is
 * modified in place and handed back through @nnodes.
 */
static void split_node_types(NodeSet *nodes, NodeSet **anodes, NodeSet **nnodes)
{
	*nnodes = NULL;
	*anodes = NULL;

	NodeSet::iterator cur = nodes->begin();
	while (cur != nodes->end()) {
		if (!(*cur)->is_accept()) {
			++cur;
			continue;
		}

		/* allocate the accept set lazily, on the first accept node */
		if (*anodes == NULL)
			*anodes = new NodeSet;
		(*anodes)->insert(*cur);

		/* step past the element before erasing it */
		nodes->erase(cur++);
	}

	*nnodes = nodes;
}
/**
 * Return the state for the proto state built from @nnodes and @anodes,
 * creating it if node_map does not hold one yet.
 *
 * Both node sets are first passed through their caches. A freshly created
 * state is appended to states and to the work_queue; if node_map already
 * has an entry for the proto state the tentative state is deleted and the
 * existing one is returned.
 */
State *DFA::add_new_state(NodeSet *anodes, NodeSet *nnodes, State *other)
{
	NodeVec *cached_nnodes = nnodes_cache.insert(nnodes);
	anodes = anodes_cache.insert(anodes);

	ProtoState proto;
	proto.init(cached_nnodes, anodes);

	State *candidate = new State(node_map.size(), proto, other, filedfa);
	pair<NodeMap::iterator, bool> res = node_map.insert(proto, candidate);

	if (!res.second) {
		/* already known, the tentative state is not needed */
		delete candidate;
		return res.first->second;
	}

	states.push_back(candidate);
	work_queue.push_back(candidate);
	return res.first->second;
}
/**
 * Convenience overload: split @nodes into accepting and non-accepting
 * nodes, then look up / create the matching state.
 *
 * The splitting of nodes should probably get pushed down into follow(),
 * ie. put in separate lists from the start.
 */
State *DFA::add_new_state(NodeSet *nodes, State *other)
{
	NodeSet *accepting;
	NodeSet *remaining;

	split_node_types(nodes, &accepting, &remaining);
	return add_new_state(accepting, remaining, other);
}
/**
 * Compute the default and per-character transitions of @state.
 *
 * The follow sets of all non-accepting nodes of the state's proto state are
 * merged into a Cases object mapping characters to node sets. (The follow
 * set of an accept node is always empty, so accept nodes are skipped.)
 * Every resulting node set is mapped to a state through add_new_state(),
 * which creates and queues states that do not exist yet.
 */
void DFA::update_state_transitions(State *state)
{
	Cases cases;
	for (NodeVec::iterator n = state->proto.nnodes->begin();
	     n != state->proto.nnodes->end(); ++n)
		(*n)->follow(cases);

	/* The default transition goes first so that character transitions
	 * duplicating it can be left out below.
	 */
	state->otherwise = cases.otherwise ?
		add_new_state(cases.otherwise, nonmatching) : nonmatching;

	for (Cases::iterator c = cases.begin(); c != cases.end(); ++c) {
		State *target = add_new_state(c->second, nonmatching);

		/* already covered by the otherwise transition */
		if (target == state->otherwise)
			continue;

		state->trans[c->first] = target;

		/* negative inputs are out of band: track the largest magnitude */
		if (c->first.c >= 0)
			continue;
		if (-c->first.c > oob_range)
			oob_range = -c->first.c;
	}
}
/* WARNING: This routine can only be called from within DFA creation as
* the nodes value is only valid during dfa construction.
*/
void DFA::dump_node_to_dfa(void)
{
cerr << "Mapping of States to expr nodes\n"
" State <= Nodes\n"
"-------------------\n";
for (Partition::iterator i = states.begin(); i != states.end(); i++)
cerr << " " << (*i)->label << " <= " << (*i)->proto << "\n";
}
void DFA::process_work_queue(const char *header, dfaflags_t flags)
{
int i = 0;
while (!work_queue.empty()) {
if (i % 1000 == 0 && (flags & DFA_DUMP_PROGRESS)) {
cerr << "\033[2K" << header << ": queue "
<< work_queue.size()
<< "\tstates "
<< states.size()
<< "\teliminated duplicates "
<< node_map.dup
<< "\r";
}
i++;
State *from = work_queue.front();
work_queue.pop_front();
/* Update 'from's transitions, and if it transitions to any
* unknown State create it and add it to the work_queue
*/
update_state_transitions(from);
} /* while (!work_queue.empty()) */
}
/**
 * Construct a DFA from a syntax tree.
 * @root: root of the expression tree the DFA is built from
 * @flags: DFA_DUMP_* bits selecting progress/statistics output on stderr
 * @buildfiledfa: stored in filedfa and passed on to every State created
 *
 * Computes nullable/firstpos/lastpos and then followpos for every node,
 * creates the nonmatching and start states and processes the work queue
 * until every state has its transitions computed. The per-node position
 * sets, nnodes_cache and node_map are cleared before returning.
 */
DFA::DFA(Node *root, dfaflags_t flags, bool buildfiledfa): root(root), filedfa(buildfiledfa)
{
diffcount = 0; /* set by diff_encode */
/* defaults for byte input; max_range and ord_range are adjusted below
 * once the out of band range is known
 */
max_range = 256;
upper_bound = 256;
oob_range = 0;
ord_range = 8;
if (flags & DFA_DUMP_PROGRESS)
fprintf(stderr, "Creating dfa:\r");
/* first pass over the tree: per-node nullable/firstpos/lastpos */
for (depth_first_traversal i(root); i; i++) {
(*i)->compute_nullable();
(*i)->compute_firstpos();
(*i)->compute_lastpos();
}
if (flags & DFA_DUMP_PROGRESS)
fprintf(stderr, "Creating dfa: followpos\r");
/* second pass: followpos, done after the first pass has completed */
for (depth_first_traversal i(root); i; i++) {
(*i)->compute_followpos();
}
/* nonmatching is built from an empty node set, start from the
 * firstpos of the root
 */
nonmatching = add_new_state(new NodeSet, NULL);
start = add_new_state(new NodeSet(root->firstpos), nonmatching);
/* the work_queue contains the states that need to have their
 * transitions computed. This could be done with a recursive
 * algorithm instead of a work_queue, but it would be slightly slower
 * and consume more memory.
 *
 * TODO: currently the work_queue is treated in a breadth first
 * search manner. Test using the work_queue in a depth first
 * manner, this may help reduce the number of entries on the
 * work_queue at any given time, thus reducing peak memory use.
 *
 * NOTE(review): add_new_state() already queues every state it creates,
 * so start looks to be queued twice here - presumably harmless, confirm.
 */
work_queue.push_back(start);
process_work_queue("Creating dfa", flags);
/* out of band inputs found while building extend the input range */
max_range += oob_range;
/* if oob_range is ever greater than 256 need to move to computing this */
if (oob_range)
ord_range = 9;
/* cleanup Sets of nodes used computing the DFA as they are no longer
 * needed.
 */
for (depth_first_traversal i(root); i; i++) {
(*i)->firstpos.clear();
(*i)->lastpos.clear();
(*i)->followpos.clear();
}
if (flags & DFA_DUMP_NODE_TO_DFA)
dump_node_to_dfa();
if (flags & (DFA_DUMP_STATS)) {
cerr << "\033[2KCreated dfa: states "
<< states.size()
<< " proto { "
<< node_map
<< " }, nnodes { "
<< nnodes_cache
<< " }, anodes { "
<< anodes_cache
<< " }\n";
}
/* Clear out nnodes_cache and node_map as they are no longer needed.
 * Do not clear out anodes_cache, as we need it for minimizations
 * diffs, unions, ...
 */
nnodes_cache.clear();
node_map.clear();
}
/**
 * Release the node caches and delete every state held in states.
 */
DFA::~DFA()
{
	anodes_cache.clear();
	nnodes_cache.clear();

	Partition::iterator st = states.begin();
	while (st != states.end()) {
		delete *st;
		++st;
	}
}
/**
 * Walk the DFA from @state over exactly @len characters of @str and
 * return the state reached.
 */
State *DFA::match_len(State *state, const char *str, size_t len)
{
	while (len > 0) {
		state = state->next(*str);
		++str;
		--len;
	}
	return state;
}
/**
 * Walk the DFA from @state over @str until the character @term is seen
 * (@term itself is not consumed) and return the state reached.
 */
State *DFA::match_until(State *state, const char *str, const char term)
{
	for (; *str != term; ++str)
		state = state->next(*str);
	return state;
}
/**
 * Match the NUL terminated string @str beginning at the start state and
 * return the state reached.
 */
State *DFA::match(const char *str)
{
	State *reached = match_until(start, str, 0);
	return reached;
}
void DFA::dump_uniq_perms(const char *s)
{
set<perms_t> uniq;
for (Partition::iterator i = states.begin(); i != states.end(); i++)
uniq.insert((*i)->perms);
cerr << "Unique Permission sets: " << s << " (" << uniq.size() << ")\n";
cerr << "----------------------\n";
for (set<perms_t >::iterator i = uniq.begin(); i != uniq.end(); i++) {
cerr << " allow:" << hex << i->allow << " deny:"
<< i->deny << " audit:" << i->audit
<< " quiet:" << i->quiet << dec << "\n";
}
}
/**
 * Remove dead or unreachable states.
 * @flags: DFA_DUMP_UNREACHABLE lists every removed state on stderr,
 *         DFA_DUMP_STATS prints a summary when states were removed
 *
 * Walks the transition graph from the start state. The nonmatching state is
 * always treated as reachable. Every state not found by the walk is erased
 * from states and deleted.
 */
void DFA::remove_unreachable(dfaflags_t flags)
{
	set<State *> reachable;

	/* find the set of reachable states */
	reachable.insert(nonmatching);
	reachable.insert(start);
	work_queue.push_back(start);
	while (!work_queue.empty()) {
		State *from = work_queue.front();
		work_queue.pop_front();
		reachable.insert(from);

		/* States are marked reachable when they are queued rather
		 * than when they are popped, so a state with many incoming
		 * transitions is queued and scanned only once.
		 */
		if (from->otherwise != nonmatching &&
		    reachable.insert(from->otherwise).second)
			work_queue.push_back(from->otherwise);

		for (StateTrans::iterator j = from->trans.begin(); j != from->trans.end(); j++) {
			if (reachable.insert(j->second).second)
				work_queue.push_back(j->second);
		}
	}

	/* nothing to remove when every state was reached */
	if (reachable.size() >= states.size())
		return;

	/* walk the set of states and remove any that aren't reachable */
	int count = 0;
	for (Partition::iterator i = states.begin(); i != states.end(); ) {
		if (reachable.find(*i) != reachable.end()) {
			i++;
			continue;
		}
		if (flags & DFA_DUMP_UNREACHABLE) {
			cerr << "unreachable: " << **i;
			if (*i == start)
				cerr << " <==";
			if ((*i)->perms.is_accept())
				(*i)->perms.dump(cerr);
			cerr << "\n";
		}
		State *current = *i;
		/* erase() hands back the iterator following the removed one */
		i = states.erase(i);
		delete(current);
		count++;
	}

	if (count && (flags & DFA_DUMP_STATS))
		cerr << "DFA: states " << states.size() << " removed "
		     << count << " unreachable states\n";
}
/**
 * Test if two states have the same transitions under the current
 * partitioning, i.e. for every input both states lead into the same
 * partition.
 *
 * Assumes otherwise is set to the best choice: if there are multiple
 * equivalent otherwise choices this can fail to fully minimize the dfa, so
 * the same otherwise has to be chosen consistently in such cases.
 */
bool DFA::same_mappings(State *s1, State *s2)
{
	if (s1->otherwise->partition != s2->otherwise->partition)
		return false;

	StateTrans::iterator a = s1->trans.begin();
	StateTrans::iterator b = s2->trans.begin();

	/* merge-walk both transition tables in key order */
	while (a != s1->trans.end() && b != s2->trans.end()) {
		if (a->first < b->first) {
			/* input only in s1: s2 uses its default for it */
			if (a->second->partition != s2->otherwise->partition)
				return false;
			++a;
		} else if (a->first == b->first) {
			if (a->second->partition != b->second->partition)
				return false;
			++a;
			++b;
		} else {
			/* input only in s2: s1 uses its default for it */
			if (s1->otherwise->partition != b->second->partition)
				return false;
			++b;
		}
	}

	/* leftovers of either table are compared with the other's default */
	while (a != s1->trans.end()) {
		if (a->second->partition != s2->otherwise->partition)
			return false;
		++a;
	}
	while (b != s2->trans.end()) {
		if (b->second->partition != s1->otherwise->partition)
			return false;
		++b;
	}

	return true;
}
int DFA::apply_and_clear_deny(void)
{
int c = 0;
for (Partition::iterator i = states.begin(); i != states.end(); i++)
c += (*i)->apply_and_clear_deny();
return c;
}
/**
 * minimize the number of dfa states
 * @flags: DFA_DUMP_PROGRESS, DFA_DUMP_STATS and DFA_DUMP_MIN_PARTS select
 *         diagnostic output on stderr
 *
 * States are grouped into partitions by their permissions, the partitions
 * are split until all states of a partition have the same mappings (see
 * same_mappings()), then every partition is collapsed into its first
 * state and the remaining states are deleted.
 */
void DFA::minimize(dfaflags_t flags)
{
map<pair<uint64_t, size_t>, Partition *> perm_map;
list<Partition *> partitions;
/* Set up the initial partitions
 * minimum of - 1 non accepting, and 1 accepting
 * if trans hashing is used the accepting and non-accepting partitions
 * can be further split based on the number and type of transitions
 * a state makes.
 * If permission hashing is enabled the accepting partitions can
 * be further divided by permissions. This can result in not
 * obtaining a truly minimized dfa but comes close, and can speedup
 * minimization.
 */
int accept_count = 0;
int final_accept = 0;
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
/* hash is always 0 here, so only permtype distinguishes the groups */
size_t hash = 0;
/* key: packed audit/quiet&deny bits in the upper 32 bits, allow in
 * the lower bits
 */
uint64_t permtype = ((uint64_t) (PACK_AUDIT_CTL((*i)->perms.audit, (*i)->perms.quiet & (*i)->perms.deny)) << 32) | (uint64_t) (*i)->perms.allow;
pair<uint64_t, size_t> group = make_pair(permtype, hash);
map<pair<uint64_t, size_t>, Partition *>::iterator p = perm_map.find(group);
if (p == perm_map.end()) {
/* first state with this key: open a new partition for it */
Partition *part = new Partition();
part->push_back(*i);
perm_map.insert(make_pair(group, part));
partitions.push_back(part);
(*i)->partition = part;
/* a non-zero key is counted as an accepting partition */
if (permtype)
accept_count++;
} else {
(*i)->partition = p->second;
p->second->push_back(*i);
}
if ((flags & DFA_DUMP_PROGRESS) && (partitions.size() % 1000 == 0))
cerr << "\033[2KMinimize dfa: partitions "
<< partitions.size() << "\tinit " << partitions.size()
<< " (accept " << accept_count << ")\r";
}
/* perm_map is no longer needed so free the memory it is using.
 * Don't remove - doing it manually here helps reduce peak memory usage.
 */
perm_map.clear();
int init_count = partitions.size();
if (flags & DFA_DUMP_PROGRESS)
cerr << "\033[2KMinimize dfa: partitions " << partitions.size()
<< "\tinit " << init_count << " (accept "
<< accept_count << ")\r";
/* Now do repartitioning until each partition contains the set of
 * states that are the same. This will happen when the partition
 * splitting stabilizes. With a worst case of 1 state per partition
 * ie. already minimized.
 */
Partition *new_part;
int new_part_count;
do {
new_part_count = 0;
for (list<Partition *>::iterator p = partitions.begin();
p != partitions.end(); p++) {
new_part = NULL;
/* the first state of a partition is its representative */
State *rep = *((*p)->begin());
Partition::iterator next;
/* compare every other state of the partition with rep */
for (Partition::iterator s = ++(*p)->begin(); s != (*p)->end();) {
if (same_mappings(rep, *s)) {
++s;
continue;
}
/* states that differ from rep are collected in one new
 * partition, inserted right after the current one
 */
if (!new_part) {
new_part = new Partition;
list<Partition *>::iterator tmp = p;
partitions.insert(++tmp, new_part);
new_part_count++;
}
new_part->push_back(*s);
s = (*p)->erase(s);
}
/* remapping partition_map for new_part entries
 * Do not do this above as it messes up same_mappings
 */
if (new_part) {
for (Partition::iterator m = new_part->begin();
m != new_part->end(); m++) {
(*m)->partition = new_part;
}
}
if ((flags & DFA_DUMP_PROGRESS) && (partitions.size() % 100 == 0))
cerr << "\033[2KMinimize dfa: partitions "
<< partitions.size() << "\tinit "
<< init_count << " (accept "
<< accept_count << ")\r";
}
/* repeat until a full pass splits no partition */
} while (new_part_count);
/* one partition per state means there is nothing to merge */
if (partitions.size() == states.size()) {
if (flags & DFA_DUMP_STATS)
cerr << "\033[2KDfa minimization no states removed: partitions "
<< partitions.size() << "\tinit " << init_count
<< " (accept " << accept_count << ")\n";
goto out;
}
/* Remap the dfa so it uses the representative states
 * Use the first state of a partition as the representative state
 * At this point all states with in a partition have transitions
 * to states within the same partitions, however this can slow
 * down compressed dfa compression as there are more states,
 */
if (flags & DFA_DUMP_MIN_PARTS)
cerr << "Partitions after minimization\n";
for (list<Partition *>::iterator p = partitions.begin();
p != partitions.end(); p++) {
/* representative state for this partition */
State *rep = *((*p)->begin());
if (flags & DFA_DUMP_MIN_PARTS)
cerr << *rep << " : ";
/* update representative state's transitions */
rep->otherwise = *rep->otherwise->partition->begin();
for (StateTrans::iterator c = rep->trans.begin(); c != rep->trans.end(); ) {
Partition *partition = c->second->partition;
if (rep->otherwise != *partition->begin()) {
/* retarget at the representative of the target partition */
c->second = *partition->begin();
c++;
} else
/* transition is now covered by otherwise */
c = rep->trans.erase(c);
}
/* clear the state label for all non representative states,
 * and accumulate permissions */
for (Partition::iterator i = ++(*p)->begin(); i != (*p)->end(); i++) {
if (flags & DFA_DUMP_MIN_PARTS)
cerr << **i << ", ";
/* label -1 marks the state for deletion below */
(*i)->label = -1;
rep->perms.add((*i)->perms, filedfa);
}
if (rep->perms.is_accept())
final_accept++;
if (flags & DFA_DUMP_MIN_PARTS)
cerr << "\n";
}
if (flags & DFA_DUMP_STATS)
cerr << "\033[2KMinimized dfa: final partitions "
<< partitions.size() << " (accept " << final_accept
<< ")" << "\tinit " << init_count << " (accept "
<< accept_count << ")\n";
/* make sure nonmatching and start state are up to date with the
 * mappings */
{
Partition *partition = nonmatching->partition;
if (*partition->begin() != nonmatching) {
nonmatching = *partition->begin();
}
partition = start->partition;
if (*partition->begin() != start) {
start = *partition->begin();
}
}
/* Now that the states have been remapped, remove all states
 * that are not the representative states for their partition, they
 * will have a label == -1
 */
for (Partition::iterator i = states.begin(); i != states.end();) {
if ((*i)->label == -1) {
State *s = *i;
i = states.erase(i);
delete(s);
} else
i++;
}
out:
/* Cleanup: free the Partition objects; the states they listed are
 * owned by the states list and are not deleted here
 */
while (!partitions.empty()) {
Partition *p = partitions.front();
partitions.pop_front();
delete(p);
}
}
/* diff_encode helper functions */
/**
 * Attach @state to the diff-encode DAG.
 * @dag: unused DAG slot to bind to @state if it is not in the DAG yet
 * @state: state being visited
 * @parent: state @state was reached from, NULL for a root
 *
 * A state seen for the first time gets @dag as its diff entry with a depth
 * one greater than its parent's (1 without a parent). Whether new or not,
 * @parent is recorded in the state's parent list if it is strictly
 * shallower than the state.
 *
 * Returns: 1 if @dag was consumed, 0 if @state already had a DAG entry.
 */
static unsigned int add_to_dag(DiffDag *dag, State *state, State *parent)
{
	unsigned int consumed = 0;

	if (state->diff == NULL) {
		dag->rel = NULL;
		if (parent == NULL)
			dag->depth = 1;
		else
			dag->depth = parent->diff->depth + 1;
		dag->state = state;
		state->diff = dag;
		consumed = 1;
	}

	if (parent != NULL && parent->diff->depth < state->diff->depth)
		state->diff->parents.push_back(parent);

	return consumed;
}
/**
 * Find the state in @part that @state is best diff-encoded against.
 * @candidate: set to the best state, or NULL if no state in @part has a
 *             positive weight
 *
 * Returns: the diff_weight() of the chosen candidate, 0 if there is none.
 */
static int diff_partition(State *state, Partition &part, int max_range, int upper_bound, State **candidate)
{
	int best = 0;
	*candidate = NULL;

	Partition::iterator it = part.begin();
	for (; it != part.end(); ++it) {
		State *other = *it;
		if (other == state)
			continue;

		int w = state->diff_weight(other, max_range, upper_bound);
		/* strictly greater: the earliest of equally good states wins */
		if (w > best) {
			best = w;
			*candidate = other;
		}
	}

	return best;
}
/**
 * diff_encode - compress dfa by differentially encoding state transitions
 * @flags: flags controlling dfa creation; DFA_DUMP_DIFF_PROGRESS and
 *         DFA_DUMP_DIFF_STATS select diagnostic output on stderr
 *
 * This function reduces the number of transitions that need to be stored
 * by encoding transitions as the difference between the state and
 * another state's transitions, with that other state set as the state's
 * default.
 *
 * For performance reasons this function does not try to compute the
 * absolute best encoding (maximal spanning tree) but instead computes
 * a very good encoding within the following limitations.
 * - Not all states have to be differentially encoded. This allows for
 * multiple states to be used as a terminating basis.
 * - The number of state transitions needed to match an input of length
 * m will be 2m
 *
 * To guarantee this the ordering and distance calculation is done in the
 * following manner.
 * - A DAG of the DFA is created starting with the start state(s).
 * - A state can only be relative (have a differential encoding) to
 * another state if that state has
 * - a lower depth in the DAG
 * - is a sibling (same depth) that is not relative
 * - is a sibling that is relative to a state with lower depth in the DAG
 *
 * The run time constraints are maintained by the DAG ordering + relative
 * state constraints. For any input character C when at state S with S being
 * at level N in the DAG then at most 2N states must be traversed to find the
 * transition for C. However the maximal number of transitions is not m*m,
 * because when a character is matched and forward movement is made through
 * the DFA any relative transition search will move back through the DAG order.
 * So say for character C we start matching on a state S that is at depth 10
 * in the DAG. The transition for C is not found in S and we recurse backwards
 * to a depth of 6. A transition is found and it steps to the next state, but
 * the state transition at most will only move 1 deeper into the DAG so for
 * the next state the maximum number of states traversed is 2*7.
 */
void DFA::diff_encode(dfaflags_t flags)
{
DiffDag *dag;
unsigned int xcount = 0, xweight = 0, transitions = 0, depth = 0;
/* clear the depth flag */
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
(*i)->diff = NULL;
transitions += (*i)->trans.size();
}
/* Prealloc structures we need. We know the exact number of elements,
 * and once setup they don't change so we don't need the flexibility
 * or overhead of stl, just allocate the needed data as an array
 */
dag = new DiffDag [states.size()];
/* Generate DAG ordering and parent sets */
/* NOTE(review): slots 0 and 1 are assumed to be taken by nonmatching
 * and start; if start == nonmatching the second call leaves dag[1]
 * unset while tail is still 2 - confirm this cannot happen.
 */
add_to_dag(&dag[0], nonmatching, NULL);
add_to_dag(&dag[1], start, NULL);
unsigned int tail = 2;
/* Breadth first walk beginning at start (index 1); add_to_dag()
 * returns 1 for each state seen for the first time, which advances
 * tail to the next free slot
 */
for (unsigned int i = 1; i < tail; i++) {
State *state = dag[i].state;
State *child = dag[i].state->otherwise;
if (child)
tail += add_to_dag(&dag[tail], child, state);
for (StateTrans::iterator j = state->trans.begin(); j != state->trans.end(); j++) {
child = j->second;
tail += add_to_dag(&dag[tail], child, state);
}
}
/* depth of the last state added to the DAG */
depth = dag[tail - 1].depth;
/* calculate which state to make a transitions relative to */
for (unsigned int i = 2; i < tail; i++) {
State *state = dag[i].state;
State *candidate = NULL;
/* candidates are the recorded parents of the states this state
 * transitions to, beginning with those of the default target
 */
int weight = diff_partition(state,
state->otherwise->diff->parents, max_range,
upper_bound, &candidate);
for (StateTrans::iterator j = state->trans.begin(); j != state->trans.end(); j++) {
State *tmp_candidate;
int tmp = diff_partition(state,
j->second->diff->parents, max_range,
upper_bound, &tmp_candidate);
if (tmp > weight) {
weight = tmp;
candidate = tmp_candidate;
}
}
if ((flags & DFA_DUMP_DIFF_PROGRESS) && (i % 100 == 0))
cerr << "\033[2KDiff Encode: " << i << " of "
<< tail << ". Diff states " << xcount
<< " Savings " << xweight << "\r";
/* may be NULL: the state then stays non-differential */
state->diff->rel = candidate;
if (candidate) {
/* expected totals, checked against the actual ones below */
xcount++;
xweight += weight;
}
}
/* now make transitions relative, start at the back of the list so
 * as to start with the last transitions and work backwards to avoid
 * having to traverse multiple previous states (that have been made
 * relative already) to reconstruct previous state transition table
 */
unsigned int aweight = 0;
diffcount = 0;
for (int i = tail - 1; i > 1; i--) {
if (dag[i].rel) {
int weight = dag[i].state->make_relative(dag[i].rel, upper_bound);
aweight += weight;
diffcount++;
}
}
if (flags & DFA_DUMP_DIFF_STATS)
cerr << "Diff encode states: " << diffcount << " of "
<< tail << " reached @ depth " << depth << ". "
<< aweight << " trans removed\n";
/* consistency checks: the predicted and actual results should agree */
if (xweight != aweight)
cerr << "Diff encode error: actual savings " << aweight
<< " != expected " << xweight << "\n";
if (xcount != diffcount)
cerr << "Diff encode error: actual count " << diffcount
<< " != expected " << xcount << " \n";
/* cleanup */
for (unsigned int i = 0; i < tail; i++)
dag[i].parents.clear();
delete [] dag;
}
/**
* flatten_differential - remove differential state encoding
*
* Flatten the dfa back into a flat encoding.
*/
void DFA::undiff_encode(void)
{
for (Partition::iterator i = states.begin(); i != states.end(); i++)
(*i)->flatten_relative(nonmatching, upper_bound);
diffcount = 0;
}
/**
 * Recursively print the diff-encode chains ending below @state.
 * @relmap: maps a state to the states encoded relative to it
 * @chain: states on the path from the base state to @state (exclusive)
 * @count: incremented for every chain printed
 * @total: accumulates the length of every chain printed
 * @max: updated to the longest chain length seen
 *
 * A chain is printed when @state has no states relative to it.
 */
void DFA::dump_diff_chain(ostream &os, map<State *, Partition> &relmap,
			  Partition &chain, State *state, unsigned int &count,
			  unsigned int &total, unsigned int &max)
{
	/* operator[] creates an empty entry for states nothing depends on */
	Partition &dependents = relmap[state];

	if (dependents.size() == 0) {
		for (Partition::iterator c = chain.begin(); c != chain.end(); ++c)
			os << **c << " <- ";
		os << *state << "\n";

		++count;
		total += chain.size() + 1;
		if (chain.size() + 1 > max)
			max = chain.size() + 1;
	}

	chain.push_back(state);
	for (Partition::iterator d = dependents.begin(); d != dependents.end(); ++d)
		dump_diff_chain(os, relmap, chain, *d, count, total, max);
	chain.pop_back();
}
/**
 * Dump the DFA diff_encoding chains followed by summary statistics.
 *
 * States without DiffEncodeFlag are the bases; every other state is filed
 * under the state its otherwise member points to.
 */
void DFA::dump_diff_encode(ostream &os)
{
	map<State *, Partition> rel;
	Partition base, chain;

	for (Partition::iterator st = states.begin(); st != states.end(); ++st) {
		State *s = *st;
		if (!(s->flags & DiffEncodeFlag)) {
			base.push_back(s);
			continue;
		}
		rel[s->otherwise].push_back(s);
	}

	unsigned int count = 0, total = 0, max = 0;
	for (Partition::iterator b = base.begin(); b != base.end(); ++b)
		dump_diff_chain(os, rel, chain, *b, count, total, max);

	os << base.size() << " non-differentially encoded states\n";
	os << "chains: " << count - base.size() << "\n";
	os << "average chain size: "
	   << (double) (total - base.size()) / (double) (count - base.size())
	   << "\n";
	os << "longest chain: " << max << "\n";
}
/**
 * text-dump the DFA (for debugging).
 *
 * First lists the start state and all accepting states, then for every
 * state its transitions that do not lead to the nonmatching state. The
 * default transition is shown as a character class excluding the inputs
 * that explicitly lead to nonmatching.
 */
void DFA::dump(ostream & os)
{
	/* part 1: start and accepting states */
	for (Partition::iterator s = states.begin(); s != states.end(); ++s) {
		const bool is_start = (*s == start);
		if (!is_start && !(*s)->perms.is_accept())
			continue;

		os << **s;
		if (is_start)
			os << " <== (allow/deny/audit/quiet)";
		if ((*s)->perms.is_accept())
			(*s)->perms.dump(os);
		os << "\n";
	}
	os << "\n";

	/* part 2: transitions */
	for (Partition::iterator s = states.begin(); s != states.end(); ++s) {
		Chars excluded;
		/* the "perms:" heading is printed once, before the first line */
		bool need_header = true;

		for (StateTrans::iterator t = (*s)->trans.begin();
		     t != (*s)->trans.end(); ++t) {
			if (t->second == nonmatching) {
				excluded.insert(t->first);
				continue;
			}

			if (need_header) {
				need_header = false;
				os << **s << " perms: ";
				if ((*s)->perms.is_accept())
					(*s)->perms.dump(os);
				else
					os << "none";
				os << "\n";
			}

			os << " ";
			t->first.dump(os) << " -> " << *(t)->second;
			if (t->second->perms.is_accept()) {
				os << " ";
				t->second->perms.dump(os);
			}
			os << "\n";
		}

		if ((*s)->otherwise == nonmatching)
			continue;

		if (need_header) {
			need_header = false;
			os << **s << " perms: ";
			if ((*s)->perms.is_accept())
				(*s)->perms.dump(os);
			else
				os << "none";
			os << "\n";
		}

		os << " [";
		if (!excluded.empty()) {
			os << "^";
			for (Chars::iterator c = excluded.begin();
			     c != excluded.end(); ++c)
				os << *c;
		}
		os << "] -> " << *(*s)->otherwise;
		if ((*s)->otherwise->perms.is_accept()) {
			os << " ";
			(*s)->otherwise->perms.dump(os);
		}
		os << "\n";
	}
	os << "\n";
}
/**
 * Create a dot (graphviz) graph from the DFA (for debugging).
 *
 * Every state except nonmatching becomes a node (the start state in bold,
 * accepting states labelled with their permissions). Transitions into
 * nonmatching are not drawn; their inputs are listed as exclusions on the
 * edge for the default transition.
 */
void DFA::dump_dot_graph(ostream & os)
{
	os << "digraph \"dfa\" {" << "\n";

	/* nodes */
	for (Partition::iterator s = states.begin(); s != states.end(); ++s) {
		State *node = *s;
		if (node == nonmatching)
			continue;

		os << "\t\"" << *node << "\" [" << "\n";
		if (node == start)
			os << "\t\tstyle=bold" << "\n";
		if (node->perms.is_accept()) {
			os << "\t\tlabel=\"" << *node << "\\n";
			node->perms.dump(os);
			os << "\"\n";
		}
		os << "\t]" << "\n";
	}

	/* edges */
	for (Partition::iterator s = states.begin(); s != states.end(); ++s) {
		State *from = *s;
		Chars excluded;

		for (StateTrans::iterator t = from->trans.begin();
		     t != from->trans.end(); ++t) {
			if (t->second == nonmatching) {
				excluded.insert(t->first);
				continue;
			}
			os << "\t\"" << *from << "\" -> \"" << *t->second
			   << "\" [" << "\n";
			os << "\t\tlabel=\"";
			t->first.dump(os);
			os << "\"\n\t]" << "\n";
		}

		if (from->otherwise == nonmatching)
			continue;

		os << "\t\"" << *from << "\" -> \"" << *from->otherwise
		   << "\" [" << "\n";
		if (!excluded.empty()) {
			os << "\t\tlabel=\"[^";
			for (Chars::iterator c = excluded.begin();
			     c != excluded.end(); ++c)
				c->dump(os);
			os << "]\"" << "\n";
		}
		os << "\t]" << "\n";
	}

	os << '}' << "\n";
}
/**
 * Compute character equivalence classes in the DFA to save space in the
 * transition table.
 * @flags: DFA_DUMP_EQUIV_STATS prints the number of classes on stderr
 *
 * Returns: map from each input character that appears on an explicit,
 * non-negative transition to the class assigned to it. Class numbers
 * start at 1.
 */
map<transchar, transchar> DFA::equivalence_classes(dfaflags_t flags)
{
map<transchar, transchar> classes;
transchar next_class = 1;
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
/* Group edges to the same next state together */
map<const State *, Chars> node_sets;
for (StateTrans::iterator j = (*i)->trans.begin(); j != (*i)->trans.end(); j++) {
/* negative (out of band) inputs are not classified */
if (j->first.c < 0)
continue;
node_sets[j->second].insert(j->first);
}
for (map<const State *, Chars>::iterator j = node_sets.begin();
j != node_sets.end(); j++) {
/* Group edges to the same next state together by class */
map<transchar, Chars> node_classes;
bool class_used = false;
for (Chars::iterator k = j->second.begin();
k != j->second.end(); k++) {
/* insert() only takes effect for characters without a class
 * yet; x.first->second is the class the character ends up in
 */
pair<map<transchar, transchar>::iterator, bool> x = classes.insert(make_pair(*k, next_class));
if (x.second)
class_used = true;
pair<map<transchar, Chars>::iterator, bool> y = node_classes.insert(make_pair(x.first->second, Chars()));
y.first->second.insert(*k);
}
/* next_class was handed out above, move on to a fresh number */
if (class_used) {
next_class++;
class_used = false;
}
for (map<transchar, Chars>::iterator k = node_classes.begin();
k != node_classes.end(); k++) {
/**
 * If any other characters are in the same class, move
 * the characters in this class into their own new
 * class
 */
map<transchar, transchar>::iterator l;
for (l = classes.begin(); l != classes.end(); l++) {
/* a member of class k->first that is not in this edge group */
if (l->second == k->first &&
k->second.find(l->first) == k->second.end()) {
class_used = true;
break;
}
}
if (class_used) {
for (Chars::iterator l = k->second.begin();
l != k->second.end(); l++) {
classes[*l] = next_class;
}
next_class++;
class_used = false;
}
}
}
}
if (flags & DFA_DUMP_EQUIV_STATS)
fprintf(stderr, "Equiv class reduces to %d classes\n",
next_class.c - 1);
return classes;
}
/**
 * Text-dump the equivalence classes (for debugging).
 *
 * The character -> class map @eq is inverted and written to @os as an
 * "(eq):" header followed by one line per class of the form
 * "<class>: <char> <char> ...".
 */
void dump_equivalence_classes(ostream &os, map<transchar, transchar> &eq)
{
	/* invert the mapping: class -> set of member characters */
	map<transchar, Chars> members_of;
	for (map<transchar, transchar>::iterator entry = eq.begin();
	     entry != eq.end(); ++entry)
		members_of[entry->second].insert(entry->first);

	os << "(eq):" << "\n";

	map<transchar, Chars>::iterator cls = members_of.begin();
	while (cls != members_of.end()) {
		os << cls->first.c << ':';
		for (Chars::iterator ch = cls->second.begin();
		     ch != cls->second.end(); ++ch) {
			os << ' ' << *ch;
		}
		os << "\n";
		++cls;
	}
}
/**
 * Replace characters with classes (which are also represented as
 * characters) in the DFA transition table.
 *
 * Only the transition tables are rewritten; the nodes continue to contain
 * the original characters.  When several characters of one state map to
 * the same class only the first entry inserted for that class is kept.
 *
 * @eq: character -> class map; looked up with operator[], so a character
 *      missing from the map gets a default constructed entry added
 */
void DFA::apply_equivalence_classes(map<transchar, transchar> &eq)
{
	for (Partition::iterator state = states.begin(); state != states.end(); ++state) {
		map<transchar, State *> &table = (*state)->trans;

		/* take the old table out, then refill it keyed by class */
		map<transchar, State *> old_table;
		old_table.swap(table);

		for (StateTrans::iterator t = old_table.begin();
		     t != old_table.end(); ++t) {
			/*
			 * NOTE(review): entries with a negative character
			 * value are dropped here, not carried over - confirm
			 * this is intended.
			 */
			if (t->first.c >= 0)
				table.insert(make_pair(eq[t->first], t->second));
		}
	}
}
#if 0
/*
 * Disabled code: everything up to the matching #endif is excluded from
 * compilation.
 *
 * dominance() maps every AcceptNode found in the states of @dfa to the
 * intersection of the accept-node sets of all states that contain it,
 * i.e. the accept nodes that always occur together with it.
 */
typedef set <ImportantNode *>AcceptNodes;
map<ImportantNode *, AcceptNodes> dominance(DFA & dfa)
{
map<ImportantNode *, AcceptNodes> is_dominated;
for (States::iterator i = dfa.states.begin(); i != dfa.states.end(); i++) {
/* set1: the accept nodes contained in this state */
AcceptNodes set1;
for (State::iterator j = (*i)->begin(); j != (*i)->end(); j++) {
if (AcceptNode * accept = dynamic_cast<AcceptNode *>(*j))
set1.insert(accept);
}
for (AcceptNodes::iterator j = set1.begin(); j != set1.end(); j++) {
/* first sighting of *j records set1 as is */
pair<map<ImportantNode *, AcceptNodes>::iterator, bool> x = is_dominated.insert(make_pair(*j, set1));
if (!x.second) {
/* seen before: narrow the stored set to its intersection with set1 */
AcceptNodes & set2(x.first->second), set3;
for (AcceptNodes::iterator l = set2.begin();
l != set2.end(); l++) {
if (set1.find(*l) != set1.end())
set3.insert(*l);
}
set3.swap(set2);
}
}
}
return is_dominated;
}
#endif
/**
 * Test whether two permission sets carry different exec qualifiers.
 *
 * Returns: non-zero only when both @perm1 and @perm2 have bits set within
 * AA_EXEC_TYPE and those bits are not identical, 0 otherwise
 */
static inline int diff_qualifiers(uint32_t perm1, uint32_t perm2)
{
	const uint32_t type1 = perm1 & AA_EXEC_TYPE;
	const uint32_t type2 = perm2 & AA_EXEC_TYPE;

	/* nothing to compare unless both sides have a qualifier */
	if (!type1 || !type2)
		return 0;

	return type1 != type2;
}
/**
 * Compute the permission flags that this state corresponds to.  If we
 * have any exact matches, then they override the execute and safe
 * execute flags.
 *
 * @state: set of nodes making up the state; may be NULL, in which case
 *         @perms is just cleared
 * @perms: output, cleared first and then filled from the match flag nodes
 *         found in @state
 * @filedfa: when true the x modifiers of merged rules are checked for
 *           consistency and exec type bits get the special handling below
 *
 * Returns: 0 on success, 1 if merged rules had conflicting x modifiers
 * (a message is also printed to stderr in that case)
 */
int accept_perms(NodeSet *state, perms_t &perms, bool filedfa)
{
	int rc = 0;
	uint32_t exact_allow = 0;
	uint32_t exact_audit = 0;

	perms.clear();
	if (!state)
		return rc;

	for (NodeSet::iterator node = state->begin(); node != state->end(); ++node) {
		if (!(*node)->is_type(NODE_TYPE_MATCHFLAG))
			continue;

		MatchFlag *match = static_cast<MatchFlag *>(*node);

		if (match->is_type(NODE_TYPE_EXACTMATCHFLAG)) {
			/* exact match only ever happens with x */
			if (filedfa &&
			    !is_merged_x_consistent(exact_allow, match->flag))
				rc = 1;
			exact_allow |= match->flag;
			exact_audit |= match->audit;
			continue;
		}

		if (match->is_type(NODE_TYPE_DENYMATCHFLAG)) {
			perms.deny |= match->flag;
			perms.quiet |= match->audit;
			continue;
		}

		/* plain allow rule */
		if (filedfa && !is_merged_x_consistent(perms.allow, match->flag))
			rc = 1;
		perms.allow |= match->flag;
		perms.audit |= match->audit;
	}

	/*
	 * Fold the exact match permissions in; for a file dfa the exec type
	 * bits are held back here and only applied by the overrides below.
	 */
	uint32_t folded_allow = exact_allow;
	uint32_t folded_audit = exact_audit;
	if (filedfa) {
		folded_allow &= ~(ALL_AA_EXEC_TYPE);
		folded_audit &= ~(ALL_AA_EXEC_TYPE);
	}
	perms.allow |= folded_allow;
	perms.audit |= folded_audit;

	/* an exact exec match replaces the exec type of the regular rules */
	if (exact_allow & AA_USER_EXEC) {
		perms.allow = (perms.allow & ~AA_USER_EXEC_TYPE) |
			      (exact_allow & AA_USER_EXEC_TYPE);
		perms.exact = AA_USER_EXEC_TYPE;
	}
	if (exact_allow & AA_OTHER_EXEC) {
		perms.allow = (perms.allow & ~AA_OTHER_EXEC_TYPE) |
			      (exact_allow & AA_OTHER_EXEC_TYPE);
		perms.exact |= AA_OTHER_EXEC_TYPE;
	}

	/* for a file dfa, denying exec denies the whole exec type field */
	if (filedfa) {
		if (AA_USER_EXEC & perms.deny)
			perms.deny |= AA_USER_EXEC_TYPE;
		if (AA_OTHER_EXEC & perms.deny)
			perms.deny |= AA_OTHER_EXEC_TYPE;
	}

	/* deny wins over allow; quiet is only kept for denied permissions */
	perms.allow &= ~perms.deny;
	perms.quiet &= perms.deny;

	if (rc)
		fprintf(stderr, "profile has merged rule with conflicting x modifiers\n");

	return rc;
}