Add Differential State Compression to the DFA

Differential state compression encodes a state's transitions as the
difference between the state and its default state (the state it is
relative too).

This reduces the number of transitions that need to be stored in the
transition table, hence reducing the size of the dfa.  There is a
trade off in that a single input character may have to traverse more
than one state.  This is somewhat offset by reduced table sizes providing
better locality and caching properties.

With carefully encoding we can still make constant match time guarentees.
This patch guarentees that a state that is differentially encoded will do at
most 3m state traversal to match an input of length m (as opposed to a
non-differentially compressed dfa doing exactly m state traversals).
In practice the actually number of extra traversals is less than this becaus
we selectively choose which states are differentially encoded.

In addition to reducing the size of the dfa by reducing the number of
transitions that have to be stored.  Differential encoding reduces the
number of transitions that need to be considered by comb compression,
which can result in tighter packing, due to a reduction in sparseness, and
also reduces the time spent in comb compression which currently uses an
O(n^2) algorithm.

Differential encoding will always result in a DFA that is smaller or equal
in size to the encoded DFA, and will usually improve compilation times,
with the performance improvements increasing as the DFA gets larger.

Eg. Given a example DFA that created 8991 states after minimization.
* If only comb compression (current default) is used

 52057 transitions are packed into a table of 69591 entries. Achieving an
 efficiency of about 75% (an average of about 7.74 table entries per state).
 With a resulting compressed dfa16 size of 404238 bytes and a run time for
 the dfa compilation of
   real 0m9.037s
   user 0m8.893s
   sys  0m0.036s

* If differential encoding + comb compression is used, 8292 of the 8991
  states are differentially encoded, with 31557 trans removed.  Resulting in

  20500 transitions are packed into a table of 20675 entries.  Acheiving an
  efficiency of about 99.2% (an average of about 2.3 table entries per state
  With a resulting compressed dfa16 size of 207874 bytes (about 48.6%
  reduction) and a run time for the dfa compilation of
   real 0m5.416s (about 40% faster)
   user 0m5.280s
   sys  0m0.040s

Repeating with a larger DFA that has 17033 states after minimization.
* If only comb compression (current default) is used

 102992 transitions are packed into a table of 137987 entries.  Achieving
 an efficiency of about 75% (an average of about 8.10 entries per state).
 With a resultant compressed dfa16 size of 790410 bytes and a run time for d
 compilation of
  real  0m28.153s
  user  0m27.634s
  sys   0m0.120s

* with differential encoding
 39374 transition are packed into a table of 39594 entries. Achieving an
 efficiency of about 99.4% (an average of about 2.32 entries per state).
 With a resultant compressed dfa16 size of 396838 bytes (about 50% reduction
 and a run time for dfa compilation of
  real  0m11.804s (about 58% faster)
  user  0m11.657s
  sys   0m0.084s

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
This commit is contained in:
John Johansen 2014-01-09 16:55:55 -08:00
parent c6f1034177
commit 22855508e8
8 changed files with 510 additions and 11 deletions

View file

@ -315,6 +315,13 @@ void *aare_create_dfa(aare_ruleset_t *rules, size_t *size,
} else if (flags & DFA_DUMP_EQUIV)
cerr << "\nDFA did not generate an equivalence class\n";
if (flags & DFA_CONTROL_DIFF_ENCODE) {
dfa.diff_encode(flags);
if (flags & DFA_DUMP_DIFF_ENCODE)
dfa.dump_diff_encode(cerr);
}
CHFA chfa(dfa, eq, flags);
if (flags & DFA_DUMP_TRANS_TABLE)
chfa.dump(cerr);

View file

@ -31,7 +31,11 @@ typedef int dfaflags_t;
#define DFA_CONTROL_FILTER_DENY (1 << 6)
#define DFA_CONTROL_REMOVE_UNREACHABLE (1 << 7)
#define DFA_CONTROL_TRANS_HIGH (1 << 8)
#define DFA_CONTROL_DIFF_ENCODE (1 << 9)
#define DFA_DUMP_DIFF_PROGRESS (1 << 10)
#define DFA_DUMP_DIFF_ENCODE (1 << 11)
#define DFA_DUMP_DIFF_STATS (1 << 12)
#define DFA_DUMP_MIN_PARTS (1 << 13)
#define DFA_DUMP_UNIQ_PERMS (1 << 14)
#define DFA_DUMP_MIN_UNIQ_PERMS (1 << 15)

View file

@ -32,6 +32,7 @@
#include "hfa.h"
#include "chfa.h"
#include "../immunix.h"
#include "flex-tables.h"
void CHFA::init_free_list(vector<pair<size_t, size_t> > &free_list,
size_t prev, size_t start)
@ -53,6 +54,11 @@ CHFA::CHFA(DFA &dfa, map<uchar, uchar> &eq, dfaflags_t flags): eq(eq)
if (flags & DFA_DUMP_TRANS_PROGRESS)
fprintf(stderr, "Compressing HFA:\r");
if (dfa.diffcount)
flags = YYTH_FLAG_DIFF_ENCODE;
else
flags = 0;
if (eq.empty())
max_eq = 255;
else {
@ -242,6 +248,8 @@ repeat:
}
do_insert:
if (from->flags & DiffEncodeFlag)
base |= DiffEncodeBit32;
default_base.push_back(make_pair(default_state, base));
}
@ -279,7 +287,7 @@ void CHFA::dump(ostream &os)
<< *next_check[i].second << " -> "
<< *next_check[i].first << ": ";
size_t offs = i - default_base[num[next_check[i].second]].second;
size_t offs = i - base_mask_size(default_base[num[next_check[i].second]].second);
if (eq.size())
os << offs;
else
@ -296,7 +304,6 @@ void CHFA::dump(ostream &os)
* (Only the -Cf and -Ce formats are currently supported.)
*/
#include "flex-tables.h"
#define YYTH_REGEX_MAGIC 0x1B5E783D
static inline size_t pad64(size_t i)
@ -395,6 +402,7 @@ void CHFA::flex_table(ostream &os, const char *name)
size_t hsize = pad64(sizeof(th) + sizeof(th_version) + strlen(name) + 1);
th.th_magic = htonl(YYTH_REGEX_MAGIC);
th.th_flags = htonl(flags);
th.th_hsize = htonl(hsize);
th.th_ssize = htonl(hsize +
flex_table_size(accept.begin(), accept.end()) +

View file

@ -26,6 +26,10 @@
#include "hfa.h"
#define BASE32_FLAGS 0xff000000
#define DiffEncodeBit32 0x80000000
#define base_mask_size(X) ((X) & ~BASE32_FLAGS)
using namespace std;
class CHFA {
@ -51,6 +55,7 @@ class CHFA {
map<uchar, uchar> &eq;
uchar max_eq;
size_t first_free;
unsigned int flags;
};
#endif /* __LIBAA_RE_CHFA_H */

View file

@ -5,6 +5,7 @@
#include <stdint.h>
#define YYTH_MAGIC 0xF13C57B1
#define YYTH_FLAG_DIFF_ENCODE 1
struct table_set_header {
uint32_t th_magic; /* TH_MAGIC */

View file

@ -73,6 +73,194 @@ ostream &operator<<(ostream &os, const State &state)
return os;
}
/**
* diff_weight - Find differential compression distance between @rel and @this
* @rel: State to compare too
* Returns: An integer indicating how good rel is as a base, larger == better
*
* Find the relative weighted difference for differential state compression
* with queried state being compressed against @rel
*
* +1 for each transition that matches (char and dest - saves a transition)
* 0 for each transition that doesn't match and exists in both states
* 0 for transition that self has and @other doesn't (no extra required)
* -1 for each transition that is in @rel and not in @this (have to override)
*
* @rel should not be a state that has already been made differential or it may
* introduce extra transitions as it does not recurse to find all transitions
*
* Should be applied after state minimization
*/
int State::diff_weight(State *rel)
{
int weight = 0;
if (this == rel)
return 0;
if (rel->diff->rel) {
/* Can only be diff encoded against states that are relative
* to a state of a lower depth. ie, at most one sibling in
* the chain
*/
if (rel->diff->rel->diff->depth >= this->diff->depth)
return 0;
} else if (rel->diff->depth >= this->diff->depth)
return 0;
if (rel->flags & DiffEncodeFlag) {
for (int i = 0; i < 256; i++) {
State *state = rel->next(i);
StateTrans::iterator j = trans.find(i);
if (j != trans.end()) {
if (state == j->second)
weight++;
/* else
0 - keep transition to mask
*/
} else if (state == otherwise) {
/* 0 - match of default against @rel
* We don't save a transition but don't have
* to mask either
*/
} else {
/* @rel has transition not covered by @this.
* Need to add a transition to mask it
*/
weight--;
}
}
return weight;
}
unsigned int count = 0;
for (StateTrans::iterator i = rel->trans.begin(); i != rel->trans.end();
i++) {
StateTrans::iterator j = trans.find(i->first);
if (j != trans.end()) {
if (i->second == j->second)
weight++;
/* } else {
0 - keep transition to mask
*/
count++;
} else if (i->second == otherwise) {
/* 0 - match of default against @rel
* We don't save a transition but don't have to
* mask either
*/
} else {
/* rel has transition not covered by @this. Need to
* add a transition to mask
*/
weight--;
}
}
/* cover transitions in @this but not in @rel */
unsigned int this_count = 0;
if (count < trans.size()) {
for (StateTrans::iterator i = trans.begin(); i != trans.end(); i++) {
StateTrans::iterator j = rel->trans.find(i->first);
if (j == rel->trans.end()) {
this_count++;
if (i->second == rel->otherwise)
/* replaced by rel->cases.otherwise */
weight++;
}
}
}
if (rel->otherwise != otherwise) {
/* rel default transitions have to be masked with transitions
* This covers all transitions not covered above
*/
weight -= 256 - (rel->trans.size() + this_count);
}
return weight;
}
/**
* make_relative - Make this state relative to @rel
* @rel: state to make this state relative too
*
* @rel can be a relative (differentially compressed state)
*/
int State::make_relative(State *rel)
{
int weight = 0;
if (this == rel || !rel)
return 0;
if (flags & DiffEncodeFlag)
return 0;
flags |= DiffEncodeFlag;
for (int i = 0; i < 256 ; i++) {
State *next = rel->next(i);
StateTrans::iterator j = trans.find(i);
if (j != trans.end()) {
if (j->second == next) {
trans.erase(j);
weight++;
}
/* else keep transition to mask */
} else if (otherwise == next) {
/* do nothing, otherwise transition disappears when
* reassigned
*/
} else {
/* need a new transition to mask those in lower state */
trans[i] = otherwise;
weight--;
}
}
otherwise = rel;
return weight;
}
/**
* flatten_differential - remove differential encode from this state
*/
void State::flatten_relative(void)
{
if (!(flags & DiffEncodeFlag))
return;
map<State *, int> count;
for (int i = 0; i < 256; i++)
count[next(i)] += 1;
int j = 0;
State *def = next(0);
for (int i = 1; i < 256; i++) {
if (count[next(i)] > count[next(j)]) {
j = i;
def = next(i);
}
}
for (int i = 0; i < 256; i++) {
if (trans.find(i) != trans.end()) {
if (trans[i] == def)
trans.erase(i);
} else {
if (trans[i] != def)
trans[i] = next(i);
}
}
otherwise = def;
flags = flags & ~DiffEncodeFlag;
}
static void split_node_types(NodeSet *nodes, NodeSet **anodes, NodeSet **nnodes
)
{
@ -175,6 +363,7 @@ void DFA::dump_node_to_dfa(void)
DFA::DFA(Node *root, dfaflags_t flags): root(root)
{
int i = 0;
diffcount = 0; /* set by diff_encode */
if (flags & DFA_DUMP_PROGRESS)
fprintf(stderr, "Creating dfa:\r");
@ -606,6 +795,239 @@ out:
}
}
/* diff_encode helper functions */
static unsigned int add_to_dag(DiffDag *dag, State *state,
State *parent)
{
unsigned int rc = 0;
if (!state->diff) {
dag->rel = NULL;
if (parent)
dag->depth = parent->diff->depth + 1;
else
dag->depth = 1;
dag->state = state;
state->diff = dag;
rc = 1;
}
if (parent && parent->diff->depth < state->diff->depth)
state->diff->parents.push_back(parent);
return rc;
}
static int diff_partition(State *state, Partition &part, State **candidate)
{
int weight = 0;
*candidate = NULL;
for (Partition::iterator i = part.begin(); i != part.end(); i++) {
if (*i == state)
continue;
int tmp = state->diff_weight(*i);
if (tmp > weight) {
weight = tmp;
*candidate = *i;
}
}
return weight;
}
/**
* diff_encode - compress dfa by differentially encoding state transitions
* @dfa_flags: flags controling dfa creation
*
* This function reduces the number of transitions that need to be stored
* by encoding transitions as the difference between the state and a
* another transitions that is set as the states default.
*
* For performance reasons this function does not try to compute the
* absolute best encoding (maximal spanning tree) but instead computes
* a very good encoding within the following limitations.
* - Not all states have to be differentially encoded. This allows for
* multiple states to be used as a terminating basis.
* - The number of state transitions needed to match an input of length
* m will be 2m
*
* To guarentee this the ordering and distance calculation is done in the
* following manner.
* - A DAG of the DFA is created starting with the start state(s).
* - A state can only be relative (have a differential encoding) to
* another state if that state has
* - a lower depth in the DAG
* - is a sibling (same depth) that is not relative
* - is a sibling that is relative to a state with lower depth in the DAG
*
* The run time constraints are maintained by the DAG ordering + relative
* state constraints. For any input character C when at state S with S being
* at level N in the DAG then at most 2N states must be traversed to find the
* transition for C. However on the maximal number of transitions is not m*m,
* because when a character is matched and forward movement is made through
* the DFA any relative transition search will move back through the DAG order.
* So say for character C we start matching on a state S that is at depth 10
* in the DAG. The transition for C is not found in S and we recurse backwards
* to a depth of 6. A transition is found and it steps to the next state, but
* the state transition at most will only move 1 deeper into the DAG so for
* the next state the maximum number of states traversed is 2*7.
*/
void DFA::diff_encode(dfaflags_t flags)
{
DiffDag *dag;
unsigned int xcount = 0, xweight = 0, transitions = 0, depth = 0;
/* clear the depth flag */
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
(*i)->diff = NULL;
transitions += (*i)->trans.size();
}
/* Prealloc structures we need. We know the exact number of elements,
* and once setup they don't change so we don't need the flexibility
* or overhead of stl, just allocate the needed data as an array
*/
dag = new DiffDag [states.size()];
/* Generate DAG ordering and parent sets */
add_to_dag(&dag[0], nonmatching, NULL);
add_to_dag(&dag[1], start, NULL);
unsigned int tail = 2;
for (unsigned int i = 1; i < tail; i++) {
State *state = dag[i].state;
State *child = dag[i].state->otherwise;
if (child)
tail += add_to_dag(&dag[tail], child, state);
for (StateTrans::iterator j = state->trans.begin(); j != state->trans.end(); j++) {
child = j->second;
tail += add_to_dag(&dag[tail], child, state);
}
}
depth = dag[tail - 1].depth;
/* calculate which state to make a transitions relative too */
for (unsigned int i = 2; i < tail; i++) {
State *state = dag[i].state;
State *candidate = NULL;
int weight = diff_partition(state,
state->otherwise->diff->parents,
&candidate);
for (StateTrans::iterator j = state->trans.begin(); j != state->trans.end(); j++) {
State *tmp_candidate;
int tmp = diff_partition(state,
j->second->diff->parents,
&tmp_candidate);
if (tmp > weight) {
weight = tmp;
candidate = tmp_candidate;
}
}
if ((flags & DFA_DUMP_DIFF_PROGRESS) && (i % 100 == 0))
cerr << "\033[2KDiff Encode: " << i << " of "
<< tail << ". Diff states " << xcount
<< " Savings " << xweight << "\r";
state->diff->rel = candidate;
if (candidate) {
xcount++;
xweight += weight;
}
}
/* now make transitions relative, start at the back of the list so
* as to start with the last transitions and work backwards to avoid
* having to traverse multiple previous states (that have been made
* relative already) to reconstruct previous state transition table
*/
unsigned int aweight = 0;
diffcount = 0;
for (int i = tail - 1; i > 1; i--) {
if (dag[i].rel) {
int weight = dag[i].state->make_relative(dag[i].rel);
aweight += weight;
diffcount++;
}
}
if (flags & DFA_DUMP_DIFF_STATS)
cerr << "Diff encode states: " << diffcount << " of "
<< tail << " reached @ depth " << depth << ". "
<< aweight << " trans removed\n";
if (xweight != aweight)
cerr << "Diff encode error: actual savings " << aweight
<< " != expected " << xweight << "\n";
if (xcount != diffcount)
cerr << "Diff encode error: actual count " << diffcount
<< " != expected " << xcount << " \n";
/* cleanup */
for (unsigned int i = 0; i < tail; i++)
dag[i].parents.clear();
delete [] dag;
}
/**
* flatten_differential - remove differential state encoding
*
* Flatten the dfa back into a flat encoding.
*/
void DFA::undiff_encode(void)
{
for (Partition::iterator i = states.begin(); i != states.end(); i++)
(*i)->flatten_relative();
diffcount = 0;
}
void DFA::dump_diff_chain(ostream &os, map<State *, Partition> &relmap,
Partition &chain, State *state, unsigned int &count,
unsigned int &total, unsigned int &max)
{
if (relmap[state].size() == 0) {
for (Partition::iterator i = chain.begin(); i != chain.end(); i++)
os << **i << " <- ";
os << *state << "\n";
count++;
total += chain.size() + 1;
if (chain.size() + 1 > max)
max = chain.size() + 1;
}
chain.push_back(state);
for (Partition::iterator i = relmap[state].begin(); i != relmap[state].end(); i++)
dump_diff_chain(os, relmap, chain, *i, count, total, max);
chain.pop_back();
}
/* Dump the DFA diff_encoding chains */
void DFA::dump_diff_encode(ostream &os)
{
map<State *, Partition> rel;
Partition base, chain;
for (Partition::iterator i = states.begin(); i != states.end(); i++) {
if ((*i)->flags & DiffEncodeFlag)
rel[(*i)->otherwise].push_back(*i);
else
base.push_back(*i);
}
unsigned int count = 0, total = 0, max = 0;
for (Partition::iterator i = base.begin(); i != base.end(); i++)
dump_diff_chain(os, rel, chain, *i, count, total, max);
os << base.size() << " non-differentially encoded states\n";
os << "chains: " << count - base.size() << "\n";
os << "average chain size: " << (double) (total - base.size()) / (double) (count - base.size()) << "\n";
os << "longest chain: " << max << "\n";
}
/**
* text-dump the DFA (for debugging).
*/

View file

@ -28,10 +28,13 @@
#include <map>
#include <vector>
#include <assert.h>
#include <stdint.h>
#include "expr-tree.h"
#define DiffEncodeFlag 1
class State;
typedef map<uchar, State *> StateTrans;
@ -334,6 +337,19 @@ public:
}
};
/* Temporary state structure used when building differential encoding
* @parents - set of states that have transitions to this state
* @depth - level in the DAG
* @state - back reference to state this DAG entry belongs
* @rel - state that this state is relative to for differential encoding
*/
struct DiffDag {
Partition parents;
int depth;
State *state;
State *rel;
};
/*
* State - DFA individual state information
* label: a unique label to identify the state used for pretty printing
@ -352,7 +368,7 @@ public:
class State {
public:
State(int l, ProtoState &n, State *other) throw(int):
label(l), perms(), trans()
label(l), flags(0), perms(), trans()
{
int error;
@ -372,15 +388,30 @@ public:
};
State *next(uchar c) {
StateTrans::iterator i = trans.find(c);
if (i != trans.end())
return i->second;
return otherwise;
};
State *state = this;
do {
StateTrans::iterator i = state->trans.find(c);
if (i != state->trans.end())
return i->second;
if (!(state->flags & DiffEncodeFlag))
return state->otherwise;
state = state->otherwise;
} while (state);
/* never reached */
assert(0);
return NULL;
}
int diff_weight(State *rel);
int make_relative(State *rel);
void flatten_relative(void);
int apply_and_clear_deny(void) { return perms.apply_and_clear_deny(); }
int label;
int flags;
perms_t perms;
StateTrans trans;
State *otherwise;
@ -389,6 +420,7 @@ public:
union {
Partition *partition; /* used during minimization */
ProtoState proto; /* used during creation */
DiffDag *diff; /* used during diff encoding */
};
};
@ -429,12 +461,16 @@ public:
}
};
/* Transitions in the DFA. */
/* Transitions in the DFA. */
class DFA {
void dump_node_to_dfa(void);
State *add_new_state(NodeSet *nodes, State *other);
void update_state_transitions(State *state);
void dump_diff_chain(ostream &os, map<State *, Partition> &relmap,
Partition &chain, State *state,
unsigned int &count, unsigned int &total,
unsigned int &max);
/* temporary values used during computations */
NodeCache anodes_cache;
@ -455,11 +491,19 @@ public:
size_t hash_trans(State *s);
void minimize(dfaflags_t flags);
int apply_and_clear_deny(void);
void diff_encode(dfaflags_t flags);
void undiff_encode(void);
void dump_diff_encode(ostream &os);
void dump(ostream &os);
void dump_dot_graph(ostream &os);
void dump_uniq_perms(const char *s);
map<uchar, uchar> equivalence_classes(dfaflags_t flags);
void apply_equivalence_classes(map<uchar, uchar> &eq);
unsigned int diffcount;
Node *root;
State *nonmatching, *start;
Partition states;

View file

@ -194,10 +194,10 @@ optflag_table_t dumpflag_table[] = {
DFA_DUMP_SIMPLE_TREE },
{ 1, "stats", "Dump all compile stats",
DFA_DUMP_TREE_STATS | DFA_DUMP_STATS | DFA_DUMP_TRANS_STATS |
DFA_DUMP_EQUIV_STATS },
DFA_DUMP_EQUIV_STATS | DFA_DUMP_DIFF_STATS },
{ 1, "progress", "Dump progress for all compile phases",
DFA_DUMP_PROGRESS | DFA_DUMP_STATS | DFA_DUMP_TRANS_PROGRESS |
DFA_DUMP_TRANS_STATS },
DFA_DUMP_TRANS_STATS | DFA_DUMP_DIFF_PROGRESS | DFA_DUMP_DIFF_STATS },
{ 1, "dfa-progress", "Dump dfa creation as in progress",
DFA_DUMP_PROGRESS | DFA_DUMP_STATS },
{ 1, "dfa-stats", "Dump dfa creation stats", DFA_DUMP_STATS },
@ -222,6 +222,12 @@ optflag_table_t dumpflag_table[] = {
{ 1, "equiv-stats", "Dump equivance class stats",
DFA_DUMP_EQUIV_STATS },
{ 1, "equiv", "Dump equivance class", DFA_DUMP_EQUIV },
{ 1, "diff-encode", "Dump differential encoding",
DFA_DUMP_DIFF_ENCODE },
{ 1, "diff-stats", "Dump differential encoding stats",
DFA_DUMP_DIFF_STATS },
{ 1, "diff-progress", "Dump progress of differential encoding",
DFA_DUMP_DIFF_PROGRESS | DFA_DUMP_DIFF_STATS },
{ 0, NULL, NULL, 0 },
};
@ -251,6 +257,8 @@ optflag_table_t optflag_table[] = {
DFA_CONTROL_TRANS_HIGH },
{ 2, "compress-fast", "do faster dfa transition table compression",
DFA_CONTROL_TRANS_HIGH },
{ 1, "diff-encode", "Differentially encode transitions",
DFA_CONTROL_DIFF_ENCODE },
{ 0, NULL, NULL, 0 },
};