apparmor/utils/binary_analyze

#!/usr/bin/perl

#****************************************************************************
#*  Copyright (c) Andrew Gross 2003-2004
#*  All Rights Reserved
#*
#*  The following information and material is confidential and proprietary
#*  information of Andrew Gross (the "Confidential Material") and
#*  is protected by copyright, patent, trade secrets and other intellectual
#*  and property laws.  Access to this Confidential Material is limited to
#*  authorized employees and/or licensees.  Any unauthorized use of the
#*  Confidential Material could subject the user to criminal and/or civil
#*  penalties.
#*
#*  This work is intended for Immunix, Inc. (the "Client") on a
#*  work-for-hire basis pending contract completion and payment whereupon
#*  all rights confer to the Client.
#*
#****************************************************************************

# NB: This script works only on dynamically linked programs.  Static
#     programs require a similar but different approach.


use Data::Dumper;

use strict;
# File-scoped working variables.  The script body is a flat sequence of
# passes that all share this state, so everything is declared up front.
my (	$DEBUG,						# debug printing control
	$addr, $byte, $data, $flag, $i, $t2, $t3,	# counters and temps
	$first, $first_addr, $k, $last_cv, $last_push,	# pass 1 bookkeeping
	$subno, $drop_push_ebp, $reloc_flag, $name,
	$cf, $rod, $rw, $sstr, $str, $tmp, $x, $y,
	%DATA, %FNS, %RELOC, %RODATA, %SEC, %PLT,	# data store hashes
	%Branches,					# branch target addresses found in pass 1
	%obj_list, %obj_ref, @o_list,			# for object file info
	@line, $cv,					# text seg. loop info
	%Regs, %Seta, @pargs, @p2args,			# pass 2 register / call argument tracking
	$NO_MATCH_DELIVERED_DEMOS,			# nonzero = bug fixes on; 0 = output matches delivered demos
   );


# Comments:
#
# subroutine args: 0x8(%ebp) [argc], 0xc(%ebp) [argv]
#
# add'l instruction support / invalidate registers when necessary
#
# ACL generation and mapping:
#   bit of library stuff as example
#   +open +sprintf +chdir +creat +mkdir +opendir +rename
#
# rarely a routine will save extra registers at the beginning
#   and this hoses the first function call if push based args.


# Debug printing setting.  0 is default, 5 is highest (most verbose).
# The checks in this script are of the form "$DEBUG>3" or higher, so
# values below 4 produce no extra output.
#
#$DEBUG=5;
$DEBUG=0;


# Flag controlling improved functionality and bug fixes.
#   1 (nonzero): fixes are enabled; output will NOT match the demos
#                delivered to the Client.
#   0:           fixes are disabled so that runs match the delivered demos.
#
#$NO_MATCH_DELIVERED_DEMOS=0;
$NO_MATCH_DELIVERED_DEMOS=1;


#=========================================================================
#
# All supporting data is read in, parsed, and stored in this portion of the code.
#
# NB: In reading data all addresses are converted from hex and stored in decimal.


#=== Get sections and address ranges
# Read output from "objdump -x":
#   section start and end addresses are needed later so we can
#   decide how to translate addresses

# $flag is set once the "Sections:" marker has been seen; lines before it
# are header information we do not need.
$flag=0;

print "reading header...\n";
open(my $header_fh, '<', "$ARGV[0]/header") || die "$ARGV[0]/header: $!";
while (<$header_fh>) {
	if ( /^Sections:$/ ) {			# when we've passed the header info
		$flag=1;
		next;
	}
	next if !$flag;

	# ignore the junk lines
	next if /ALLOC|CONTENTS|LOAD|READONLY|CODE|DATA/;
	next if /^Idx/;
	last if /^SYMBOL TABLE:/;

	# extract section start address, end address, and length:
	#>  0 .interp       00000013  080480f4  080480f4  000000f4  2**0
	#
	# Split into an explicit array: a bare "split;" no longer fills @_
	# on current perls, which left every field empty.
	my @fields = split;
	next if @fields < 4;			# not a section line

	my $size  = hex("0x".$fields[2]);
	my $start = hex("0x".$fields[3]);
	$SEC{$fields[1]} = [ $start, $start+$size, $size ];	# name = start, end, len
}
close($header_fh);

# Print the loaded section information (sorted by section name)
#
if ($DEBUG>4) {
	print "\n";
	foreach my $sec_name (sort keys %SEC) {
		printf "%-20s\t%08x\t%08x\t%08x\n", $sec_name, @{ $SEC{$sec_name} };
	}
}

# Print the loaded section information sorted by start address (for reference)
#
if ($DEBUG>4) {
	print "\n";
	foreach my $sec_name (sort { $SEC{$a}[0] <=> $SEC{$b}[0] } keys %SEC) {
		printf "%-20s\t%08x\t%08x\t%08x\n", $sec_name, @{ $SEC{$sec_name} };
	}
}


#=== Get symbol names and addresses (dynamic symbol table)
# Read output from "objdump -T":
#   dynamic symbol information for xlating calls
#
print "reading dyn_syms...\n";
open(my $dynsym_fh, '<', "$ARGV[0]/dyn_syms") || die "$ARGV[0]/dyn_syms: $!";
while (<$dynsym_fh>) {
	# ignore junk lines
	next if /^$/;
	next if /file format elf32-i386/;
	next if /^DYNAMIC SYMBOL TABLE:/;

	# extract the address and dynamic symbol name (library functions typically)
	#> 08049260      DF *UND*  0000003a  GLIBC_2.0   mkdir
	#
	# Split into an explicit array: a bare "split;" no longer fills @_
	# on current perls.
	chomp;
	my @fields = split;
	next if !@fields;			# whitespace-only line

	# With the flag at 0, _start is dropped to reproduce the delivered demos
	# (recording it shifts the generated sub_NN names).
	next if $fields[-1] eq "_start" && !$NO_MATCH_DELIVERED_DEMOS;

	(my $hex_addr = $fields[0]) =~ s/^0+//;
	$FNS{hex("0x".$hex_addr)} = $fields[-1];	# address (decimal) => symbol name
}
close($dynsym_fh);

# Print the loaded symbol information sorted by symbol name
# (address breaks ties so the order is stable).
#
if ($DEBUG>4) {
	print "\n";
	foreach my $sym_addr (sort { $FNS{$a} cmp $FNS{$b} || $a <=> $b } keys %FNS) {
		printf "%08x\t%-20s\n", $sym_addr, $FNS{$sym_addr};
	}
}


#=== Get symbol names and addresses (linker RR info)
# Read output from "objdump -R":
#   dynamic relocation information also for xlating calls
#
print "reading dynam...\n";
open(my $dynam_fh, '<', "$ARGV[0]/dynam") || die "$ARGV[0]/dynam: $!";
while (<$dynam_fh>) {
	# ignore junk lines
	next if /^$/;
	next if /file format elf32-i386/;
	next if /^DYNAMIC SYMBOL TABLE:/;
	next if /^DYNAMIC RELOCATION RECORDS/;	# header of this listing; previously
						# parsed as data, storing a junk %FNS entry
	next if /^OFFSET/;

	# extract the address and symbol name
	#> 0804f260 R_386_JUMP_SLOT   mkdir
	#
	# Split into an explicit array: a bare "split;" no longer fills @_
	# on current perls.
	chomp;
	my @fields = split;
	next if !@fields;			# whitespace-only line

	(my $hex_addr = $fields[0]) =~ s/^0+//;
	$FNS{hex("0x".$hex_addr)} = $fields[-1];	# address (decimal) => symbol name
}
close($dynam_fh);

# Print the loaded relocation information sorted by symbol name
# (address breaks ties so the order is stable).
#
if ($DEBUG>4) {
	print "\n";
	foreach my $sym_addr (sort { $FNS{$a} cmp $FNS{$b} || $a <=> $b } keys %FNS) {
		printf "%08x\t%-20s\n", $sym_addr, $FNS{$sym_addr};
	}
}


#=== Get read only data (rodata)
# Read output from "objdump -s --section=.rodata":
#   read only text data (string and other constants)
#
print "reading rodata...\n";
open(my $rodata_fh, '<', "$ARGV[0]/rodata") || die "$ARGV[0]/rodata: $!";
while (<$rodata_fh>) {
	# ignore junk lines
	next if /^$/;
	next if /file format elf32-i386/;
	next if /^Contents of section/;

	# extract bytes and store with corresponding address key
	#   in the %RODATA hash
	#> 804d9e0 03000000 01000200 756e6162 6c652074  ........unable t
	#
	# Only the address and the (up to four) hex words that follow it,
	# each separated by a single blank, are taken.  The trailing ASCII
	# rendering is set off by more than one blank and must not be fed
	# through hex(): doing so stored bogus bytes past the end of the line.
	#
	next unless /^\s*([0-9a-f]+)((?:\s[0-9a-f]{2,8}){1,4})/i;
	my ($hex_addr, $hex_words) = ($1, $2);

	$addr = hex("0x".$hex_addr);
	foreach my $hex_byte ( $hex_words =~ /([0-9a-f]{2})/ig ) {
		$RODATA{$addr} = hex("0x".$hex_byte);	# one entry per byte
		$addr++;
	}
}
close($rodata_fh);

# Print the loaded string data (in address order; keys are numeric)
#
if ($DEBUG>4) {
	print "\n";
	foreach my $byte_addr (sort { $a <=> $b } keys %RODATA) {
		printf "%08x\t%02x\n", $byte_addr, $RODATA{$byte_addr};
	}
}

# Raw structure dump; debugging aid only, so keep it out of normal output.
#
print Data::Dumper->Dump([\%RODATA]) if $DEBUG>4;


#=== Get data (data)
# Read output from "objdump -s --section=.data":
#   data section (for variable tracking and the odd string)
#
print "reading data...\n";
open(my $data_fh, '<', "$ARGV[0]/data") || die "$ARGV[0]/data: $!";
while (<$data_fh>) {
	# ignore junk lines
	next if /^$/;
	next if /file format elf32-i386/;
	next if /^Contents of section/;

	# same layout as the rodata dump but stores in %DATA hash
	#> 804f000 00000000 00000000 4cf20408 19000000  ........L.......
	#
	# Parse by pattern rather than by fixed columns: fixed columns are
	# only right when the address is exactly as wide as in the sample
	# line above.  Only the address and the (up to four) hex words that
	# follow it, each separated by a single blank, are taken; the trailing
	# ASCII rendering is ignored.
	#
	next unless /^\s*([0-9a-f]+)((?:\s[0-9a-f]{2,8}){1,4})/i;
	my ($hex_addr, $hex_words) = ($1, $2);

	$addr = hex("0x".$hex_addr);
	foreach my $hex_byte ( $hex_words =~ /([0-9a-f]{2})/ig ) {
		$DATA{$addr} = hex("0x".$hex_byte);	# one entry per byte
		$addr++;
	}
}
close($data_fh);

# Print the loaded data (in address order; keys are numeric)
#
if ($DEBUG>4) {
	print "\n";
	foreach my $byte_addr (sort { $a <=> $b } keys %DATA) {
		printf "%08x\t%02x\n", $byte_addr, $DATA{$byte_addr};
	}
}


#=== Get reloc symbol names and addresses
# Read output from "objdump -r":
#   read in relocation information which contains
#   hints as to in which section a datum is stored
#
# Note: this is only for handling object files
#       as executables will not have data in this
#       section of the file.
#
print "reading reloc...\n";
open(my $reloc_fh, '<', "$ARGV[0]/reloc") || die "$ARGV[0]/reloc: $!";
while (<$reloc_fh>) {
	# ignore junk lines
	next if /^$/;
	next if /file format elf32-i386/;
	next if /^RELOCATION RECORDS/;
	next if /^OFFSET/;

	# Split into an explicit array: a bare "split;" no longer fills @_
	# on current perls.
	chomp;
	my @fields = split;
	next if !@fields;			# whitespace-only line

	# skip BSS symbols
	next if $fields[2] eq ".bss";

	#
	#> 00000098 R_386_32          .rodata

	# PC-relative (R_386_PC32) entries are stored with the other symbols
	# in %FNS; every other relocation type goes into %RELOC.
	#
	if ( $fields[1] eq "R_386_PC32" ) {
		$FNS{hex("0x".$fields[0])} = $fields[-1];
	} else {
		$RELOC{hex("0x".$fields[0])} = $fields[-1];
	}
}
close($reloc_fh);

# Print the loaded relocation symbols (in offset order)
#
if ($DEBUG>4) {
	print "\n";
	foreach my $reloc_off (sort {$a <=> $b} keys %RELOC) {
		printf "%08x\t%-20s\n", $reloc_off, $RELOC{$reloc_off};
	}
}


# Note: data from the "objdump -t" is not currently used.  This information
#       would only exist in an unstripped binary and is only of help
#       to a human reading the code.  It only contains symbols internal
#       to the program and not of interest for library calls.
#	We can crib this from the text directly.


#=========================================================================
#
# This portion of the code reads in the disassembly of the text segment.
#   The processing is done in two phases which correspond to the
#   two loops (the first split into two pieces) each of which makes
#   a complete pass of the text segment.  The text segment data is
#   not stored in core due to its possibly large size.
#
# The first pass pulls the PLT data as the newer compiler uses a
#   different relocation scheme; then the subroutines are found and named,
#   main() is located, and branch target addresses are found.
#
# The second pass does all of the bookkeeping required to generate
#   coherent subroutine call representations.


# First pass of text segment, part 1:
#   Locate .plt section and pull jump information
#   which is required to xlate external library
#   calls under the newer compiler.

# $flag is 1 while we are inside the .plt section.
$flag=0;

print "reading text...\n";
open(F, "$ARGV[0]/text") || die $!;
while ($i=<F>) {

	# Skip lines until we find "<.plt>:", then
	# reset state (exit loop in this case) when
	# we finish that section (blank line).
	# Except for the last and section names
	# this is the same as for the other loops.
	#
	if ( $i =~ /^$/ || $i =~ /^Disassembly/ ) {
		last if $flag;
		$flag=0;
		next;
	} elsif ( $i =~ /<\.plt>:/ ) {
		$flag=1;
		next;
	} elsif ( !$flag ) {
		next;
	}

	# We are looking for lines like this:
	#> 804a58c:       ff 25 74 51 0e 08       jmp    *0x80e5174
	#
	# Library calls will be to this address and the destination
	# address will have to be dereferenced via the %FNS hash

	# remove unneeded trailer, split, and dump null 1st element
	# $cv == current value == address of current line
	#
	chomp($i);
	$i =~ s/ <[\w+]+>$//;
	@line=split(/\s+/, $i);
	shift @line;
	$line[0] =~ s/:$//;
	$cv = hex("0x".$line[0]);

	# if we have a jmp *addr line, enter the info into %PLT
	#
	if ( $line[-2] eq "jmp" && $line[-1] =~ /^\*/ ) {
		$line[-1] =~ s/^\*//;
		$PLT{$cv} = hex($line[-1]);
	}
}

# The handle is deliberately left open: part 2 of the first pass continues
# reading from where this loop stopped.  If we ran off the end of the file
# (no .plt section at all, as in an object file, or .plt was the last thing
# in the file) there would be nothing left for part 2 to read, so rewind.
#
seek(F, 0, 0) if eof(F);


# First pass of text segment, part 2:
#   In which subroutines are found and named,
#   and branch targets are identified.

$first_addr=0;		# first address of non-system code
$first=1;		# flag to make sure first subroutine is named
$last_cv=-1;		# address of the previous line (for object files)
$subno="00";		# subroutine name counter (string increment: 00, 01, ...)

$flag=0;		# 1 while inside a text section / named subroutine

# %RELOC is not modified by this loop, so order its keys once here
# rather than once per line of disassembly.
#
my @reloc_addrs = sort {$a <=> $b} keys %RELOC;

# Continues reading the handle opened for the .plt scan.
#
while ($i=<F>) {

	# Skip lines until we reach the beginning of the
	# actual "text" (either .text or an internal
	# subroutine name).  $name keeps the subroutine label
	# when the disassembly provides one.
	#
	if ( $i =~ /^$/ || $i =~ /^Disassembly/ ) {
		$flag=0;
		next;
	} elsif ( $i =~ /<\.text>:/ || $i =~ /<(\w+)>:/ ) {
		$name = $1;
		$flag=1;
		next;
	} elsif ( !$flag ) {
		next;
	}


	print ">>> ",$i,"\n" if $DEBUG>3;


	# Clean up line as above and set $cv to current address
	#
	chomp($i);
	$i =~ s/ <[\w+]+>$//;
	@line=split(/\s+/, $i);
	shift @line;
	$line[0] =~ s/:$//;
	$cv = hex("0x".$line[0]);


	# set $first_addr only once
	#
	$first_addr=$cv if !$first_addr;


	# For object files we need to know if there is a relocation
	# entry that refers to the previous line of assembly.  If so,
	# note that for later xlation in loop #2.
	#
	foreach $k (@reloc_addrs) {
		next if $k < $last_cv;
		last if $k > $cv;

		$obj_list{$last_cv}++;
		$obj_ref {$last_cv}=$RELOC{$k};
	}


	# Main logic of pass 1.  Find start of subroutines, identify
	# address of main(), and branch target addresses.
	#
	if ( $first ) {

		# Special case for first line of code so that it is
		# always flagged as a subroutine.  Clear first flag
		# and enter address with subroutine name.
		#
		$first=0;
		printf "START %08x\n", $cv if $DEBUG>4;
		if ( !length($FNS{$cv}) ) {
			$FNS{$cv} = "sub_".$subno;
			$subno++;
		}
	} elsif ( $line[-2] eq "push" && $line[-1] eq "%ebp" ) {
		printf "STOP  %08x\n", $cv if $DEBUG>3;
		printf "START %08x\n", $cv if $DEBUG>3;

		# This is a subroutine start [cf ref. #1] so name it
		# if it doesn't already have a name.
		#
		if ( !length($FNS{$cv}) ) {
			if ( length($name) && $NO_MATCH_DELIVERED_DEMOS ) { # This throws off sub names
				$FNS{$cv} = $name;
			} else {
				$FNS{$cv} = "sub_".$subno;
				$subno++;
			}
		}
	} elsif ( $line[-2] eq "push" ) {

		# Save last value pushed onto the stack as for
		# the __libc_start_main call that value will be
		# the address of main() [cf ref. #2]
		#
		$last_push = $line[-1];
		$last_push =~ s/^\$//;
		$last_push = hex($last_push);
	} elsif ( $line[-2] eq "call" ) {
		$addr = $line[-1];
		if ( $addr !~ /%/ ) {
			$addr = hex($addr);

			# Check to see if the destination address of the call is in
			# the text segment.  If so, make sure it's not a fake
			# localization call. [cf ref. #3]
			#
			if ( $addr >= ${ $SEC{".text"} }[0] && $addr < ${ $SEC{".text"} }[1] ) {
				if ( $line[1] eq "e8" && $line[2] eq "00" && $line[3] eq "00"
				     && $line[4] eq "00" && $line[5] eq "00" ) {
					$FNS{$addr} = "fake_localization_call";
					print $i,"\n" if $DEBUG>3;
					printf "CALL= %08x\n", $addr if $DEBUG>3;
				} else {
					printf "CALL  %08x\n", $addr if $DEBUG>3;

					# Should probably keep a list of calls into the
					# text segment as a double check for problems
					# with the disassembly.

				}
			} else {
				printf "CALL* %08x\n", $addr if $DEBUG>3;
			}

			# Starting program setup call.  Last pushed address
			# is main(). [cf ref. #2]
			#
			if ( $FNS{$addr} eq "__libc_start_main" ||
			     $NO_MATCH_DELIVERED_DEMOS && $FNS{$PLT{$addr}} eq "__libc_start_main" ) { # Fixes PLT lookup issue
				$FNS{$last_push} = "main";
				printf "main = %08x\n", $last_push if $DEBUG>3;
			}
		} else {

			# If the call target is *%e__, then it's a register indirect call
			# and we won't have information about the destination in most cases.
			#
			print "CALL- ", $addr, "\n" if $DEBUG>3;
		}
	} elsif ( $line[-2] =~ /^j/ ) {

		# All branches start with "j" so this is a change of control and
		# we note all destinations so that we can invalidate the registers
		# at that point.  More intensive branch analysis and register
		# bookkeeping can avoid having to do this in all cases but
		# beware loops/backward branches which cause problems.
		#
		$line[-1] =~ s/^0x//;
		$Branches{hex("0x".$line[-1])}++;
		print ">>> $line[-1]\n" if $DEBUG>3;
	} else {
		print ">>> Unused\n" if $DEBUG>4;
	}


	# reset last address value
	#
	$last_cv=$cv;
}
close(F);


# Print separator tag into the output file to show we've completed pass 1.
#
print "====\n";


# Second pass of text segment:
#   Keep track of subroutines, registers, and branches as required to
#   generate coherent subroutine calls with arguments.  And generate
#   output.


$drop_push_ebp=0;				# flag to ignore subroutine start push
$reloc_flag=0;					# relocation info present flag
@o_list = sort {$a <=> $b} keys %obj_list;	# sorted list of object file info

$flag=0;

open(F, "$ARGV[0]/text") || die $!;
while ($i=<F>) {

	# Same intro as previous loop.
	#
	if ( $i =~ /^$/ || $i =~ /^Disassembly/ ) {
		$flag=0;
		next;
	} elsif ( $i =~ /<\.text>:/ || $i =~ /<\w+>:/ ) {
		$flag=1;
		next;
	} elsif ( !$flag ) {
		next;
	}


	# Third verse, same as the first...
	#
	chomp($i);
	$i =~ s/ <[\w+]+>$//;
	@line=split(/\s+/, $i);
	shift @line;
	$line[0] =~ s/:$//;
	$cv = hex("0x".$line[0]);


	# If this is a real subroutine, we need to ignore the push %ebp
	# as it isn't involved in a subroutine call.
	#
	if ( length($FNS{$cv}) && $FNS{$cv} ne "fake_localization_call" ) {
		print "\n",$FNS{$cv},":\n";
		$drop_push_ebp=1;

		if ( $NO_MATCH_DELIVERED_DEMOS ) { # Fixes registers not invalidated over end of subroutine
			undef @pargs;
			undef @p2args;
			undef %Regs;
		}
	}


	# If this address is the target of a branch, reset registers to
	# prevent incorrect answers and note that we have done so.
	#
	if ( $Branches{$cv} ) {
		undef @pargs;
		undef @p2args;
		undef %Regs;
		print "Branch-target\n";
	}


	# Check for object file relocation entry that applies to
	# this line and set flag if so.
	#
	if ( $#o_list > -1 ) {
		if ( $cv == $o_list[0] ) {
			$reloc_flag=1;
			shift @o_list;
			# (was "$DEBUG>5", which the documented maximum of 5 can never satisfy)
			print "***   <<$obj_ref{$cv}>>   " if $DEBUG>4;
		} elsif ( $cv > $o_list[0] ) {
			print "ERROR: o_list mishandled $cv $o_list[0]\n";
		}
	}


	# This case statement is the heart of the matter.  This handles
	# each assembly instruction and maintains state, generating
	# whatever output is apropos.  Each line that is recognized
	# and processed has a '+' prepended when it is output.
	#
	# The heart of the heart is the if's that deal with call,
	# mov, and push as these instructions are the ones that
	# control function calls and argument setups. [cf refs. #4,#5]
	# For push, @pargs contains the arguments.  For mov,
	# @p2args contains the arguments.  There are a few cases
	# where additional registers are pushed onto the stack
	# and this can interfere with arguments to function calls
	# but a little more bookkeeping will make those rare cases
	# go away.
	#
	# There is a rare third case of the argument setup which
	# seems to be only in optimized code.  In this case
	# the arguments passed in are left on the stack and
	# implicitly referred to by function calls in the
	# subroutine.
	#
	# All code that references $reloc_flag is intended for
	# dealing with object files.  As this was out of scope,
	# the support is enough to help see what is going on
	# but not sufficient for reliable use -- there
	# are a number of different segments that would have
	# to be supported for a more solid implementation.

	if ( $#line == 1 ) {

		# Address plus a single byte column and no opcode -- nothing to do.
		# Usually the continuation of a long instruction.
		#
		print "+",$i,"\n";
	} elsif ( $line[-2] eq "push" ) {

		# Clear rodata flag and set up for relocation entry if one exists.
		#
		$rod=0;
		if ( $reloc_flag ) {
			$line[-1] =~ s/^\$//;
			$tmp=$line[-1];
			$line[-1] = "<<" . $obj_ref{$cv} . "+" . $tmp . ">>";
		}

		# If the argument of the push is an address, check to see if there is a
		# corresponding rodata string or if it is the address of a function
		# (such as for signal(3)).  Otherwise just print out the line
		# with relocation info.
		#
		if ( $line[-1] =~ /^\$/ ) {
			$line[-1] =~ s/^\$//;
			my $val = hex($line[-1]);
			$str="";
			if ( $val >= ${ $SEC{".rodata"} }[0] && $val < ${ $SEC{".rodata"} }[1] && ${ $SEC{".rodata"} }[0] ) {
				$str = &get_rodata_str($val);
				$rod=1;
			} elsif ( $val >= ${ $SEC{".text"} }[0] && $val < ${ $SEC{".text"} }[1] && ${ $SEC{".text"} }[0] ) {
				$str = length($FNS{$val}) ? $FNS{$val} : "unknown_text_addr";
			} else {
				;	# Otherwise no xlation.
			}
			print "+",$i, length($str) ? "\t".$str : "" ,"\n";
		} else {
			print "+",$i, $reloc_flag ? "   reloc $line[-1]\n" : "\n";
		}

		# Transfer argument to stack.  If from an initial push %ebp, ignore it.
		# Otherwise put rodata strings on preferentially or just the register/value.
		#
		if ( $line[-1] eq "%ebp" && $drop_push_ebp ) {

			# Subroutine start, ignore
		} else {
			unshift(@pargs, $rod ? $str : $line[-1]);
		}
	} elsif ( $line[-2] eq "call" ) {

		# Set the called address.  Note the type of call and resolve the
		# called address if possible.
		#
		if ( $line[-1] =~ /\*/ ) {
			$addr = -1;
			print "+",$i,"\tindirect_call\n";
		} else {
			$addr = hex($line[-1]);
			print "+",$i,"\t",$FNS{$addr},"\n" if !$NO_MATCH_DELIVERED_DEMOS; # Old code
			if ( !length($FNS{$addr}) ) {
				if ( length($FNS{$PLT{$addr}}) ) {	# Check for new style lib calls
					$addr=$PLT{$addr};
				} else {
					print "!!! ERR: no sub name defined\n";
				}
			}
			print "+",$i,"\t",length($FNS{$addr}) ? $FNS{$addr} : "UNKNOWN","\n" if $NO_MATCH_DELIVERED_DEMOS; # PLT support
		}

		# If this is a real call (not a fake localization call), then handle the args.
		#
		if ( $i =~ /\*/ || $FNS{$addr} ne "fake_localization_call" ) {

			# Print the name of the function call or INDIR if indirect.
			#
			if ( -1 == $addr ) {
				$sstr="INDIR";
				print "INDIR";
			} else {
				$sstr=$FNS{$addr};
				print $FNS{$addr};
			}


			# Decide which calling mode is in use.  Favor mov's onto
			# the stack.  Print the arguments as we have them to go with
			# the function name just printed.  Then construct $sstr
			# for use in loading registers with function return
			# values.
			#
			if ( $#pargs > -1 && $#p2args == -1 ) {
				{ local $, = ' '; print "(", @pargs,  ")\n"; }
				$sstr .= "(-" . join(", ", @pargs) .  ")";
			} else {

				# Rarely %eax is used as an implicit argument --
				# seems to depend on optimizer and compiler version.
				# (%Regs keys carry the leading '%'; the test used to
				# look up "eax" and so could never succeed.)
				#
				if ( !length($p2args[0]) ) {
					if ( length($Regs{"%eax"}) ) {
						$p2args[0] = "%eax:" . $Regs{"%eax"};
					} else {
						$p2args[0] = "";
					}
				}

				# Mark any empty spaces in the argument list --
				# will flag any missed/improperly processed
				# instructions.  A private loop variable is used so
				# that $i (the current line) is left alone.
				#
				foreach my $slot (@p2args) {
					$slot = "<<undef>>" if !length($slot);
				}

				{ local $, = ' '; print "(", @p2args, ")\n"; }
				@pargs = @p2args;
				$sstr .= "(+" . join(", ", @pargs) .  ")";
			}

			# At this point we are prepared to handle a function call.
			# $FNS{$addr} contains the function name and @pargs contains
			# the arguments.  $sstr contains the function call and arguments
			# for use in registers.  What remains is knowing what to do for
			# each function name and how to massage the arguments to
			# get what we want.
			#
			# I'm not clear on how all of these map into ACLs for subdomain
			# so I've just decoded what I could and left it at that.

			if ( $FNS{$addr} eq "chdir" ) {

				# Print a warning since this can't be tracked w/o global
				# context.
				#
				print "EMIT: WARN: chdir global $pargs[0]\n";
			} elsif ( $FNS{$addr} eq "open" ) {

				# If filename points to register, see if that register
				# contains anything.
				#
				if ( $pargs[0] =~ /^%e..$/ && length($Regs{$pargs[0]}) ) {
					$pargs[0] = $Regs{$pargs[0]};
				}

				# Decode open mode flags if constant, $rw gets
				# read/write modes; $cf gets '+' when bit 0x200 is set.
				#
				$rw = "?"; $cf="";
				if ( $pargs[1] =~ /^0x/ ) {
					$tmp = hex($pargs[1]);
					if ( ($tmp & 3) == 0 ) {
						$rw="r";
					} elsif ( ($tmp & 3) == 1 ) {
						$rw="w";
					} elsif ( ($tmp & 3) == 2 ) {
						$rw="rw";
					}
					$cf="+" if ($tmp & 0x200);
				}
				print "EMIT: open $pargs[0] $rw $cf\n";
			} elsif ( $FNS{$addr} eq "fopen" ) {

				# fopen flags, while text, will need massaging to
				# put into the proper ACL format.
				#
				print "EMIT: fopen $pargs[0] $pargs[1]\n";
			} elsif ( $FNS{$addr} eq "opendir" ) {

				# Read on foo
				#
				print "EMIT: opendir $pargs[0]\n";
			} elsif ( $FNS{$addr} eq "creat" ) {

				# Write on foo
				#
				print "EMIT: creat $pargs[0] w\n";
			} elsif ( $FNS{$addr} eq "mkdir" ) {

				# ??? ACLs?
				#
				print "EMIT: mkdir $pargs[0]\n";
			} elsif ( $FNS{$addr} eq "unlink" ) {

				# ??? Write?
				#
				print "EMIT: unlink $pargs[0]\n";
			} elsif ( $FNS{$addr} eq "rename" ) {

				# Delete on old, write on new?
				#
				print "EMIT: rename [" . $sstr . "]\n";
			} elsif ( $FNS{$addr} eq "sprintf" ) {

				# In this case put results into corresponding register in case
				# it is used later on.
				#
				print "EMIT: sprintf [$sstr]\n";
				# put in reg for later use
				$Regs{$pargs[0]} = "[" . $sstr . "]";
			} elsif ( $FNS{$addr} eq "snprintf" ) {

				# In this case put results into corresponding register in case
				# it is used later on.
				#
				print "EMIT: snprintf [$sstr]\n";
				# put in reg for later use
				$Regs{$pargs[0]} = "[" . $sstr . "]";
			} elsif ( $FNS{$addr} eq "getenv" ) {

				# In this case put results into corresponding register in case
				# it is used later on.
				#
				$Regs{"%eax"} = "[" . $sstr . "]";
			} elsif ( $FNS{$addr} eq "localtime" ) {

				# Example 1 of a fixed library call.  Whenever localtime(3) is
				# called, an open on /etc/localtime will result.
				#
				print "EMIT: localtime \"/etc/localtime\" r\n";
			} elsif ( $FNS{$addr} eq "openlog" ) {

				# Example 2 of a fixed library call.  Whenever openlog(3) is
				# called, an open on /dev/log will result.  Similar things
				# happen for nameservice calls, *pwent, *grent, etc.
				#
				print "EMIT: openlog \"/dev/log\" r\n";
			} elsif ( $FNS{$addr} =~ /^exec(l|lp|le|v|ve|vp|vpe)$/ ) {

				# we want to know about all the exec*() variants
				# (execve and execvpe were previously missed)

				print "EMIT: $FNS{$addr} [$sstr]\n";
			} elsif ( $FNS{$addr} eq "system" ) {

				# system is important too

				print "EMIT: system [$sstr]\n";
			}
		}

		# Reset arguments since we just passed a call.
		#
		undef @pargs;
		undef @p2args;
	} elsif ( $line[-2] eq "mov" && ( $line[-1] eq "%esp,%ebp" || $line[-1] eq "%ebp,%esp" ) ) {

		# Begin / end of subroutine mov's
		#
		shift @pargs if !$NO_MATCH_DELIVERED_DEMOS; # Bug in previous.  This is handled elsewhere
		print "+",$i,"\n";
	} elsif ( $line[-2] =~ /^mov/ && $line[-1] =~ /^(.*),(%e..)$/ ) {

		# Save source, destination register from operands.
		#
		$str="";
		$x=$1;
		$y=$2;

		# Check for $x being a local variable that we are tracking.
		# If so, use that value to set the destination register.
		# Then check for funky indirection modes and clear them out.
		# Otherwise, check for relocation information and possible
		# rodata string else use the values given.
		# We set the "set time" of the register for use elsewhere
		# for register indirection (cf #1#).
		#
		if ( $x =~ /^0xf[0-9a-f]+\(%ebp\)$/ && length($Regs{$x})) {
			$Regs{$y} = $Regs{$x};
		} elsif ( $x =~ /\(.*\),/ ) { # funky indirection modes
			$Regs{$y} = $x;
		} else {
			if ( $reloc_flag ) {

				# Strip the immediate marker, as the push handling does.
				# (This used to be s/^$//, which never removed anything.)
				#
				$x =~ s/^\$//;
				if ( $obj_ref{$cv} eq ".rodata" ) {
					$str = &get_rodata_str(hex($x));
					$Regs{$y} = $str;
					$str = "\t".$str;
				} else {
					$tmp = "<<" . $obj_ref{$cv} . "+" . $x . ">>";
					$Regs{$y} = $tmp;
				}
			} else {
				if ( $x =~ /^\$/ ) {
					$x =~ s/^\$//;
					my $val = hex($x);
					if ( $val >= ${ $SEC{".rodata"} }[0] && $val < ${ $SEC{".rodata"} }[1] && ${ $SEC{".rodata"} }[0] ) {
						$Regs{$y} = &get_rodata_str($val);
					} else {
						$Regs{$y} = $x;
					}
				} else {
					$Regs{$y} = $x;
				}
			}
		}
		$Seta{$y} = $cv;

		# Handle relocation information if any.  If we were able to resolve
		# a string, print it.  Otherwise, note reloc info.
		# Just mark the line as handled otherwise.
		#
		if ( $reloc_flag ) {
			if ( length($str) ) {
				print "+",$i,$str,"\n";
			} else {
				print "+",$i,"   reloc <<$obj_ref{$cv}>>\n";
			}
		} else {
			print "+",$i,"\n";
		}
		print "== $y = $Regs{$y}\n";
	} elsif ( $line[-2] =~ /^mov/ && $line[-1] =~ /\(%esp(,1)?\)/ ) {

		# Extract offset and operand from a statement like:
		#    mov    %eax,0x8(%esp,1)
		# $x gets the 1st operand, $y gets the offset into the argument list.
		#
		$line[-1] =~ /^(.*),(0x.*)?\(%esp(,1)?\)$/;
		$x=$1; $y=hex($2)/4;

		if ( $x =~ /^\$/ ) {

			# If the operand is a value, check for rodata string.
			# Otherwise just put the literal value in the argument list.
			#
			$x =~ s/^\$//;
			my $val = hex($x);
			if ( $val >= ${ $SEC{".rodata"} }[0] && $val < ${ $SEC{".rodata"} }[1] ) {
				$str = &get_rodata_str($val);
				print "+",$i,"\t",$str,"\n";
				print "[",$y,"]=",$str,"\n";
				$p2args[$y] = $str;
			} else {
				print "+",$i,"\n";
				print "[",$y,"]=",$x,"\n";
				$p2args[$y] = $x;
			}
		} elsif ( $x =~ /^%/ ) {

			# If a register was set to the value of another register
			# *and* that register has not been modified since, then
			# substitute the second register's contents for the first.
			# Otherwise just put the register name. #1#
			#
			print "+",$i,"\n";
			if ( length($Regs{$x}) && $Seta{$x} > $Seta{$Regs{$x}} ) {
				print "[",$y,"]=",$x," ==> ", $Regs{$x},"\n";
				$p2args[$y] = $Regs{$x};
			} else {
				print "[",$y,"]=",$x,"\n";
				$p2args[$y] = $x;
			}
		} else {

			# Wasn't a format we deal with, so mark the line as unprocessed.
			#
			print "X",$i,"\n";
		}
	} elsif ( $line[-2] eq "pushl" ) {

		# Push long (32-bit) quantity.  Recorded with an "L:" prefix.
		#
		if ( $reloc_flag ) {
			$line[-1] =~ s/^\$//;
			$tmp=$line[-1];
			$line[-1] = "<<" . $obj_ref{$cv} . "+" . $tmp . ">>";
			print "+",$i,"   reloc $line[-1]\n";
		} else {
			print "+",$i,"\n";
		}
		unshift(@pargs, "L:".$line[-1]);
	} elsif ( $line[-2] eq "add" || $line[-2] eq "sub" || $line[-2] eq "and" || $line[-2] eq "ror" ) {

		# Not implementing these opcodes.  Invalidate affected register,
		# but only when the destination really is a register; otherwise
		# there is no capture to act on.
		#
		if ( $line[-1] =~ /^.*,(%e..)$/ ) {
			$Regs{$1} = undef;
		}
		print "+",$i, $reloc_flag ? "   reloc <<$obj_ref{$cv}>>\n" : "\n";
	} elsif ( $line[-2] =~ /^mov[lwb]?$/ ) {

		# We only care about (%esp,1), (%esp), and register versions of mov opcodes
		# and those are handled above.
		#
		print "+",$i, $reloc_flag ? "   reloc <<$obj_ref{$cv}>>\n" : "\n";
	} elsif ( $line[-2] =~ /^j/ ) {

		# Branch/change of control.  Reset regs and stack.
		#
		print "+",$i,"\n";
		undef @pargs;
		undef @p2args;
		undef %Regs;
	} elsif ( $line[-2] =~ /^test[bwl]?$/ || $line[-2] =~ /^cmp[bwl]?$/ ) {

		# Just ignore.  Nothing to do with comparison opcodes.
		# Print relocation info if present.
		#
		print "+",$i, $reloc_flag ? "   reloc <<$obj_ref{$cv}>>\n" : "\n";
	} elsif ( $line[-3] eq "repz" || $line[-3] eq "repnz" || $line[-2] eq "setne" ) {

		# Moving loop opcodes.  Probably should invalidate related registers
		# and not depend on the compiler to reinitialize them.
		#
		print "+",$i,"\n";
	} elsif ( $line[-2] eq "pop" && $line[-1] eq "%ebp" ) {

		# Just ignore.  End of subroutine cruft.
		#
		print "+",$i,"\n";
	} elsif ( $line[-2] eq "lea" && ( $line[-1] =~ /,(%e..)$/ || $line[-1] =~ /[sd]i.*[sd]i/ ) ) {

		# Mostly filler between subroutines.  When a destination register
		# was captured, its tracked contents are invalidated.
		#
		$Regs{$1} = undef if length($1);
		print "+",$i,"\n";
	} elsif ( $line[-2] eq "xor" && $line[-1] =~ /^(%e..),\1$/ ) {

		# xor-ing a register with itself is a shorthand for setting it to zero.
		#
		$Regs{$1} = 0;
		print "+",$i,"\n";
	} elsif ( $line[-2] eq "inc" || $line[-2] eq "dec" || $line[-2] eq "not" || $line[-2] eq "neg" ) {

		# Not implementing these opcodes; just invalidate register contents.
		#
		$Regs{$line[-1]} = undef;
		print "+",$i,"\n";
	} elsif ( $line[-1] eq "hlt"   ||
		  $line[-1] eq "nop"   ||
		  $line[-1] eq "leave" ||
		  $line[-1] eq "ret"   ) {

		# These are all control flow related opcodes; all ignored.
		#
		print "+",$i,"\n";
	} elsif ( $line[-2] eq "pop" ) {

		# Currently not used to manipulate the stack.
		# Stack is dumped after each "call".
		#
		print "+",$i,"\n";
	} else {

		# This line is not recognized nor processed.
		#
		print $i,"\n";
	}


	# Reset relocation info flag for next pass
	#
	$reloc_flag=0;
}
close(F);


# All done
#
exit 0;


#=========================================================================
#
# Subroutines:


# get_rodata_str:
#   Takes a decimal address and returns the best representation
#   of data at that address in the %RODATA hash as a string.
#   Reading stops at the first zero byte or at the first address
#   that has no entry in %RODATA.
#   String is returned in double quotes: printable ASCII is copied,
#   newline and tab become \n and \t, and every other byte is
#   replaced with '?'.
#
#   (Declared without a prototype: the former empty prototype "()"
#   claimed the sub takes no arguments, although it takes one.)
#
sub get_rodata_str {
	my($addr) = @_;
	my($str);

	$str = "\"";

	while ( $RODATA{$addr} ) {
		my $code = $RODATA{$addr};
		if ( $code > 31 && $code < 127 ) {
			$str .= sprintf "%c", $code;	# printable ASCII
		} elsif ( 10 == $code ) {
			$str .= "\\n";
		} elsif (  9 == $code ) {
			$str .= "\\t";
		} else {
			$str .= "?";
		}
		$addr++;
	}

	$str .= "\"";

	return $str;
}


#=========================================================================
#
# References:
#
# Ref. #1:
# User subroutines always begin with "push %ebp".  If this
# changes, then a number of modifications of this script will be
# required.  For example:
#
# 8049864:      55                      push   %ebp
#
#
#
# Ref. #2:
#
# This is the typical start() routine.  Note the last item
# pushed on the stack is the address of main().
#
# 8049840:      31 ed                   xor    %ebp,%ebp
# 8049842:      5e                      pop    %esi
# 8049843:      89 e1                   mov    %esp,%ecx
# 8049845:      83 e4 f0                and    $0xfffffff0,%esp
# 8049848:      50                      push   %eax
# 8049849:      54                      push   %esp
# 804984a:      52                      push   %edx
# 804984b:      68 50 d9 04 08          push   $0x804d950       sub_75
# 8049850:      68 20 d9 04 08          push   $0x804d920       sub_74
# 8049855:      51                      push   %ecx
# 8049856:      56                      push   %esi
# 8049857:      68 10 b2 04 08          push   $0x804b210       main
# 804985c:      e8 4f fd ff ff          call   0x80495b0        __libc_start_main
# __libc_start_main( 0x804b210 %esi %ecx 0x804d920 0x804d950 %edx %esp %eax )
# 8049861:      f4                      hlt
# 8049862:      90                      nop
# 8049863:      90                      nop
#
#
#
# Ref. #3:
# A fake call used for getting the current execution address (PC).
# It is a call to relative address 0 (next instruction) and then
# the return address is popped off the stack for manipulation.
# I've only seen this in system code.  For example:
#
# 8049869:      e8 00 00 00 00          call   0x804986e        fake_localization_call
# 804986e:      5b                      pop    %ebx
# 804986f:      81 c3 e6 59 00 00       add    $0x59e6,%ebx
# 8049875:      8b 83 84 01 00 00       mov    0x184(%ebx),%eax
#
#
#
# Ref. #4:
# Calling sequence based on mov.  In this case operands are put in the stack
# by moving them to 0xXX(%esp,1) where 0xXX is 4 times the parameter number,
# e.g., 0x0 is first, 0x4 is second, etc.  Variants include just (%esp).
#
# 8049bb6:      c7 44 24 04 bd dd 04    movl   $0x804ddbd,0x4(%esp,1)   "%s"
# 8049bbd:      08
# 8049bbe:      8b 45 08                mov    0x8(%ebp),%eax
# 8049bc1:      c7 04 24 03 00 00 00    movl   $0x3,(%esp,1)
# 8049bc8:      89 44 24 08             mov    %eax,0x8(%esp,1)
# 8049bcc:      e8 bf f8 ff ff          call   0x8049490        syslog
# syslog( 0x3 "%s" 0x8(%ebp) )
#
#
#
# Ref. #5:
# Calling sequence based on push.  In this case operands are put on the stack in
# reverse order for the call.  For example:
#
# 804b286:      68 aa 53 0c 08          push   $0x80c53aa       "none"
# 804b28b:      a1 60 b4 21 08          mov    0x821b460,%eax
# 804b290:      50                      push   %eax
# 804b291:      e8 d6 fb ff ff          call   804ae6c  strcasecmp
# strcasecmp( %eax "none" )