# Perl script for substituting entity definitions into a text stream.
#
# Usage:  <this script> [-e]CODE  < input  > output
#
# CODE is a corpus code of five or six characters; a leading "-e" on the
# argument is stripped.  General entity declarations are read from the first
# file matching
#     $ECI_ROOT/data/eci?/XXXXX/XXXXX.ent
# where XXXXX is the first five characters of CODE.  Each line of STDIN is
# copied to STDOUT with references to those entities, the built-in entities
# &amp; &lab; &sq;, and numeric character references (&#NNN;) expanded.
#
# The substitutions are held as a table of precompiled patterns with literal
# replacement values.  Entity names and values are never turned into Perl
# source text, so a value containing characters such as / " $ @ or \ is
# inserted verbatim, and a "." in an entity name matches only itself.
use strict;
use warnings;

# An entity reference ends at the first character that cannot be part of a
# name.  That terminator is captured: a ";" is swallowed, anything else is
# kept in the output.
my $NAME_END = '([^-.a-zA-Z0-9])';

# Build one substitution rule: [ compiled pattern, literal replacement ].
sub make_rule {
    my ($name, $value) = @_;
    return [ qr/&\Q$name\E$NAME_END/, $value ];
}

# Apply every rule in $rules, in order, to $line and return the result.
sub apply_rules {
    my ($line, $rules) = @_;
    for my $rule (@$rules) {
        my ($pattern, $value) = @$rule;
        $line =~ s/$pattern/$value . ($1 eq ';' ? '' : $1)/eg;
    }
    return $line;
}

# Read general entity declarations from $fh and return a reference to the
# list of rules, in file order.  Skipped: parameter entities (the name may
# not contain "%"), entities whose name contains "corp", "comp", "copyright"
# or the corpus code (structural ones), and entities whose value itself
# contains "&" (tricky ones).  Dies if a wanted declaration has no complete
# quoted value on its line.
sub read_entity_rules {
    my ($fh, $code) = @_;
    my @rules;
    while (my $decl = <$fh>) {
        # look for general entity declarations
        next unless $decl =~ /<!ENTITY +([^% ]+) +(['"])/;
        my ($name, $qt) = ($1, $2);

        # rule out structural ones
        next if $name =~ /(?:corp|comp|\Q$code\E|copyright)/;

        # The value is the quoted literal that directly follows the name,
        # not some later quoted string on the same line.
        my ($val) = $decl =~ /<!ENTITY +\Q$name\E +$qt([^$qt]*)$qt/
            or die "can't parse entity definition: $decl";

        # rule out tricky ones
        next if $val =~ /&/;

        push @rules, make_rule($name, $val);
    }
    return \@rules;
}

my $code = shift;
$code = '' unless defined $code;
$code =~ s/^-e//;
die "$code not a valid corpus code" unless $code =~ /^(...)(..)(.?)$/;
my $stem = "$1$2";

# An unset ECI_ROOT is treated as the empty string.
my $root = defined $ENV{"ECI_ROOT"} ? $ENV{"ECI_ROOT"} : '';
my ($fn) = glob("$root/data/eci?/$stem/$stem.ent");
die "Can't find $code.ent" unless defined $fn;

open(my $ent_fh, '<', $fn) or die "Can't open $fn: $!";
my $entity_rules = read_entity_rules($ent_fh, $code);
close($ent_fh);

# Entities that are always expanded, before the ones from the .ent file.
my $builtin_rules = [
    make_rule('amp', '&'),
    make_rule('lab', '<'),
    make_rule('sq',  "'"),
];

while (my $line = <STDIN>) {
    $line = apply_rules($line, $builtin_rules) if $line =~ /&/;

    # Re-test: the built-in pass may have removed (or produced) every "&".
    if ($line =~ /&/) {
        $line = apply_rules($line, $entity_rules);
        # Numeric character references; the trailing ";" is optional.
        $line =~ s/&#([0-9]+);?/sprintf('%c', $1)/ge;
    }
    print $line;
}
