#!/bin/sh
# give a stream of just text from ECI files
# Usage:  textonly [-b] [-e[corp-code]] files...
# With -e, will substitute &, < and ' for &amp;, &lab; and &sq; respectively and
#   will replace &#nnn; with the appropriate character (e.g. &#27; -> <ESC>).
# With -ecorp-code, will attempt to find further entity definitions
#  from corp-code.ent and substitute them as well.
# With -b, will include blank lines, at a very slight time penalty,
#  otherwise will not
# Refuse to run unless ECI_ROOT is set to a non-empty value; it is used
# further down to locate src/perl/substent.pl for the -ecorp-code option.
# The complaint goes to standard error and the exit status is 10.
[ -n "$ECI_ROOT" ] || {
  echo "Must set ECI_ROOT before using!" >&2
  exit 10
}
# Filter program: $ECI_GREP when it is set and non-empty, otherwise egrep.
# $grep is expanded unquoted below on purpose, so that a value carrying
# options (e.g. "grep -E") keeps working.
grep=${ECI_GREP:-egrep}
# Define the two line filters used by the rest of the script:
#   doe  [files...]  print the non-empty lines that do not begin with "<"
#   dobe [files...]  print every line that does not begin with "<",
#                    blank lines included
# Both read standard input when no files are named; -h suppresses the
# file-name prefix when several files are named.  File arguments are passed
# on as "$@" so that names containing blanks are not split again here.
#
# The egrep form of doe used to embed a literal newline inside the bracket
# expression, to work around an old egrep that matched empty lines with
# ^[^<].  POSIX grep (and GNU grep) treat a newline in the pattern as a
# separator between patterns, so that spelling is rejected as an
# unterminated bracket expression.  Excluding "^(<|$)" selects the same
# lines and is also safe with an egrep that has the empty-line bug.
case $grep in
  *egrep*)
    doe ()  { $grep -hv '^(<|$)' "$@" ; }
    dobe () { $grep -h '^([^<]|$)' "$@" ; }
    ;;
  *)
    # These patterns avoid alternation, presumably because $grep may be a
    # plain (basic-regex) grep here.
    doe ()  { $grep -h '^[^<]' "$@" ; }
    dobe () { $grep -hv '^<' "$@" ; }
    ;;
esac
# Filter used unless -b is given: doe, which drops blank lines.
prog=doe
# First option pass: print the help text, or switch to dobe, the filter
# that keeps blank lines.  Only the first argument is examined, so -b has
# to come before any -e option.
case "$1" in
  -[xXhH?]*)
    # Help goes to standard output and the script exits with status 1.
    echo "Usage: textonly [-b] [-e[corp-code]] files..."
    echo " With -e, will substitute &, < and ' for &amp;, &lab; and &sq; respectively"
    echo " With -ecorp-code, will attempt to find further entity definitions"
    echo "  from corp-code.ent and substitute them as well."
    echo " With -b, will always include blank lines, otherwise not"
    exit 1 ;;
  -b)
    prog=dobe
    shift ;;
esac
# decode_basic_entities: copy standard input to standard output, replacing
#   &amp  &lab  &sq   with  &  <  '  when followed by ";" (which is dropped)
#                     or by any other character that cannot continue an
#                     entity name (which is kept);
#   &#nnn; or &#nnn   with the character whose code is nnn.
# Everything is decoded in a single pass, so text produced by one
# replacement is never decoded a second time: "&amp;lab;" yields "&lab;",
# not "<".  chr(39) is the apostrophe, written that way because the Perl
# program itself sits inside shell single quotes.
decode_basic_entities () {
  perl -pe '
BEGIN { %ent = ("amp", "&", "lab", "<", "sq", chr(39)); }
if (/&/) {
    s/&(?:(amp|lab|sq)(?:;|(?=[^-.a-zA-Z0-9]))|#([0-9]+);?)/defined($1) ? $ent{$1} : sprintf("%c", $2)/ge;
}'
}

# Second option pass: choose the entity handling, then run the selected
# filter ($prog) over the remaining arguments.  The arguments are passed
# as "$@" so that file names containing blanks stay intact.
case "$1" in
  -e)
    shift
    # Historical note: informal tests suggested that egrep was the fastest
    # of the widely available egrep, fgrep and grep, and that selecting the
    # wanted lines was faster than excluding with -v "^<".  GNU egrep was
    # faster still -- if you have it installed, set the environment
    # variable ECI_GREP to its name.
    $prog "$@" | decode_basic_entities ;;
  -e*)
    # -ecorp-code: hand the whole flag to substent.pl under $ECI_ROOT.
    eflg=$1
    shift
    $prog "$@" | perl "$ECI_ROOT/src/perl/substent.pl" "$eflg" ;;
  -*)
    # Any other option is an error: short usage on stderr, exit status 2.
    echo "Usage: textonly [-b] [-e[corp-code]] files..." 1>&2
    echo " or textonly -? for more information" 1>&2
    exit 2 ;;
  *)
    $prog "$@" ;;
esac
