# Assembled with the script 'compile.pss' 
start:
 
#   translate.tcl.pss 
#
#   This is a parse-script which translates parse-scripts into tcl
#   code, using the 'pep' tool. The script creates a standalone 
#   tcl program
#   
#   The virtual machine and engine is implemented in plain c at
#   http://bumble.sf.net/books/pars/pep.c. This implements a script
#   language with a syntax reminiscent of sed and awk (much simpler than
#   awk, but more complex than sed).
#   
#   This code was originally created by adapting the code in
#   'translate.java.pss' which compiles scripts to java
#
#STATUS
#
#   7 sept 2022
#     Most short tests working. 
#
#NOTES
#   
#   Tcl doesn't require a semicolon ; at the end of a 1 line
#   statement, but I will include it, in case I want to put more 
#   than one statement on a line. also for comments
#
#   In other translation scripts, we use labelled loops and
#   break/continue to implement the parse> label and .reparse .restart
#   commands. But tcl does not have these, nor a "goto" statement.
#   This is resolved using "run once" blocks and break and continue
#   statements: eg while true { read; break; }. The run once technique
#   is a better general solution and could be used in all the other
#   translation scripts (because it doesnt require goto or labelled
#   blocks)
#
#TODO
#
#   writefile; writefile "name"; readfile; readfile "name";
#   The version without a parameter gets the filename from the 
#   current tape cell.
#
#   Consolidate error checking after parse> label
#   Add script* token on succesfull parse at EOF
#
#   Add "until;"? (no-argument until). Add the "write <filename>;"
#   and "append <filename>;" commands.
#
#   Convert to a parse method, which will also allow this to 
#   act as an immediate interpreter of pep/nom scripts.
#   See tr/translate.perl.pss for a good example of a parse 
#   method, that can act as an interpreter.
#
#   Convert the grammar to the simpler version used in translate.perl.pss
#   (which also allows test-expressions like 
#     >> (B"xx",B"yy").!E"zz" { ... }
#
#SEE ALSO
#   
#   At http://bumble.sf.net/books/pars/
#   
#   d/- eg/: some example pep/nom scripts
#     - tr/: 
#       the folder contains other translation scripts. The scripts
#       for python/ruby/java/go/javascript/tcl etc are quite complete.
#     - translate.py.pss
#      A python translator in working order (june 2021).
#     - compile.pss
#     compiles a script into an "assembly" format that can be loaded
#     and run on the parse-machine with the -a  switch. This performs
#     the same function as "asm.pp" 
#
#TESTING
#
#  Comprehensive 1st and 2nd generation tests can be done with 
#  >> pep.tt tcl
#
#  This script tests every command in the pep/nom language with a 
#  simple script and tests its translation into TCL.
#
#  Also, use the helper functions in the peprc helpers.pars.sh such as
#  pep.ts pep.tss pep.tf and pep.tff
#
#  Remember "man 3tcl command" for documentation about tcl/tk on
#  Unix-like systems. I would like to make a similar doc system for
#  pep. Could split the pars-book.txt file into "man" pages for 
#  each pep command.
#
#   * a simple test procedure, working
#   ---------
#    pep -f translate.tcl.pss -i "r;t;t;d;" > test.tcl
#    chmod a+x test.tcl
#    echo "abc" | tclsh test.tcl # or
#    echo "abc" | ./test.tcl 
#    # should print 'aabbcc'
#   ,,,
#
#   Check multiline text with 'add' and 'until'
#
#   * one comprehensive test is to run the script on itself
#   >> pep -f translate.tcl.pss translate.tcl.pss > tran.tcl 
#
#   This is the "shangrilah" of pep scripts.
#
#   test 
#   -----
#     pep -f translate.tcl.pss translate.tcl.pss > eg/tcl/translate.tcl.tcl 
#     chmod a+x eg/tcl/translate.tcl.tcl 
#     echo "nop;r;t;t;d;" | eg/tcl/translate.tcl.tcl 
#   ,,,,
#
#   test eg/natural.language.pss 
#
#  * translate the doc formatter into tcl 
#  ----
#    pep -f translate.tcl.pss eg/mark.latex.pss > eg/tcl/mark.latex.tcl
#    cat pars-book.txt | eg/tcl/mark.latex.tcl 
#  ,,,,
#
#  * translate the translator into tcl 
#   -----
#     pep -f translate.tcl.pss translate.tcl.pss > test.tcl
#     cat eg/exp.tolisp.pss | ./test.tcl > exp.tolisp.tr.tcl 
#     echo "(a+2)*3+4" | ./exp.tolisp.tr.tcl 
#   ,,,
#
#   This is fairly complex. The script translates itself into
#   tcl, and then that translator is used to translate 
#   another script into tcl, which is then executed....
#
#   self referentiality cubed, not working yet
#   -----
#     pep -f translate.tcl.pss translate.tcl.pss > 1.tr.tcl.pss
#     chmod a+x 1.tr.tcl.pss
#     cat translate.tcl.pss | 1.tr.tcl.pss > 2.tr.tcl.pss
#     chmod a+x 2.tr.tcl.pss 
#     cat eg/exp.tolisp.pss | 2.tr.tcl.pss > test.tcl 
#     chmod a+x test.tcl 
#     echo "(a+2)*3+4" | ./test.tcl 
#   ,,,
#
#BUGS
#
# image format not working in eg/mark.latex.pss eg [[ ... ]]
#
# In second gen scripts \\ causes problems
#
# No-argument until not implemented here.
#
# unescape needs to walk the string, not just do \c -> c  
#
# check until code for multiple escapes \\\\\\
# parse> label cannot be at beginning or end of script.
#
#SOLVED BUGS TO WATCH FOR 
#
#  Need to escape *[]\? in begin and ends tests because this
#  uses string match.
#
#  In quotes interpolation will occur which is not good. We need to
#  stop it by escaping certain chars in the quote string.
#
#  With these scripting languages, arrays elements dont exist
#  unless created. so "++; put; " fails with a list index out 
#  of range error.
#
#  Need to double \\ backslash sometimes to get a single one in the output
#
#  found and fixed a bug in java whilenot/while. The code exits if the 
#  character is not found, which is not correct.
#
#  "until" bug where the code did not read at least one character.
#
#  Read must exit if at end of stream, but while/whilenot/until, no.
#
#TASKS 
#
#HISTORY
#    
#  18 feb 2022
#    Added go; syntax
#  13 aug 2022
#    One year later, fixing some remaining bugs. Quoting of special
#    chars in "string match" and regex is not easy in tcl. 
#    When running eg/mark.latex.pss for some reason
#    "bl*uutext*4dots*nl*" does not reduce...
#  20 aug 2021
#    fixing escape code. Unescape still doesnt seem to work.
#  18 august 2021
#    seem to have fixed multiple escape bug.
#  18 june 2021
#    fixing the .restart bug before the parse label. just making .restart
#    do 'break;' instead of continue (which results in an infinite loop)
#    Added upper,lower and cap commands to this translator script.
#
#  7 june 2021
#    Reexamining. Escaped { and } in classes. Made an eg/tcl/ folder
#    where generated code can go. Made the pep.tcs bash function 
#    better for testing
#
#  7 august 2020
#    the eg/exp.tolisp.pss seems to be working under translation
#    to tcl.
#
#    testing and debugging. Init procedure.
#    reconfigured .reparse .restart as unlabelled run-once 
#    blocks. This is useful for any language that has no "goto"
#    and no labelled blocks (loops etc). Script is nearing a 
#    useable stage.
#
#  5 august 2020
#
#    tested print/clear/clop/clip/push/pop/stack/unstack
#      put/get/swap/ ...
#
#    First most basic script is working: "r;t;t;d;"
#
#  2 august 2020
#
#    Began to adapt this script from translate.py.pss, the 
#    python translator 
#
#
read
#--------------
testclass [:space:]
jumpfalse block.end.7134
  clear
  jump parse
block.end.7134:
#---------------
# We can ellide all these single character tests, because
# the stack token is just the character itself with a *
# Braces {} are used for blocks of commands, ',' and '.' for concatenating
# tests with OR or AND logic. 'B' and 'E' for begin and end
# tests, '!' is used for negation, ';' is used to terminate a 
# command.
testis "{"
jumptrue 16
testis "}"
jumptrue 14
testis ";"
jumptrue 12
testis ","
jumptrue 10
testis "."
jumptrue 8
testis "!"
jumptrue 6
testis "B"
jumptrue 4
testis "E"
jumptrue 2 
jump block.end.7570
  put
  add "*"
  push
  jump parse
block.end.7570:
#---------------
# format: "text"
testis "\""
jumpfalse block.end.8271
  # save the start line number (for error messages) in case 
  # there is no terminating quote character.
  clear
  add "line "
  ll
  add " (character "
  cc
  add ") "
  # safer to use braces for quotes??
  put
  clear
  add "\""
  until "\""
  testends "\""
  jumptrue block.end.8004
    clear
    add "Unterminated quote character (\") starting at "
    get
    add " !\n"
    print
    quit
  block.end.8004:
  # just escape [ and $ because they cause string interpolation
  # in tcl
  clip
  escape "["
  escape "]"
  escape "$"
  escape "{"
  escape "}"
  escape "*"
  escape "^"
  escape "?"
  add "\""
  put
  clear
  add "quote*"
  push
  jump parse
block.end.8271:
#---------------
# format: 'text', single quotes are converted to double quotes
# but we must escape embedded double quotes.
testis "'"
jumpfalse block.end.9077
  # save the start line number (for error messages) in case 
  # there is no terminating quote character.
  clear
  add "line "
  ll
  add " (character "
  cc
  add ") "
  put
  clear
  until "'"
  testends "'"
  jumptrue block.end.8735
    clear
    add "Unterminated quote (') starting at "
    get
    add "!\n"
    print
    quit
  block.end.8735:
  clip
  # [ and $ cause interpolation in tcl so must be escaped
  # escape other special chars. What about "\\"???
  escape "["
  escape "]"
  escape "$"
  escape "{"
  escape "}"
  escape "\""
  escape "*"
  escape "^"
  escape "?"
  put
  clear
  add "\""
  get
  add "\""
  put
  clear
  add "quote*"
  push
  jump parse
block.end.9077:
#---------------
# formats: [:space:] [a-z] [abcd] [:alpha:] etc 
# should class tests really be multiline??!
testis "["
jumpfalse block.end.12684
  # save the start line number (for error messages) in case 
  # there is no terminating bracket character.
  clear
  add "line "
  ll
  add " (character "
  cc
  add ") "
  put
  clear
  add "["
  until "]"
  testis "[]"
  jumpfalse block.end.9601
    clear
    add "pep script error at line "
    ll
    add " (character "
    cc
    add "): \n"
    add "  empty character class [] \n"
    print
    quit
  block.end.9601:
  testends "]"
  jumptrue block.end.9888
    clear
    add "Unterminated class text ([...]) starting at "
    get
    add "\n"
    add "      class text can be used in tests or with the 'while' and \n"
    add "      'whilenot' commands. For example: \n"
    add "        [:alpha:] { while [:alpha:]; print; clear; }\n"
    add "      "
    print
    quit
  block.end.9888:
  # need to escape { and } so they dont interfere with the
  # quote braces used for regexp. Dont need to escape [ and ] because 
  # the script writer has to escape them any way. (but what about in quotes?)
  escape "}"
  escape "{"
  # the caret is not a negation operator in pep scripts
  escape "^"
  # the line below also works
  #replace "^" "\\^";
  # save the class on the tape
  put
  clop
  clop
  testbegins "-"
  jumptrue block.end.10442
    # not a range class, eg [a-z] so need to escape '-' chars. yes
    clear
    get
    escape "-"
    put
  block.end.10442:
  testbegins "-"
  jumpfalse block.end.10830
    # a range class, eg [a-z], check if it is correct
    clip
    clip
    testis "-"
    jumptrue block.end.10824
      clear
      add "Error in pep script at line "
      ll
      add " (character "
      cc
      add "): \n"
      add " Incorrect character range class "
      get
      add "\n"
      add "   For example:\n"
      add "     [a-g]  # correct\n"
      add "     [f-gh] # error! \n"
      print
      clear
      quit
    block.end.10824:
  block.end.10830:
  clear
  get
  # restore class text
  testbegins "[:"
  jumpfalse 3
  testends ":]"
  jumpfalse 2 
  jump block.end.10995
    clear
    add "malformed character class starting at "
    get
    add "!\n"
    print
    quit
  block.end.10995:
  testbegins "[:"
  jumpfalse 3
  testis "[:]"
  jumpfalse 2 
  jump block.end.11826
    clip
    clip
    clop
    clop
    # unicode posix character classes in tcl
    # Also, abbreviations (not implemented in gh.c yet.)
    # abbreviations are written [:A:] etc
    testis "alnum"
    jumptrue 4
    testis "N"
    jumptrue 2 
    jump block.end.11242
      clear
      add "[[:alnum:]]"
    block.end.11242:
    testis "alpha"
    jumptrue 4
    testis "A"
    jumptrue 2 
    jump block.end.11290
      clear
      add "[[:alpha:]]"
    block.end.11290:
    testis "ascii"
    jumptrue 4
    testis "I"
    jumptrue 2 
    jump block.end.11338
      clear
      add "[[:ascii:]]"
    block.end.11338:
    testis "blank"
    jumptrue 4
    testis "B"
    jumptrue 2 
    jump block.end.11386
      clear
      add "[[:blank:]]"
    block.end.11386:
    testis "cntrl"
    jumptrue 4
    testis "C"
    jumptrue 2 
    jump block.end.11434
      clear
      add "[[:cntrl:]]"
    block.end.11434:
    testis "digit"
    jumptrue 4
    testis "D"
    jumptrue 2 
    jump block.end.11482
      clear
      add "[[:digit:]]"
    block.end.11482:
    testis "graph"
    jumptrue 4
    testis "G"
    jumptrue 2 
    jump block.end.11530
      clear
      add "[[:graph:]]"
    block.end.11530:
    testis "lower"
    jumptrue 4
    testis "L"
    jumptrue 2 
    jump block.end.11578
      clear
      add "[[:lower:]]"
    block.end.11578:
    testis "print"
    jumptrue 4
    testis "P"
    jumptrue 2 
    jump block.end.11626
      clear
      add "[[:print:]]"
    block.end.11626:
    testis "punct"
    jumptrue 4
    testis "T"
    jumptrue 2 
    jump block.end.11674
      clear
      add "[[:punct:]]"
    block.end.11674:
    testis "space"
    jumptrue 4
    testis "S"
    jumptrue 2 
    jump block.end.11722
      clear
      add "[[:space:]]"
    block.end.11722:
    testis "upper"
    jumptrue 4
    testis "U"
    jumptrue 2 
    jump block.end.11770
      clear
      add "[[:upper:]]"
    block.end.11770:
    testis "xdigit"
    jumptrue 4
    testis "X"
    jumptrue 2 
    jump block.end.11820
      clear
      add "[[:xdigit:]]"
    block.end.11820:
  block.end.11826:
  
  #     alnum - alphanumeric like [0-9a-zA-Z] 
  #     alpha - alphabetic like [a-zA-Z] 
  #     blank - blank chars, space and tab 
  #     cntrl - control chars, ascii 000 to 037 and 177 (del) 
  #     digit - digits 0-9 
  #     graph - graphical chars same as :alnum: and :punct: 
  #     lower - lower case letters [a-z] 
  #     print - printable chars ie :graph: + space 
  #     punct - punctuation ie !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~. 
  #     space - all whitespace, eg \n\r\t vert tab, space, \f 
  #     upper - upper case letters [A-Z] 
  #     xdigit - hexadecimal digit ie [0-9a-fA-F] 
  #    
  put
  clear
  # add quotes around the class and limits around the 
  # class so it can be used with the string.matches() method
  # (must match the whole string, not just one character)
  add "^"
  get
  add "+$"
  put
  clear
  add "class*"
  push
  jump parse
block.end.12684:
#---------------
# formats: (eof) (EOF) (==) etc. 
testis "("
jumpfalse block.end.13155
  clear
  until ")"
  clip
  put
  testis "eof"
  jumptrue 4
  testis "EOF"
  jumptrue 2 
  jump block.end.12838
    clear
    add "eof*"
    push
    jump parse
  block.end.12838:
  testis "=="
  jumpfalse block.end.12891
    clear
    add "tapetest*"
    push
    jump parse
  block.end.12891:
  add " << unknown test near line "
  ll
  add " of script.\n"
  add " bracket () tests are \n"
  add "   (eof) test if end of stream reached. \n"
  add "   (==)  test if workspace is same as current tape cell \n"
  print
  clear
  quit
block.end.13155:
#---------------
# multiline and single line comments, eg #... and #* ... *#
testis "#"
jumpfalse block.end.14652
  clear
  read
  testis "\n"
  jumpfalse block.end.13291
    clear
    jump parse
  block.end.13291:
  # checking for multiline comments of the form "#* \n\n\n *#"
  # these are just ignored at the moment (deleted) 
  testis "*"
  jumpfalse block.end.14196
    # save the line number for possible error message later
    clear
    ll
    put
    clear
    until "*#"
    testends "*#"
    jumpfalse block.end.13940
      # no, need to convert to multiple single line comments
      # or a if {0} { } block for tcl
      clip
      clip
      put
      clear
      add "if {0} {"
      get
      add "}"
      # create a "comment" parse token
      put
      clear
      # comment-out this line to remove multiline comments from the 
      # compiled code 
      # add "comment*"; push; 
      jump parse
    block.end.13940:
    # make an unterminated multiline comment an error
    # to ease debugging of scripts.
    clear
    add "Unterminated multiline comment #* ... *# \n"
    add "starting at line number "
    get
    add "\n"
    print
    clear
    quit
  block.end.14196:
  # single line comments. some will get lost.
  put
  clear
  add "#"
  get
  until "\n"
  clip
  # escape special chars for tcl, since it looks for 
  # these even in comments! (I know, strange but true)
  escape "["
  escape "]"
  escape "$"
  escape "{"
  escape "}"
  put
  clear
  # I am removing comments from translated scripts because
  # tcl doesnt like unbalanced braces in comments!
  add "comment*"
  push
  jump parse
block.end.14652:
#----------------------------------
# parse command words (and abbreviations)
# legal characters for keywords (commands)
testclass [abcdefghijklmnopqrstuvwxyzBEKGPRUWS+-<>0^]
jumptrue block.end.15039
  # error message about a misplaced character
  put
  clear
  add "!! Misplaced character '"
  get
  add "' in script near line "
  ll
  add " (character "
  cc
  add ") \n"
  print
  clear
  quit
block.end.15039:
# my testclass implementation cannot handle complex lists
# eg [a-z+-] this is why I have to write out the whole alphabet
while [abcdefghijklmnopqrstuvwxyzBEOFKGPRUWS+-<>0^]
#----------------------------------
# KEYWORDS 
# here we can test for all the keywords (command words) and their
# abbreviated one letter versions (eg: clip k, clop K etc). Then
# we can print an error message and abort if the word is not a 
# legal keyword for the parse-edit language
# make ll an alias for "lines" and cc an alias for chars
testis "ll"
jumpfalse block.end.15623
  clear
  add "lines"
block.end.15623:
testis "cc"
jumpfalse block.end.15655
  clear
  add "chars"
block.end.15655:
# one letter command abbreviations
testis "a"
jumpfalse block.end.15722
  clear
  add "add"
block.end.15722:
testis "k"
jumpfalse block.end.15752
  clear
  add "clip"
block.end.15752:
testis "K"
jumpfalse block.end.15782
  clear
  add "clop"
block.end.15782:
testis "D"
jumpfalse block.end.15815
  clear
  add "replace"
block.end.15815:
testis "d"
jumpfalse block.end.15846
  clear
  add "clear"
block.end.15846:
testis "t"
jumpfalse block.end.15877
  clear
  add "print"
block.end.15877:
testis "p"
jumpfalse block.end.15906
  clear
  add "pop"
block.end.15906:
testis "P"
jumpfalse block.end.15936
  clear
  add "push"
block.end.15936:
testis "u"
jumpfalse block.end.15969
  clear
  add "unstack"
block.end.15969:
testis "U"
jumpfalse block.end.16000
  clear
  add "stack"
block.end.16000:
testis "G"
jumpfalse block.end.16029
  clear
  add "put"
block.end.16029:
testis "g"
jumpfalse block.end.16058
  clear
  add "get"
block.end.16058:
testis "x"
jumpfalse block.end.16088
  clear
  add "swap"
block.end.16088:
testis ">"
jumpfalse block.end.16116
  clear
  add "++"
block.end.16116:
testis "<"
jumpfalse block.end.16144
  clear
  add "--"
block.end.16144:
testis "m"
jumpfalse block.end.16174
  clear
  add "mark"
block.end.16174:
testis "M"
jumpfalse block.end.16202
  clear
  add "go"
block.end.16202:
testis "r"
jumpfalse block.end.16232
  clear
  add "read"
block.end.16232:
testis "R"
jumpfalse block.end.16263
  clear
  add "until"
block.end.16263:
testis "w"
jumpfalse block.end.16294
  clear
  add "while"
block.end.16294:
testis "W"
jumpfalse block.end.16328
  clear
  add "whilenot"
block.end.16328:
testis "n"
jumpfalse block.end.16359
  clear
  add "count"
block.end.16359:
testis "+"
jumpfalse block.end.16387
  clear
  add "a+"
block.end.16387:
testis "-"
jumpfalse block.end.16415
  clear
  add "a-"
block.end.16415:
testis "0"
jumpfalse block.end.16445
  clear
  add "zero"
block.end.16445:
testis "c"
jumpfalse block.end.16476
  clear
  add "chars"
block.end.16476:
testis "l"
jumpfalse block.end.16507
  clear
  add "lines"
block.end.16507:
testis "^"
jumpfalse block.end.16539
  clear
  add "escape"
block.end.16539:
testis "v"
jumpfalse block.end.16573
  clear
  add "unescape"
block.end.16573:
testis "z"
jumpfalse block.end.16604
  clear
  add "delim"
block.end.16604:
testis "S"
jumpfalse block.end.16635
  clear
  add "state"
block.end.16635:
testis "q"
jumpfalse block.end.16665
  clear
  add "quit"
block.end.16665:
testis "s"
jumpfalse block.end.16696
  clear
  add "write"
block.end.16696:
testis "o"
jumpfalse block.end.16725
  clear
  add "nop"
block.end.16725:
testis "rs"
jumpfalse block.end.16759
  clear
  add "restart"
block.end.16759:
testis "rp"
jumpfalse block.end.16793
  clear
  add "reparse"
block.end.16793:
# some extra syntax for testeof and testtape
testis "<eof>"
jumptrue 4
testis "<EOF>"
jumptrue 2 
jump block.end.16904
  put
  clear
  add "eof*"
  push
  jump parse
block.end.16904:
testis "<==>"
jumpfalse block.end.16962
  put
  clear
  add "tapetest*"
  push
  jump parse
block.end.16962:
testis "jump"
jumptrue 18
testis "jumptrue"
jumptrue 16
testis "jumpfalse"
jumptrue 14
testis "testis"
jumptrue 12
testis "testclass"
jumptrue 10
testis "testbegins"
jumptrue 8
testis "testends"
jumptrue 6
testis "testeof"
jumptrue 4
testis "testtape"
jumptrue 2 
jump block.end.17290
  put
  clear
  add "The instruction '"
  get
  add "' near line "
  ll
  add " (character "
  cc
  add ")\n"
  add "can be used in pep assembly code but not scripts. \n"
  print
  clear
  quit
block.end.17290:
# show information if these "deprecated" commands are used
testis "Q"
jumptrue 4
testis "bail"
jumptrue 2 
jump block.end.17634
  put
  clear
  add "The instruction '"
  get
  add "' near line "
  ll
  add " (character "
  cc
  add ")\n"
  add "is no longer part of the pep language (july 2020). \n"
  add "use 'quit' instead of 'bail'\n"
  print
  clear
  quit
block.end.17634:
testis "add"
jumptrue 82
testis "clip"
jumptrue 80
testis "clop"
jumptrue 78
testis "replace"
jumptrue 76
testis "upper"
jumptrue 74
testis "lower"
jumptrue 72
testis "cap"
jumptrue 70
testis "clear"
jumptrue 68
testis "print"
jumptrue 66
testis "pop"
jumptrue 64
testis "push"
jumptrue 62
testis "unstack"
jumptrue 60
testis "stack"
jumptrue 58
testis "put"
jumptrue 56
testis "get"
jumptrue 54
testis "swap"
jumptrue 52
testis "++"
jumptrue 50
testis "--"
jumptrue 48
testis "mark"
jumptrue 46
testis "go"
jumptrue 44
testis "read"
jumptrue 42
testis "until"
jumptrue 40
testis "while"
jumptrue 38
testis "whilenot"
jumptrue 36
testis "count"
jumptrue 34
testis "a+"
jumptrue 32
testis "a-"
jumptrue 30
testis "zero"
jumptrue 28
testis "chars"
jumptrue 26
testis "lines"
jumptrue 24
testis "nochars"
jumptrue 22
testis "nolines"
jumptrue 20
testis "escape"
jumptrue 18
testis "unescape"
jumptrue 16
testis "delim"
jumptrue 14
testis "quit"
jumptrue 12
testis "state"
jumptrue 10
testis "write"
jumptrue 8
testis "nop"
jumptrue 6
testis "reparse"
jumptrue 4
testis "restart"
jumptrue 2 
jump block.end.18035
  put
  clear
  add "word*"
  push
  jump parse
block.end.18035:
#------------ 
# the .reparse command and "parse label" is a simple way to 
# make sure that all shift-reductions occur. It should be used inside
# a block test, so as not to create an infinite loop. There is
# no "goto" in java so we need to use labelled loops to 
# implement .reparse/parse>
testis "parse>"
jumpfalse block.end.18688
  clear
  count
  testis "0"
  jumptrue block.end.18543
    clear
    add "[error] pep script error:\n"
    add "  extra parse> label at line "
    ll
    add ".\n"
    print
    quit
  block.end.18543:
  clear
  add "// parse>"
  put
  clear
  add "parse>*"
  push
  # use accumulator to indicate after parse> label
  a+
  jump parse
block.end.18688:
# --------------------
# implement "begin-blocks", which are only executed
# once, at the beginning of the script (similar to awk's BEGIN {} rules)
testis "begin"
jumpfalse block.end.18899
  put
  add "*"
  push
  jump parse
block.end.18899:
add " << unknown command on line "
ll
add " (char "
cc
add ")"
add " of source file. \n"
add " \n"
add "   Valid commands are:\n"
add "     add,clip,clop,replace,upper,lower,cap,clear,print,\n"
add "     pop,push,unstack,stack,put,get,swap,\n"
add "     ++,--,mark,go,read,until,while,whilenot,\n"
add "     count,a+,a-,zero,chars,lines,nochars,nolines,\n"
add "     escape,unescape,delim,quit,state,\n"
add "     write,nop,.reparse,.restart \n"
print
clear
quit
# ----------------------------------
# PARSING PHASE:
# Below is the parse/compile phase of the script. Here we pop tokens off the
# stack and check for sequences of tokens eg "word*semicolon*". If we find a
# valid series of tokens, we "shift-reduce" or "resolve" the token series eg
# word*semicolon* --> command*
# At the same time, we manipulate (transform) the attributes on the tape, as
# required. 
parse:
#-------------------------------------
# 2 tokens
#-------------------------------------
pop
pop
# All of the patterns below are currently errors, but may not
# be in the future if we expand the syntax of the parse
# language. Also consider:
#    begintext* endtext* quoteset* notclass*, !* ,* ;* B* E*
# It is nice to trap the errors here because we can emit some
# (hopefully not very cryptic) error messages with a line number.
# Otherwise the script writer has to debug with
#   pep -a asm.pp -I scriptfile 
testis "word*word*"
jumptrue 50
testis "word*}*"
jumptrue 48
testis "word*begintext*"
jumptrue 46
testis "word*endtext*"
jumptrue 44
testis "word*!*"
jumptrue 42
testis "word*,*"
jumptrue 40
testis "quote*word*"
jumptrue 38
testis "quote*class*"
jumptrue 36
testis "quote*state*"
jumptrue 34
testis "quote*}*"
jumptrue 32
testis "quote*begintext*"
jumptrue 30
testis "quote*endtext*"
jumptrue 28
testis "class*word*"
jumptrue 26
testis "class*quote*"
jumptrue 24
testis "class*class*"
jumptrue 22
testis "class*state*"
jumptrue 20
testis "class*}*"
jumptrue 18
testis "class*begintext*"
jumptrue 16
testis "class*endtext*"
jumptrue 14
testis "class*!*"
jumptrue 12
testis "notclass*word*"
jumptrue 10
testis "notclass*quote*"
jumptrue 8
testis "notclass*class*"
jumptrue 6
testis "notclass*state*"
jumptrue 4
testis "notclass*}*"
jumptrue 2 
jump block.end.20950
  add " (Token stack) \nValue: \n"
  get
  add "\nValue: \n"
  ++
  get
  --
  add "\n"
  add "Error near line "
  ll
  add " (char "
  cc
  add ")"
  add " of pep script (missing semicolon?) \n"
  print
  clear
  quit
block.end.20950:
testis "{*;*"
jumptrue 6
testis ";*;*"
jumptrue 4
testis "}*;*"
jumptrue 2 
jump block.end.21145
  push
  push
  add "Error near line "
  ll
  add " (char "
  cc
  add ")"
  add " of pep script: misplaced semi-colon? ; \n"
  print
  clear
  quit
block.end.21145:
testis ",*{*"
jumpfalse block.end.21315
  push
  push
  add "Error near line "
  ll
  add " (char "
  cc
  add ")"
  add " of script: extra comma in list? \n"
  print
  clear
  quit
block.end.21315:
testis "command*;*"
jumptrue 4
testis "commandset*;*"
jumptrue 2 
jump block.end.21504
  push
  push
  add "Error near line "
  ll
  add " (char "
  cc
  add ")"
  add " of script: extra semi-colon? \n"
  print
  clear
  quit
block.end.21504:
testis "!*!*"
jumpfalse block.end.21767
  push
  push
  add "error near line "
  ll
  add " (char "
  cc
  add ")"
  add " of script: \n double negation '!!' is not implemented \n"
  add " and probably won't be, because what would be the point? \n"
  print
  clear
  quit
block.end.21767:
testis "!*{*"
jumptrue 4
testis "!*;*"
jumptrue 2 
jump block.end.22082
  push
  push
  add "error near line "
  ll
  add " (char "
  cc
  add ")"
  add " of script: misplaced negation operator (!)? \n"
  add " The negation operator precedes tests, for example: \n"
  add "   !B'abc'{ ... } or !(eof),!'abc'{ ... } \n"
  print
  clear
  quit
block.end.22082:
testis ",*command*"
jumpfalse block.end.22258
  push
  push
  add "error near line "
  ll
  add " (char "
  cc
  add ")"
  add " of script: misplaced comma? \n"
  print
  clear
  quit
block.end.22258:
testis "!*command*"
jumpfalse block.end.22463
  push
  push
  add "error near line "
  ll
  add " (at char "
  cc
  add ") \n"
  add " The negation operator (!) cannot precede a command \n"
  print
  clear
  quit
block.end.22463:
testis ";*{*"
jumptrue 6
testis "command*{*"
jumptrue 4
testis "commandset*{*"
jumptrue 2 
jump block.end.22672
  push
  push
  add "error near line "
  ll
  add " (char "
  cc
  add ")"
  add " of script: no test for brace block? \n"
  print
  clear
  quit
block.end.22672:
testis "{*}*"
jumpfalse block.end.22806
  push
  push
  add "error near line "
  ll
  add " of script: empty braces {}. \n"
  print
  clear
  quit
block.end.22806:
testis "B*class*"
jumptrue 4
testis "E*class*"
jumptrue 2 
jump block.end.23037
  push
  push
  add "error near line "
  ll
  add " of script:\n  classes ([a-z], [:space:] etc). \n"
  add "  cannot use the 'begin' or 'end' modifiers (B/E) \n"
  print
  clear
  quit
block.end.23037:
testis "comment*{*"
jumpfalse block.end.23229
  push
  push
  add "error near line "
  ll
  add " of script: comments cannot occur between \n"
  add " a test and a brace ({). \n"
  print
  clear
  quit
block.end.23229:
testis "}*command*"
jumpfalse block.end.23379
  push
  push
  add "error near line "
  ll
  add " of script: extra closing brace '}' ?. \n"
  print
  clear
  quit
block.end.23379:

#  E"begin*".!"begin*" {
#    push; push;
#    add "error near line "; lines;
#    add " of script: Begin blocks must precede code \n";
#    print; clear; quit;
#  }
#  
#------------ 
# The .restart command jumps to the first instruction after the
# begin block (if there is a begin block), or the first instruction
# of the script.
testis ".*word*"
jumpfalse block.end.24777
  clear
  ++
  get
  --
  testis "restart"
  jumpfalse block.end.24178
    clear
    count
    #"0" { clear; add "continue;   # .restart "; }
    testis "0"
    jumpfalse block.end.24059
      clear
      # use the comment '# restart' so we can replace
      # this with 'break' if the parse> label appears later
      add "set restart true; continue; # restart"
    block.end.24059:
    testis "1"
    jumpfalse block.end.24110
      clear
      add "break;   # .restart "
    block.end.24110:
    put
    clear
    add "command*"
    push
    jump parse
  block.end.24178:
  testis "reparse"
  jumpfalse block.end.24564
    clear
    count
    # no labelled loops in tcl
    # check accumulator to see if we are in the "lex" block
    # or the "parse" block and adjust the .reparse compilation
    # accordingly.
    testis "0"
    jumpfalse block.end.24444
      clear
      add "break;  # .reparse "
    block.end.24444:
    testis "1"
    jumpfalse block.end.24496
      clear
      add "continue;   # .reparse "
    block.end.24496:
    put
    clear
    add "command*"
    push
    jump parse
  block.end.24564:
  push
  push
  add "error near line "
  ll
  add " (char "
  cc
  add ")"
  add " of script:  \n"
  add " misplaced dot '.' (use for AND logic or in .reparse/.restart \n"
  print
  clear
  quit
block.end.24777:
#---------------------------------
# Compiling comments so as to transfer them to the java 
testis "comment*command*"
jumptrue 6
testis "command*comment*"
jumptrue 4
testis "commandset*comment*"
jumptrue 2 
jump block.end.25028
  clear
  get
  add "\n"
  ++
  get
  --
  put
  clear
  add "command*"
  push
  jump parse
block.end.25028:
testis "comment*comment*"
jumpfalse block.end.25142
  clear
  get
  add "\n"
  ++
  get
  --
  put
  clear
  add "comment*"
  push
  jump parse
block.end.25142:
# -----------------------
# negated tokens.
# This is a new more elegant way to negate a whole set of 
# tests (tokens) where the negation logic is stored on the 
# stack, not in the current tape cell. We just add "not" to 
# the stack token.
# eg: ![:alpha:] ![a-z] ![abcd] !"abc" !B"abc" !E"xyz"
#  This format is used to indicate a negative test for 
#  a brace block. eg: ![aeiou] { add "< not a vowel"; print; clear; }
testis "!*quote*"
jumptrue 12
testis "!*class*"
jumptrue 10
testis "!*begintext*"
jumptrue 8
testis "!*endtext*"
jumptrue 6
testis "!*eof*"
jumptrue 4
testis "!*tapetest*"
jumptrue 2 
jump block.end.25940
  # a simplification: store the token name "quote*/class*/..."
  # in the tape cell corresponding to the "!*" token. 
  replace "!*" "not"
  push
  # this was a bug?? a missing ++; ??
  # now get the token-value
  get
  --
  put
  ++
  clear
  jump parse
block.end.25940:
#-----------------------------------------
# format: E"text" or E'text'
#  This format is used to indicate a "workspace-ends-with" text before
#  a brace block.
testis "E*quote*"
jumpfalse block.end.26446
  clear
  add "endtext*"
  push
  get
  testis "\"\""
  jumpfalse block.end.26403
    # empty argument is an error
    clear
    add "pep script error near line "
    ll
    add " (character "
    cc
    add "): \n"
    add "  empty argument for end-test (E\"\") \n"
    print
    quit
  block.end.26403:
  --
  put
  ++
  clear
  jump parse
block.end.26446:
#-----------------------------------------
# format: B"sometext" or B'sometext' 
#   A 'B' preceding some quoted text is used to indicate a 
#   'workspace-begins-with' test, before a brace block.
testis "B*quote*"
jumpfalse block.end.26993
  clear
  add "begintext*"
  push
  get
  testis "\"\""
  jumpfalse block.end.26950
    # empty argument is an error
    clear
    add "pep script error near line "
    ll
    add " (character "
    cc
    add "): \n"
    add "  empty argument for begin-test (B\"\") \n"
    print
    quit
  block.end.26950:
  --
  put
  ++
  clear
  jump parse
block.end.26993:
#--------------------------------------------
# ebnf: command := word, ';' ;
# formats: "pop; push; clear; print; " etc
# all commands need to end with a semi-colon except for 
# .reparse and .restart
testis "word*;*"
jumpfalse block.end.30863
  clear
  # check if command requires parameter
  get
  testis "add"
  jumptrue 16
  testis "while"
  jumptrue 14
  testis "whilenot"
  jumptrue 12
  testis "mark"
  jumptrue 10
  testis "escape"
  jumptrue 8
  testis "unescape"
  jumptrue 6
  testis "delim"
  jumptrue 4
  testis "replace"
  jumptrue 2 
  jump block.end.27538
    put
    clear
    add "'"
    get
    add "'"
    add " << command needs an argument, on line "
    ll
    add " of script.\n"
    print
    clear
    quit
  block.end.27538:
  # the new until; read until workspace ends with tape-cell text
  testis "until"
  jumpfalse block.end.27727
    clear
    add "Until [lindex $mm(tape) $mm(cell)]; # until (tape-cell)"
    put
  block.end.27727:
  # the new go; go to mark named on tapecell 
  testis "go"
  jumpfalse block.end.27893
    clear
    add "GoToMark [lindex $mm(tape) $mm(cell)]; # go (tape-cell)"
    put
  block.end.27893:
  testis "clip"
  jumpfalse block.end.28072
    clear
    add "if { $mm(work) ne \"\" } {\n"
    add "  set mm(work) [string range $mm(work) 0 end-1]\n"
    add "}; # clip"
    put
  block.end.28072:
  testis "clop"
  jumpfalse block.end.28235
    clear
    add "if { $mm(work) ne \"\" } { \n"
    add "  set mm(work) [string range $mm(work) 1 end] } ; # clop"
    put
  block.end.28235:
  testis "clear"
  jumpfalse block.end.28302
    clear
    add "set mm(work) \"\";       # clear"
    put
  block.end.28302:
  testis "upper"
  jumpfalse block.end.28403
    clear
    add "set mm(work) [string toupper $mm(work)]; # upper"
    put
  block.end.28403:
  testis "lower"
  jumpfalse block.end.28504
    clear
    add "set mm(work) [string tolower $mm(work)]; # lower"
    put
  block.end.28504:
  testis "cap"
  jumpfalse block.end.28603
    clear
    add "set mm(work) [string totitle $mm(work)];   # cap"
    put
  block.end.28603:
  testis "print"
  jumpfalse block.end.28808
    clear
    # flush doesnt seem necessary
    #add "puts -nonewline $mm(work); flush stdout; # print"; 
    add "puts -nonewline $mm(work);    # print"
    put
  block.end.28808:
  testis "pop"
  jumpfalse block.end.28847
    clear
    add "Pop;"
    put
  block.end.28847:
  testis "push"
  jumpfalse block.end.28888
    clear
    add "Push;"
    put
  block.end.28888:
  testis "unstack"
  jumpfalse block.end.28981
    clear
    add "while {[Pop]} {};           # unstack "
    put
  block.end.28981:
  testis "stack"
  jumpfalse block.end.29070
    clear
    add "while {[Push]} {};          # stack "
    put
  block.end.29070:
  testis "put"
  jumpfalse block.end.29169
    clear
    add "lset mm(tape) $mm(cell) $mm(work);  # put "
    put
  block.end.29169:
  testis "get"
  jumpfalse block.end.29276
    clear
    add "append mm(work) [lindex $mm(tape) $mm(cell)]; # get"
    put
  block.end.29276:
  testis "swap"
  jumpfalse block.end.29625
    clear
    # other ways to swap 2 vars, but tricky with list
    # lassign "$a $b $c $d" b a d c
    # foreach {x y} [list $y $x] {break}
    add "set s $mm(work); \n"
    add "set mm(work) [lindex $mm(tape) $mm(cell)]; # swap \n"
    add "lset mm(tape) $mm(cell) $s;                # swap "
    put
  block.end.29625:
  testis "++"
  jumpfalse block.end.29789
    clear
    add "if { $mm(cell) >= $mm(size) } { MoreTape; }\n"
    add "incr mm(cell);                             # ++"
    put
  block.end.29789:
  testis "--"
  jumpfalse block.end.29892
    clear
    add "if { $mm(cell) > 0 } { incr mm(cell) -1 };  # --"
    put
  block.end.29892:
  testis "read"
  jumpfalse block.end.29950
    clear
    add "Read;           # read"
    put
  block.end.29950:
  testis "count"
  jumpfalse block.end.30033
    clear
    add "append mm(work) $mm(counter); # count "
    put
  block.end.30033:
  testis "a+"
  jumpfalse block.end.30091
    clear
    add "incr mm(counter);  # a+ "
    put
  block.end.30091:
  testis "a-"
  jumpfalse block.end.30152
    clear
    add "incr mm(counter) -1;  # a- "
    put
  block.end.30152:
  testis "zero"
  jumpfalse block.end.30214
    clear
    add "set mm(counter) 0; # zero "
    put
  block.end.30214:
  testis "chars"
  jumpfalse block.end.30299
    clear
    add "append mm(work) $mm(charsRead); # chars "
    put
  block.end.30299:
  testis "lines"
  jumpfalse block.end.30384
    clear
    add "append mm(work) $mm(linesRead); # lines "
    put
  block.end.30384:
  testis "nochars"
  jumpfalse block.end.30454
    clear
    add "set mm(charsRead) 0; # nochars "
    put
  block.end.30454:
  testis "nolines"
  jumpfalse block.end.30524
    clear
    add "set mm(linesRead) 0; # nolines "
    put
  block.end.30524:
  # use a labelled loop to quit script.
  testis "quit"
  jumpfalse block.end.30608
    clear
    add "exit;"
    put
  block.end.30608:
  testis "state"
  jumpfalse block.end.30660
    clear
    add "State;  # state"
    put
  block.end.30660:
  testis "write"
  jumpfalse block.end.30709
    clear
    add "WriteToFile;"
    put
  block.end.30709:
  # just eliminate since it does nothing.
  testis "nop"
  jumpfalse block.end.30809
    clear
    add "# nop: no operation "
    put
  block.end.30809:
  clear
  add "command*"
  push
  jump parse
block.end.30863:
#-----------------------------------------
# ebnf: commandset := command , command ;
testis "command*command*"
jumptrue 4
testis "commandset*command*"
jumptrue 2 
jump block.end.31187
  clear
  add "commandset*"
  push
  # format the tape attributes. Add the next command on a newline 
  --
  get
  add "\n"
  ++
  get
  --
  put
  ++
  clear
  jump parse
block.end.31187:
#-------------------
# here we begin to parse "test*" and "ortestset*" and "andtestset*"
# 
#-------------------
# eg: B"abc" {} or E"xyz" {}
# transform and markup the different test types
testis "begintext*,*"
jumptrue 36
testis "endtext*,*"
jumptrue 34
testis "quote*,*"
jumptrue 32
testis "class*,*"
jumptrue 30
testis "eof*,*"
jumptrue 28
testis "tapetest*,*"
jumptrue 26
testis "begintext*.*"
jumptrue 24
testis "endtext*.*"
jumptrue 22
testis "quote*.*"
jumptrue 20
testis "class*.*"
jumptrue 18
testis "eof*.*"
jumptrue 16
testis "tapetest*.*"
jumptrue 14
testis "begintext*{*"
jumptrue 12
testis "endtext*{*"
jumptrue 10
testis "quote*{*"
jumptrue 8
testis "class*{*"
jumptrue 6
testis "eof*{*"
jumptrue 4
testis "tapetest*{*"
jumptrue 2 
jump block.end.32672
  # use glob matching for begin and end tests
  testbegins "begin"
  jumpfalse block.end.31948
    # remove quotes, add {}
    clear
    get
    clip
    clop
    # todo: here escape {}*[]? which have a special meaning
    # in "string match". No do it at quote stage
    put
    clear
    add "[string match {"
    get
    add "*} $mm(work)]"
  block.end.31948:
  testbegins "end"
  jumpfalse block.end.32088
    # remove quotes
    clear
    get
    clip
    clop
    put
    clear
    add "[string match {*"
    get
    add "} $mm(work)]"
  block.end.32088:
  testbegins "quote"
  jumpfalse block.end.32138
    clear
    add "$mm(work) eq "
    get
  block.end.32138:
  testbegins "class"
  jumpfalse block.end.32293
    # use tcl brace quotes to stop special character problems in patterns.
    clear
    add "[regexp {"
    get
    add "} $mm(work)]"
  block.end.32293:
  testbegins "eof"
  jumpfalse block.end.32331
    clear
    add "$mm(eof)"
  block.end.32331:
  testbegins "tapetest"
  jumpfalse block.end.32419
    clear
    add "$mm(work) eq [lindex $mm(tape) $mm(cell)]"
  block.end.32419:
  put
  
  #    #  maybe we could ellide the not tests by doing here
  #    B"not" { clear; add "!"; get; put; }
  #    
  clear
  add "test*"
  push
  # the trick below pushes the right token back on the stack.
  get
  add "*"
  push
  jump parse
block.end.32672:
#-------------------
# negated tests
# eg: !B"xyz {} !(eof) {} !(==) {}
#     !E"xyz" {} 
#     !"abc" {}
#     ![a-z] {}
testis "notbegintext*,*"
jumptrue 36
testis "notendtext*,*"
jumptrue 34
testis "notquote*,*"
jumptrue 32
testis "notclass*,*"
jumptrue 30
testis "noteof*,*"
jumptrue 28
testis "nottapetest*,*"
jumptrue 26
testis "notbegintext*.*"
jumptrue 24
testis "notendtext*.*"
jumptrue 22
testis "notquote*.*"
jumptrue 20
testis "notclass*.*"
jumptrue 18
testis "noteof*.*"
jumptrue 16
testis "nottapetest*.*"
jumptrue 14
testis "notbegintext*{*"
jumptrue 12
testis "notendtext*{*"
jumptrue 10
testis "notquote*{*"
jumptrue 8
testis "notclass*{*"
jumptrue 6
testis "noteof*{*"
jumptrue 4
testis "nottapetest*{*"
jumptrue 2 
jump block.end.33791
  testbegins "notbegin"
  jumpfalse block.end.33247
    # remove quotes, add {}
    clear
    get
    clip
    clop
    put
    clear
    add "![string match "
    get
    add "* $mm(work)] "
  block.end.33247:
  testbegins "notend"
  jumpfalse block.end.33398
    # remove quotes, add {}
    clear
    get
    clip
    clop
    put
    clear
    add "![string match *"
    get
    add " $mm(work)] "
  block.end.33398:
  testbegins "notquote"
  jumpfalse block.end.33451
    clear
    add "$mm(work) ne "
    get
  block.end.33451:
  testbegins "notclass"
  jumpfalse block.end.33521
    clear
    add "![regexp {"
    get
    add "} $mm(work)]"
  block.end.33521:
  testbegins "noteof"
  jumpfalse block.end.33563
    clear
    add "!$mm(eof)"
  block.end.33563:
  testbegins "nottapetest"
  jumpfalse block.end.33654
    clear
    add "$mm(work) ne [lindex $mm(tape) $mm(cell)]"
  block.end.33654:
  put
  clear
  add "test*"
  push
  # the trick below pushes the right token back on the stack.
  get
  add "*"
  push
  jump parse
block.end.33791:
#-------------------
# 3 tokens
#-------------------
pop
#-----------------------------
# some 3 token errors!!!
# not a comprehensive list of 3 token errors
testis "{*quote*;*"
jumptrue 12
testis "{*begintext*;*"
jumptrue 10
testis "{*endtext*;*"
jumptrue 8
testis "{*class*;*"
jumptrue 6
testis "commandset*quote*;*"
jumptrue 4
testis "command*quote*;*"
jumptrue 2 
jump block.end.34268
  push
  push
  push
  add "[pep error]\n invalid syntax near line "
  ll
  add " (char "
  cc
  add ")"
  add " of script (misplaced semicolon?) \n"
  print
  clear
  quit
block.end.34268:
# to simplify subsequent tests, transmogrify a single command
# to a commandset (multiple commands).
testis "{*command*}*"
jumpfalse block.end.34464
  clear
  add "{*commandset*}*"
  push
  push
  push
  jump parse
block.end.34464:
# errors! mixing AND and OR concatenation
testis ",*andtestset*{*"
jumptrue 4
testis ".*ortestset*{*"
jumptrue 2 
jump block.end.34931
  # push the tokens back to make debugging easier
  push
  push
  push
  add " error: mixing AND (.) and OR (,) concatenation in \n"
  add " in pep script near line "
  ll
  add " (character "
  cc
  add ") \n"
  add " \n"
  add "  For example:\n"
  add "     B\".\".!E\"/\".[abcd./] { print; }  # Correct!\n"
  add "     B\".\".!E\"/\",[abcd./] { print; }  # Error! \n"
  print
  clear
  quit
block.end.34931:
#--------------------------------------------
# ebnf: command := keyword , quoted-text , ";" ;
# format: add "text";
testis "word*quote*;*"
jumpfalse block.end.39119
  clear
  get
  testis "replace"
  jumpfalse block.end.35274
    # error 
    add "< command requires 2 parameters, not 1 \n"
    add "near line "
    ll
    add " of script. \n"
    print
    clear
    quit
  block.end.35274:
  # check whether argument is single character, otherwise
  # throw and error
  testis "escape"
  jumptrue 8
  testis "unescape"
  jumptrue 6
  testis "while"
  jumptrue 4
  testis "whilenot"
  jumptrue 2 
  jump block.end.36273
    # This is trickier than I thought it would be.
    clear
    ++
    get
    --
    # check that arg not empty, (but an empty quote is ok 
    # for the second arg of 'replace'
    testis "\"\""
    jumpfalse block.end.35825
      clear
      add "[pep error] near line "
      ll
      add " (or char "
      cc
      add "): \n"
      add "  command '"
      get
      add "\' cannot have an empty argument (\"\") \n"
      print
      quit
    block.end.35825:
    # quoted text has the quotes still around it.
    # also handle escape characters like \n \r etc
    clip
    clop
    clop
    clop
    # B "\\" { clip; } 
    clip
    testis ""
    jumptrue block.end.36249
      clear
      add "Pep script error near line "
      ll
      add " (character "
      cc
      add "): \n"
      add "  command '"
      get
      add "' takes only a single character argument. \n"
      print
      quit
    block.end.36249:
    clear
    get
  block.end.36273:
  testis "mark"
  jumpfalse block.end.36423
    clear
    add "lset mm(marks) $mm(cell) "
    ++
    get
    --
    add "; # mark"
    put
    clear
    add "command*"
    push
    jump parse
  block.end.36423:
  testis "go"
  jumpfalse block.end.36548
    clear
    add "GoToMark "
    ++
    get
    --
    add " "
    put
    clear
    add "command*"
    push
    jump parse
  block.end.36548:
  testis "delim"
  jumpfalse block.end.36763
    clear
    # only the first character of the delimiter argument is used. 
    add "set mm(delimiter) "
    ++
    get
    --
    add "; # delim "
    put
    clear
    add "command*"
    push
    jump parse
  block.end.36763:
  testis "add"
  jumpfalse block.end.37069
    clear
    add "append mm(work) "
    ++
    get
    --
    # handle multiline text
    # tcl can handle multiline text but I will leave this
    # anyway because of indenting issues.
    replace "\n" "\" \nappend mm(work) \"\\n"
    put
    clear
    add "command*"
    push
    jump parse
  block.end.37069:
  testis "while"
  jumpfalse block.end.37294
    clear
    add "# while \n"
    add "while {$mm(peep) eq "
    ++
    get
    --
    add "} {  \n"
    add "  if {$mm(eof)} { break; } Read; } "
    put
    clear
    add "command*"
    push
    jump parse
  block.end.37294:
  testis "whilenot"
  jumpfalse block.end.37519
    clear
    add "# whilenot \n"
    add "while {$mm(peep) ne "
    ++
    get
    --
    add "} {  \n"
    add "  if {$mm(eof)} { break; } Read }"
    put
    clear
    add "command*"
    push
    jump parse
  block.end.37519:
  testis "until"
  jumpfalse block.end.38176
    clear
    ++
    get
    --
    # error until cannot have empty argument
    testis "\"\""
    jumpfalse block.end.37958
      clear
      add "Pep script error near line "
      ll
      add " (character "
      cc
      add "): \n"
      add " empty argument for 'until' \n"
      add " \n"
      add "   For example:\n"
      add "     until '.txt'; until \">\";    # correct   \n"
      add "     until '';  until \"\";        # errors! \n"
      print
      quit
    block.end.37958:
    # remove quotes
    clip
    clop
    put
    clear
    add "Until {"
    get
    # handle multiline argument
    replace "\n" "\\n"
    add "};"
    put
    clear
    add "command*"
    push
    jump parse
  block.end.38176:
  # but hard code escape here!
  testis "escape"
  jumpfalse block.end.38516
    clear
    # use "string map"
    # remove quotes from escape argument
    ++
    get
    clip
    clop
    put
    clear
    add "set mm(work) [string map {\""
    get
    add "\" \"\\\\"
    get
    add "\"} $mm(work)]"
    --
    put
    clear
    add "command*"
    push
    jump parse
  block.end.38516:
  # could just use replace instead ?
  # but unescape should probably 'walk the string' to 
  # work out what is really escaped
  testis "unescape"
  jumpfalse block.end.38928
    clear
    # remove quotes from escape argument
    ++
    get
    clip
    clop
    put
    clear
    add "set mm(work) [string map {\"\\"
    get
    add "\" \""
    get
    add "\"} $mm(work)]"
    --
    put
    clear
    add "command*"
    push
    jump parse
  block.end.38928:
  # error, superfluous argument
  add ": command does not take an argument \n"
  add "near line "
  ll
  add " of script. \n"
  print
  clear
  #state
  quit
block.end.39119:
#----------------------------------
# format: "while [:alpha:] ;" or whilenot [a-z] ;
testis "word*class*;*"
jumpfalse block.end.39851
  clear
  get
  testis "while"
  jumpfalse block.end.39471
    clear
    add "# while  \n"
    add "while {[regexp {"
    ++
    get
    --
    add "} $mm(peep)]} { if {$mm(eof)} { break; } Read }"
    put
    clear
    add "command*"
    push
    jump parse
  block.end.39471:
  testis "whilenot"
  jumpfalse block.end.39698
    clear
    add "# whilenot  \n"
    add "while {![regexp {"
    ++
    get
    --
    add "} $mm(peep)]} { if {$mm(eof)} { break; } Read }"
    put
    clear
    add "command*"
    push
    jump parse
  block.end.39698:
  # error 
  add " < command cannot have a class argument \n"
  add "line "
  ll
  add ": error in script \n"
  print
  clear
  quit
block.end.39851:
# arrange the parse> label loops
testeof 
jumpfalse block.end.41009
  testis "commandset*parse>*commandset*"
  jumptrue 8
  testis "command*parse>*commandset*"
  jumptrue 6
  testis "commandset*parse>*command*"
  jumptrue 4
  testis "command*parse>*command*"
  jumptrue 2 
  jump block.end.41005
    clear
    # indent both code blocks
    add "  "
    get
    replace "\n" "\n  "
    # change .restart code before parse> label
    # this make .restart work both before, after and without a 
    # parse> label
    replace "continue; # restart" "break; # restart"
    put
    clear
    ++
    ++
    add "  "
    get
    replace "\n" "\n  "
    put
    clear
    --
    --
    # add a block so that .reparse works before the parse> label.
    # but no labelled loops in tcl
    add "\n# lex block \n"
    add "while true { \n"
    get
    add "\n  break;\n}\n"
    ++
    ++
    add "if {$restart == true} { set restart false; continue; }\n"
    # indent code block
    # add "  "; get; replace "\n" "\n  "; put; clear;
    # tcl doesnt support labelled loops
    # add "parse: \n";
    add "\n# parse block \n"
    add "while true {  \n"
    get
    add "\n  break; \n"
    add "}\n"
    --
    --
    put
    clear
    add "commandset*"
    push
    jump parse
  block.end.41005:
block.end.41009:
# -------------------------------
# 4 tokens
# -------------------------------
pop
#-------------------------------------
# bnf:     command := replace , quote , quote , ";" ;
# example:  replace "and" "AND" ; 
testis "word*quote*quote*;*"
jumpfalse block.end.41882
  clear
  get
  testis "replace"
  jumpfalse block.end.41713
    #---------------------------
    # a command plus 2 arguments, eg replace "this" "that"
    # the empty string test is not really necessary here.
    # note!
    clear
    add "# replace \n"
    add "if {$mm(work) ne \"\"} { \n"
    add "set mm(work) [string map {"
    ++
    get
    add " "
    ++
    get
    add "} $mm(work)] }\n"
    --
    --
    put
    clear
    add "command*"
    push
    jump parse
  block.end.41713:
  add "pep script error on line "
  ll
  add " (character "
  cc
  add "): \n"
  add "  command does not take 2 quoted arguments. \n"
  print
  quit
block.end.41882:
#-------------------------------------
# format: begin { #* commands *# }
# "begin" blocks which are only executed once (they
# will are assembled before the "start:" label. They must come before
# all other commands.
# "begin*{*command*}*",
testis "begin*{*commandset*}*"
jumpfalse block.end.42266
  clear
  ++
  ++
  get
  --
  --
  put
  clear
  add "beginblock*"
  push
  jump parse
block.end.42266:
# -------------
# parses and compiles concatenated tests
# eg: 'a',B'b',E'c',[def],[:space:],[g-k] { ...
# these 2 tests should be all that is necessary
testis "test*,*ortestset*{*"
jumptrue 4
testis "test*,*test*{*"
jumptrue 2 
jump block.end.42610
  clear
  get
  add " || "
  ++
  ++
  get
  --
  --
  put
  clear
  add "ortestset*{*"
  push
  push
  jump parse
block.end.42610:
# dont mix AND and OR concatenations 
# -------------
# AND logic 
# parses and compiles concatenated AND tests
# eg: 'a',B'b',E'c',[def],[:space:],[g-k] { ...
# it is possible to elide this block with the negated block
# for compactness but maybe readability is not as good.
# negated tests can be chained with non negated tests.
# eg: B'http' . !E'.txt' { ... }
testis "test*.*andtestset*{*"
jumptrue 4
testis "test*.*test*{*"
jumptrue 2 
jump block.end.43179
  clear
  get
  add " && "
  ++
  ++
  get
  --
  --
  put
  clear
  add "andtestset*{*"
  push
  push
  jump parse
block.end.43179:
#-------------------------------------
# we should not have to check for the {*command*}* pattern
# because that has already been transformed to {*commandset*}*
testis "test*{*commandset*}*"
jumptrue 6
testis "andtestset*{*commandset*}*"
jumptrue 4
testis "ortestset*{*commandset*}*"
jumptrue 2 
jump block.end.43742
  clear
  # indent the java code for readability
  ++
  ++
  add "  "
  get
  replace "\n" "\n  "
  put
  --
  --
  clear
  add "if {"
  get
  add "} {\n"
  ++
  ++
  get
  add "\n}"
  --
  --
  put
  clear
  add "command*"
  push
  # always reparse/compile
  jump parse
block.end.43742:
# -------------
# multi-token end-of-stream errors
# not a comprehensive list of errors...
testeof 
jumpfalse block.end.44935
  testends "begintext*"
  jumptrue 10
  testends "endtext*"
  jumptrue 8
  testends "test*"
  jumptrue 6
  testends "ortestset*"
  jumptrue 4
  testends "andtestset*"
  jumptrue 2 
  jump block.end.44052
    add "  Error near end of script at line "
    ll
    add ". Test with no brace block? \n"
    print
    clear
    quit
  block.end.44052:
  testends "quote*"
  jumptrue 6
  testends "class*"
  jumptrue 4
  testends "word*"
  jumptrue 2 
  jump block.end.44277
    put
    clear
    add "Error at end of pep script near line "
    ll
    add ": missing semi-colon? \n"
    add "Parse stack: "
    get
    add "\n"
    print
    clear
    quit
  block.end.44277:
  # A begin block with no other commands is not really an
  # error...
  testis "beginblock*"
  jumpfalse block.end.44675
    put
    clear
    add "\n"
    add "   Pep script error: begin block with no other commands.\n"
    add "   Follow the begin block with other script commands. Eg:\n"
    add "     begin { add \"starting script...\"; print; clear; }\n"
    add "     read; print; clear;\n"
    add "   Or use \"nop;\" after the begin block \n"
    print
    clear
    quit
  block.end.44675:
  testends "{*"
  jumptrue 16
  testends "}*"
  jumptrue 14
  testends ";*"
  jumptrue 12
  testends ",*"
  jumptrue 10
  testends ".*"
  jumptrue 8
  testends "!*"
  jumptrue 6
  testends "B*"
  jumptrue 4
  testends "E*"
  jumptrue 2 
  jump block.end.44931
    put
    clear
    add "Error: misplaced terminal character at end of script! (line "
    ll
    add "). \n"
    add "Parse stack: "
    get
    add "\n"
    print
    clear
    quit
  block.end.44931:
block.end.44935:
# put the 4 (or less) tokens back on the stack
push
push
push
push
testeof 
jumpfalse block.end.53302
  print
  clear
  # create the virtual machine object code and save it
  # somewhere on the tape.
  add "#!/usr/bin/tclsh\n"
  add "\n"
  add "# code generated by \"translate.tcl.pss\" a pep script\n"
  add "# see bumble.sf.net/books/pars/\n"
  add "#import sys    # \n"
  add "\n"
  add "  # make a new machine. Standard tcl doesnt have objects\n"
  add "  # so I will use an associative array, instead.\n"
  add "  #array set mm {\n"
  add "  #  eof false     # end of stream reached?\n"
  add "  #  charsRead 0   # how many chars already read\n"
  add "  #  linesRead 1   # how many lines already read\n"
  add "  #  escape \"\\\\\"\n"
  add "  #  delimiter \"*\" # push/pop delimiter (default \"*\")\n"
  add "  #  counter 0     # a counter for anything\n"
  add "  #  work \"\"       # the workspace\n"
  add "  #  stack {}    # stack for parse tokens \n"
  add "  #  cell 0      # current tape cell\n"
  add "  #  size 100    # the initial tape/marks list size\n"
  add "  #  tape {}     # a list of attribute for tokens \n"
  add "  #  marks {}    # marked tape cells\n"
  add "  #  peep [read stdin 1] \n"
  add "  #}\n"
  add "\n"
  add "  # make a new machine. Standard tcl doesnt have objects\n"
  add "  # so I will use an associative array, instead.\n"
  add "  array set mm {\n"
  add "    eof false     \n"
  add "    charsRead 0  \n"
  add "    linesRead 1 \n"
  add "    escape \"\\\\\"\n"
  add "    delimiter \"*\" \n"
  add "    counter 0    \n"
  add "    work \"\"     \n"
  add "    stack {}   \n"
  add "    cell 0    \n"
  add "    size 0 \n"
  add "    tape {}  \n"
  add "    marks {}\n"
  add "    peep {} \n"
  add "  }\n"
  add "\n"
  add "  # Adds more elements to the tape and marks lists \n"
  add "  proc MoreTape {} { \n"
  add "    global mm\n"
  add "    for {set ii 0} {$ii < 100} {incr ii} { \n"
  add "      lappend mm(tape) \"\"; lappend mm(marks) \"\";\n"
  add "    }\n"
  add "    incr mm(size) 100\n"
  add "  }\n"
  add "\n"
  add "  # initialises a machine  \n"
  add "  proc Init {} { \n"
  add "    global mm\n"
  add "    set mm(peep) [ read stdin 1 ]\n"
  add "    # or Read;\n"
  add "    MoreTape;\n"
  add "  }\n"
  add "\n"
  add "  # read one character from the input stream and \n"
  add "  #    update the machine.\n"
  add "  proc Read {} { \n"
  add "    # use upvar eg\n"
  add "    # upvar $machine mm\n"
  add "    global mm\n"
  add "    if { $mm(eof) } { exit }\n"
  add "    incr mm(charsRead)\n"
  add "    # increment lines\n"
  add "    if { $mm(peep) eq \"\\n\" } { incr mm(linesRead) }\n"
  add "    append mm(work) $mm(peep)\n"
  add "    set mm(peep) [ read stdin 1 ]\n"
  add "    if {[eof stdin]} { set mm(eof) true; set mm(peep) -1 }\n"
  add "  } \n"
  add "\n"
  add "  # increment tape pointer by one: trivial method? But need\n"
  add "  # to increase tape/marks size if exceeded\n"
  add "  proc Increment {} { global mm; incr mm(cell) } \n"
  add "\n"
  add "  # remove escape character: trivial method ?\n"
  add "  proc UnescapeChar {c} {\n"
  add "    global mm\n"
  add "    #if { $mm(work) ne \"\" } $mm(work = $mm(work.replace(\"\\\\\"+c, c)\n"
  add "  }\n"
  add "\n"
  add "  # add escape character : trivial\n"
  add "  proc EscapeChar {c} {\n"
  add "    global mm\n"
  add "    #if { $mm(work) ne \"\" } { $mm(work = $mm(work.replace(c, \"\\\\\"+c) }\n"
  add "  }\n"
  add "\n"
  add "  # pop the first token from the stack into the workspace */\n"
  add "  proc Pop {} { \n"
  add "    global mm\n"
  add "    if {[llength $mm(stack)] == 0} { return false }\n"
  add "    # prepend last stack item, and delete the item\n"
  add "    set mm(work) \"[lindex $mm(stack) end]$mm(work)\"\n"
  add "    set mm(stack) [lrange $mm(stack) 0 [expr [llength $mm(stack)]-2]] \n"
  add "    if {$mm(cell) > 0} { incr mm(cell) -1 }\n"
  add "    return true\n"
  add "  }\n"
  add "  \n"
  add "  # push the first token from the workspace to the stack \n"
  add "  proc Push {} {\n"
  add "    # lappend list $value\n"
  add "    # dont increment the tape pointer on an empty push\n"
  add "    global mm\n"
  add "    if { $mm(work) eq \"\" } { return false }\n"
  add "    # need to get this from the delimiter.\n"
  add "    set firstdelim [string first $mm(delimiter) $mm(work)]\n"
  add "    if {$firstdelim == -1} {\n"
  add "      lappend mm(stack) $mm(work)\n"
  add "      set mm(work) \"\"\n"
  add "      incr mm(cell) 1\n"
  add "      # a hack because \"stack\" hangs otherwise (never returns false)\n"
  add "      return false\n"
  add "      #return true\n"
  add "    }\n"
  add "    lappend mm(stack) [string range $mm(work) 0 $firstdelim]\n"
  add "    set mm(work) [string range $mm(work) [expr {$firstdelim+1}] end]\n"
  add "    incr mm(cell) 1\n"
  add "    return true\n"
  add "  }\n"
  add "\n"
  add "  # a helper function\n"
  add "  proc IsEscaped {suffix} {\n"
  add "    global mm\n"
  add "    # remove suffix\n"
  add "    set count 0\n"
  add "    set last [expr {[string last $suffix $mm(work)]-1}]\n"
  add "    set new [string range $mm(work) 0 $last]\n"
  add "    # now count trailing escape chars\n"
  add "    while {[string index $new end] eq $mm(escape)} {\n"
  add "      set last [expr {[string last $mm(escape) $new]-1}]\n"
  add "      set new [string range $new 0 $last]\n"
  add "      incr count\n"
  add "    }\n"
  add "    # puts count=$count\n"
  add "    if { $count == 1 } { return true }\n"
  add "    if {[expr {($count % 2) == 0}]} { return false } else { return true }\n"
  add "  }\n"
  add "\n"
  add "  # reads the input stream until the workspace end with text \n"
  add "  proc Until {suffix} { \n"
  add "    # read at least one character\n"
  add "    global mm\n"
  add "    if { $mm(eof) } { return }\n"
  add "    Read;\n"
  add "    while true { \n"
  add "      if {$mm(eof)} { return }\n"
  add "      # this must count trailing escapes\n"
  add "      if {[string match *$suffix $mm(work)] && ![IsEscaped $suffix]} { return }\n"
  add "      Read;\n"
  add "    }\n"
  add "  }  \n"
  add "\n"
  add "  # maybe not required \n"
  add "  proc Swap {} { \n"
  add "    global mm\n"
  add "    set s $mm(work)\n"
  add "    set mm(work) $mm(tape)[$mm(cell)]\n"
  add "    # could be a problem if $s has spaces in it. (becomes a list)\n"
  add "    lset mm(tape) $mm(cell) $s\n"
  add "  }\n"
  add "\n"
  add "  proc GoToMark {mark} { \n"
  add "    # or use tcls lsearch here.\n"
  add "    global mm\n"
  add "    set ii [lsearch -exact $mm(marks) $mark]\n"
  add "    if {$ii >= 0} { set mm(cell) $ii \n"
  add "    } else { puts \"badmark \'$mark\'!\"; exit; }\n"
  add "  }\n"
  add "\n"
  add "  # todo! add an argument to this.\n"
  add "  proc WriteToFile {} { \n"
  add "    global mm\n"
  add "    set f [open sav.pp w 0600]  \n"
  add "    puts $f $mm(work)\n"
  add "    close $f\n"
  add "  }\n"
  add "\n"
  add "  # useful for debugging, the \"state\" command\n"
  add "  proc State {} { \n"
  add "    global mm\n"
  add "    puts \"---------- Machine State --------------\";\n"
  add "    puts -nonewline \" Stack\\[[join $mm(stack) {}]\\] Work\\[$mm(work)\\] \";\n"
  add "    puts \"Peep\\[$mm(peep)\\]\";\n"
  add "    puts -nonewline \" Acc:$mm(counter) Esc:$mm(escape) \";\n"
  add "    puts -nonewline \"Delim:$mm(delimiter) Chars:$mm(charsRead) \";\n"
  add "    puts \"Lines:$mm(linesRead)\";\n"
  add "    puts \"---------- Tape (size:$mm(size))  --------------\";\n"
  add "    set ii 0\n"
  add "    while { $ii < 7 } {\n"
  add "      puts -nonewline \"  $ii\";\n"
  add "      if { $ii == $mm(cell) } { \n"
  add "        puts -nonewline \"> \"\n"
  add "      } else { puts -nonewline \"  \" }\n"
  add "      # display marks\n"
  add "      if { [lindex $mm(marks) $ii] ne \"\" } { \n"
  add "        puts -nonewline \"\\\"[lindex $mm(marks) $ii]\\\" \"\n"
  add "      } else { puts -nonewline \". \" }\n"
  add "\n"
  add "      puts \"\\[[lindex $mm(tape) $ii]\\]\";\n"
  add "      incr ii\n"
  add "    }\n"
  add "  }\n"
  add "  # end of tcl pep Machine \"class\" (array) definition\n"
  add "\n"
  add "  # a flag var to make .restart work in run-once loops\n"
  add "  set restart false\n"
  add "  # initialise the machine\n"
  add "  Init;\n"
  add " \n"
  # save the code in the current tape cell
  put
  clear
  #---------------------
  # check if the script correctly parsed (there should only
  # be one or two tokens on the stack, namely "commandset*" or "command*").
  # or beginblock commandset
  pop
  pop
  testis "commandset*"
  jumptrue 4
  testis "command*"
  jumptrue 2 
  jump block.end.51897
    clear
    # indent generated code (6 spaces) for readability.
    add "  "
    get
    replace "\n" "\n  "
    put
    clear
    # restore the java preamble from the tape
    ++
    get
    --
    #add 'script: \n';
    add "while !$mm(eof) { \n"
    get
    # end block marker 
    add "\n}\n"
    add "# end of generated code\n"
    # put a copy of the final compilation into the tapecell
    # so it can be inspected interactively.
    put
    print
    clear
    quit
  block.end.51897:
  testis "beginblock*commandset*"
  jumptrue 4
  testis "beginblock*command*"
  jumptrue 2 
  jump block.end.52616
    clear
    # indentation not needed here 
    #add ""; get; 
    #replace "\n" "\n"; put; clear; 
    # indent main code for readability.
    ++
    add "  "
    get
    replace "\n" "\n  "
    put
    clear
    --
    # get tcl preamble from tape
    ++
    ++
    get
    --
    --
    get
    add "\n"
    ++
    # a labelled loop for "quit" (but quit can just exit?)
    #add "script: \n";
    add "while !$mm(eof) { \n"
    get
    # end block marker required 
    add "\n}\n"
    add "# end of generated code\n"
    # put a copy of the final compilation into the tapecell
    # for interactive debugging.
    put
    print
    clear
    quit
  block.end.52616:
  push
  push
  # try to explain some more errors
  unstack
  testbegins "parse>"
  jumpfalse block.end.52887
    put
    clear
    add "[error] pep syntax error:\n"
    add "  The parse> label cannot be the 1st item \n"
    add "  of a script \n"
    print
    quit
  block.end.52887:
  put
  clear
  add "After compiling with 'translate.tcl.pss' (at EOF): \n "
  add "  parse error in input script. \n "
  print
  clear
  # unstack; put; clear;
  add "Parse stack: "
  get
  add "\n"
  add "   * debug script "
  add "   >> pep -If script -i 'some input' \n "
  add "   *  debug compilation. \n "
  add "   >> pep -Ia asm.pp script' \n "
  print
  clear
  quit
block.end.53302:
# not eof
# there is an implicit .restart command here (jump start)
jump start 
