Posted to tcl by mjanssen at Thu Oct 08 16:52:40 GMT 2020view raw
- proc tokenize {script} {
- #puts [lindex [info level 0] 0]
- set ctxt(rest) [split $script {}]
- set ctxt(parsed) {}
- set ctxt(currentCmd) {}
- while {$ctxt(rest) ne {}} {
- tokenizeCommentOrCommand ctxt
- }
- return $ctxt(parsed)
- }
- proc handleContinuation {ctxtVar} {
- upvar $ctxtVar ctxt
- lassign $ctxt(rest) char next
- if {$char eq "\\" && $next eq "\n"} {
- # puts [lindex [info level 0] 0]
- set ctxt(rest) [lrange $ctxt(rest) 1 end]
- dropSpace ctxt
- set ctxt(rest) [linsert $ctxt(rest) 0 " "]
- }
- }
- proc dropSpace {ctxtVar} {
- #puts [lindex [info level 0] 0]
- upvar $ctxtVar ctxt
- while {1} {
- set new [lassign $ctxt(rest) char]
- if {[string is space $char] && $char ne {}} {
- set ctxt(rest) $new
- continue
- }
- break
- }
- }
- proc tokenizeCommentOrCommand {ctxtVar} {
- #puts [lindex [info level 0] 0]
- upvar $ctxtVar ctxt
- # parray ctxt
- while {1} {
- dropSpace ctxt
- handleContinuation ctxt
- lassign $ctxt(rest) char
- if {$char eq "#"} {
- tokenizeComment ctxt
- } else {
- tokenizeCommand ctxt
- }
- return
- }
- }
- proc tokenizeComment {ctxtVar} {
- #puts [lindex [info level 0] 0]
- upvar $ctxtVar ctxt
- set comment {}
- while {1} {
- handleContinuation ctxt
- set ctxt(rest) [lassign $ctxt(rest) char]
- if {$char eq "\n" || $char eq {}} {
- lappend ctxt(parsed) [list "#" $comment]
- return
- }
- append comment $char
- }
- }
- proc tokenizeWord {ctxtVar} {
- #puts [lindex [info level 0] 0]
- upvar $ctxtVar ctxt
- set word {}
- # expand ?
- if {[join [lrange $ctxt(rest) 0 3] {}] eq "{*}" && ![string is space [lindex $ctxt(rest) 4]]} {
- append word "{*}"
- set ctxt(rest) [lrange $ctxt(rest) 0 3]
- }
- if {[string is space [lindex $ctxt(rest) 0 ]]} {
- error "Unexpected space at start of word"
- }
- set ctxt(rest) [lassign $ctxt(rest) char]
- append word $char
- switch -exact $char {
- "\{" {append word [tokenizeBraceWord ctxt]}
- "\"" {append word [tokenizeQuoteWord ctxt]}
- "\[" {append word [tokenizeSubstWord ctxt]}
- default {append word [tokenizeBareWord ctxt]}
- }
- lappend ctxt(currentCmd) $word
- }
- proc tokenizeCommand {ctxtVar} {
- #puts [lindex [info level 0] 0]
- upvar $ctxtVar ctxt
- # parray ctxt
- set ctxt(currentCmd) {}
- set word {}
- while {1} {
- set new [lassign $ctxt(rest) char]
- if {$char eq "\n" || $char eq {}} {
- if {[llength $ctxt(currentCmd)] > 0} {
- lappend ctxt(parsed) [list cmd {*}$ctxt(currentCmd)]
- }
- set ctxt(rest) $new
- return
- }
- dropSpace ctxt
- handleContinuation ctxt
- tokenizeWord ctxt
- }
- }
- proc tokenizeQuoteWord {ctxtVar} {
- #puts [lindex [info level 0] 0]
- upvar $ctxtVar ctxt
- set word {}
- while {1} {
- handleContinuation ctxt
- set ctxt(rest) [lassign $ctxt(rest) char]
- append word $char
- if {$char eq "\\"} {
- set ctxt(rest) [lassign $ctxt(rest) char]
- append word $char
- continue
- }
- if {$char eq "\""} {
- break
- }
- }
- return $word
- }
- proc tokenizeBraceWord {ctxtVar} {
- #puts [lindex [info level 0] 0]
- upvar $ctxtVar ctxt
- set word {}
- while {1} {
- handleContinuation ctxt
- set ctxt(rest) [lassign $ctxt(rest) char]
- append word $char
- if {$char eq "\\"} {
- set ctxt(rest) [lassign $ctxt(rest) char]
- append word $char
- continue
- }
- if {$char eq "\{"} {
- append word [tokenizeBraceWord ctxt]
- continue
- }
- if {$char eq "\}"} {
- break
- }
- }
- return $word
- }
- proc tokenizeSubstWord {ctxtVar} {
- #puts [lindex [info level 0] 0]
- upvar $ctxtVar ctxt
- set word {}
- while {1} {
- handleContinuation ctxt
- set ctxt(rest) [lassign $ctxt(rest) char]
- append word $char
- if {$char eq "\\"} {
- set ctxt(rest) [lassign $ctxt(rest) char]
- append word $char
- continue
- }
- if {$char eq "\["} {
- append word [tokenizeSubstWord ctxt]
- continue
- }
- if {$char eq "\]"} {
- break
- }
- }
- return $word
- }
- proc tokenizeBareWord {ctxtVar} {
- #puts [lindex [info level 0] 0]
- upvar $ctxtVar ctxt
- set word {}
- while {1} {
- handleContinuation ctxt
- set ctxt(rest) [lassign $ctxt(rest) char]
- append word $char
- if {$char eq "\\"} {
- set ctxt(rest) [lassign $ctxt(rest) char]
- append word $char
- continue
- }
- if {$char eq "\["} {
- append word [tokenizeSubstWord ctxt]
- continue
- }
- if {[string is space $char]} {
- break
- }
- }
- return [string trimright $word]
- }