Posted to tcl by mjanssen at Thu Oct 08 16:52:40 GMT 2020view pretty
proc tokenize {script} { #puts [lindex [info level 0] 0] set ctxt(rest) [split $script {}] set ctxt(parsed) {} set ctxt(currentCmd) {} while {$ctxt(rest) ne {}} { tokenizeCommentOrCommand ctxt } return $ctxt(parsed) } proc handleContinuation {ctxtVar} { upvar $ctxtVar ctxt lassign $ctxt(rest) char next if {$char eq "\\" && $next eq "\n"} { # puts [lindex [info level 0] 0] set ctxt(rest) [lrange $ctxt(rest) 1 end] dropSpace ctxt set ctxt(rest) [linsert $ctxt(rest) 0 " "] } } proc dropSpace {ctxtVar} { #puts [lindex [info level 0] 0] upvar $ctxtVar ctxt while {1} { set new [lassign $ctxt(rest) char] if {[string is space $char] && $char ne {}} { set ctxt(rest) $new continue } break } } proc tokenizeCommentOrCommand {ctxtVar} { #puts [lindex [info level 0] 0] upvar $ctxtVar ctxt # parray ctxt while {1} { dropSpace ctxt handleContinuation ctxt lassign $ctxt(rest) char if {$char eq "#"} { tokenizeComment ctxt } else { tokenizeCommand ctxt } return } } proc tokenizeComment {ctxtVar} { #puts [lindex [info level 0] 0] upvar $ctxtVar ctxt set comment {} while {1} { handleContinuation ctxt set ctxt(rest) [lassign $ctxt(rest) char] if {$char eq "\n" || $char eq {}} { lappend ctxt(parsed) [list "#" $comment] return } append comment $char } } proc tokenizeWord {ctxtVar} { #puts [lindex [info level 0] 0] upvar $ctxtVar ctxt set word {} # expand ? if {[join [lrange $ctxt(rest) 0 3] {}] eq "{*}" && ![string is space [lindex $ctxt(rest) 4]]} { append word "{*}" set ctxt(rest) [lrange $ctxt(rest) 0 3] } if {[string is space [lindex $ctxt(rest) 0 ]]} { error "Unexpected space at start of word" } set ctxt(rest) [lassign $ctxt(rest) char] append word $char switch -exact $char { "\{" {append word [tokenizeBraceWord ctxt]} "\"" {append word [tokenizeQuoteWord ctxt]} "\[" {append word [tokenizeSubstWord ctxt]} default {append word [tokenizeBareWord ctxt]} } lappend ctxt(currentCmd) $word } proc tokenizeCommand {ctxtVar} { #puts [lindex [info level 0] 0] upvar $ctxtVar ctxt # parray ctxt set ctxt(currentCmd) {} set word {} while {1} { set new [lassign $ctxt(rest) char] if {$char eq "\n" || $char eq {}} { if {[llength $ctxt(currentCmd)] > 0} { lappend ctxt(parsed) [list cmd {*}$ctxt(currentCmd)] } set ctxt(rest) $new return } dropSpace ctxt handleContinuation ctxt tokenizeWord ctxt } } proc tokenizeQuoteWord {ctxtVar} { #puts [lindex [info level 0] 0] upvar $ctxtVar ctxt set word {} while {1} { handleContinuation ctxt set ctxt(rest) [lassign $ctxt(rest) char] append word $char if {$char eq "\\"} { set ctxt(rest) [lassign $ctxt(rest) char] append word $char continue } if {$char eq "\""} { break } } return $word } proc tokenizeBraceWord {ctxtVar} { #puts [lindex [info level 0] 0] upvar $ctxtVar ctxt set word {} while {1} { handleContinuation ctxt set ctxt(rest) [lassign $ctxt(rest) char] append word $char if {$char eq "\\"} { set ctxt(rest) [lassign $ctxt(rest) char] append word $char continue } if {$char eq "\{"} { append word [tokenizeBraceWord ctxt] continue } if {$char eq "\}"} { break } } return $word } proc tokenizeSubstWord {ctxtVar} { #puts [lindex [info level 0] 0] upvar $ctxtVar ctxt set word {} while {1} { handleContinuation ctxt set ctxt(rest) [lassign $ctxt(rest) char] append word $char if {$char eq "\\"} { set ctxt(rest) [lassign $ctxt(rest) char] append word $char continue } if {$char eq "\["} { append word [tokenizeSubstWord ctxt] continue } if {$char eq "\]"} { break } } return $word } proc tokenizeBareWord {ctxtVar} { #puts [lindex [info level 0] 0] upvar $ctxtVar ctxt set word {} while {1} { handleContinuation ctxt set ctxt(rest) [lassign $ctxt(rest) char] append word $char if {$char eq "\\"} { set ctxt(rest) [lassign $ctxt(rest) char] append word $char continue } if {$char eq "\["} { append word [tokenizeSubstWord ctxt] continue } if {[string is space $char]} { break } } return [string trimright $word] }