Posted to tcl by mjanssen at Thu Oct 08 16:52:40 GMT 2020view pretty

proc tokenize {script} {
    #puts [lindex [info level 0] 0]
    set ctxt(rest) [split $script {}]
    set ctxt(parsed) {}
    set ctxt(currentCmd) {}
    while {$ctxt(rest) ne {}} {
        tokenizeCommentOrCommand ctxt
    }
    return $ctxt(parsed)
}

proc handleContinuation {ctxtVar} {
    
    upvar $ctxtVar ctxt
    lassign $ctxt(rest) char next
    if {$char eq "\\" && $next eq "\n"} {
    # puts [lindex [info level 0] 0]
        set ctxt(rest) [lrange $ctxt(rest) 1 end]
        dropSpace ctxt
        set ctxt(rest) [linsert $ctxt(rest) 0 " "]
    }
}

proc dropSpace {ctxtVar} {
    #puts [lindex [info level 0] 0]
    upvar $ctxtVar ctxt
    while {1} {
        set new [lassign $ctxt(rest) char]
        if {[string is space $char] && $char ne {}} {
            set ctxt(rest) $new
            continue
        }
        break
    }
}

proc tokenizeCommentOrCommand {ctxtVar} {
    #puts [lindex [info level 0] 0]
    upvar $ctxtVar ctxt
    #  parray ctxt
    while {1} {
        dropSpace ctxt
        handleContinuation ctxt
        lassign $ctxt(rest) char
        if {$char eq "#"} {
            tokenizeComment ctxt
        } else {
            tokenizeCommand ctxt
        }
        return
    }
}

proc tokenizeComment {ctxtVar} {
    #puts [lindex [info level 0] 0]
    upvar $ctxtVar ctxt
    set comment {}
    while {1} {
        handleContinuation ctxt
        set ctxt(rest) [lassign $ctxt(rest) char]
        if {$char eq "\n" || $char eq {}} {
            lappend ctxt(parsed) [list "#" $comment]
            return
        }
        append comment $char
    }
}


proc tokenizeWord {ctxtVar} {
    #puts [lindex [info level 0] 0]
    upvar $ctxtVar ctxt

    set word {}
    
    # expand ?
    if {[join [lrange $ctxt(rest) 0 3] {}] eq "{*}" && ![string is space [lindex $ctxt(rest) 4]]} {
        append word "{*}"
        set ctxt(rest) [lrange $ctxt(rest) 0 3]
    }
    if {[string is space [lindex $ctxt(rest) 0 ]]} {
        error "Unexpected space at start of word"
    }
    set ctxt(rest) [lassign $ctxt(rest) char]
    append word $char
    switch -exact $char {
        "\{"  {append word [tokenizeBraceWord ctxt]}
        "\"" {append word [tokenizeQuoteWord ctxt]}
        "\[" {append word [tokenizeSubstWord ctxt]}
        default  {append word [tokenizeBareWord ctxt]}
    }
    lappend ctxt(currentCmd) $word
    
}

proc tokenizeCommand {ctxtVar} {
   #puts [lindex [info level 0] 0]
    upvar $ctxtVar ctxt
    # parray ctxt
    set ctxt(currentCmd) {}
    set word {}
    while {1} {
        set new [lassign $ctxt(rest) char]
        if {$char eq "\n" || $char eq {}} {
            if {[llength $ctxt(currentCmd)] > 0} {
                 lappend ctxt(parsed) [list cmd {*}$ctxt(currentCmd)]
            }
            set ctxt(rest) $new
            return
        }
        dropSpace ctxt
        handleContinuation ctxt
        tokenizeWord ctxt
    }
}

proc tokenizeQuoteWord {ctxtVar} {
    #puts [lindex [info level 0] 0]
    upvar $ctxtVar ctxt
    set word {}
    while {1} {
        handleContinuation ctxt
        set ctxt(rest) [lassign $ctxt(rest) char]
        append word $char
        if {$char eq "\\"} {
            set ctxt(rest) [lassign $ctxt(rest) char]
            append word $char
            continue 
        }
        if {$char eq "\""} {
            break
        }
    }
    return $word
}

proc tokenizeBraceWord {ctxtVar} {
    #puts [lindex [info level 0] 0]
    upvar $ctxtVar ctxt
    set word {}
    while {1} {
        handleContinuation ctxt
        set ctxt(rest) [lassign $ctxt(rest) char]
        append word $char
        if {$char eq "\\"} {
            set ctxt(rest) [lassign $ctxt(rest) char]
            append word $char
            continue 
        }
        if {$char eq "\{"} {
            append word [tokenizeBraceWord ctxt]
            continue 
        }
        if {$char eq "\}"} {
            break
        }
    }
    return $word
}

proc tokenizeSubstWord {ctxtVar} {
    #puts [lindex [info level 0] 0]
    upvar $ctxtVar ctxt
    set word {}
    while {1} {
        handleContinuation ctxt
        set ctxt(rest) [lassign $ctxt(rest) char]
        append word $char
        if {$char eq "\\"} {
            set ctxt(rest) [lassign $ctxt(rest) char]
            append word $char
            continue 
        }
        if {$char eq "\["} {
            append word [tokenizeSubstWord ctxt]
            continue 
        }
        if {$char eq "\]"} {
            break
        }
        
    }
    return $word
}

proc tokenizeBareWord {ctxtVar} {
    #puts [lindex [info level 0] 0]
    upvar $ctxtVar ctxt
    set word {}
    while {1} {
        handleContinuation ctxt
        set ctxt(rest) [lassign $ctxt(rest) char]
        append word $char
        if {$char eq "\\"} {
            set ctxt(rest) [lassign $ctxt(rest) char]
            append word $char
            continue 
        }
        if {$char eq "\["} {
            append word [tokenizeSubstWord ctxt]
            continue 
        }
        if {[string is space $char]} {
            break
        }
    }
    return [string trimright $word]
}