Posted to tcl by aspect at Wed Oct 08 04:04:37 GMT 2014view pretty

# thematically, this belongs on http://wiki.tcl.tk/21701 which inspired it
# but I am too deeply troubled by the renaming to edit directly there.

# splits a Tcl word into its constituent parts, where each part is either:
#   a variable substitution beginning with $
#   a command substitution beginning with [ and ending with ]
#   a literal
proc wordSplit {word} {
    set parts {}
    set idx 0
    while {$idx < [string length $word]} {
        set c [string index $word $idx]
        switch -exact $c {
            \\ {
                ;#debug log {\\ substitution starting at $idx}
                if {[regexp -expanded {
                        ^([\\]
                            (?:
                                (?:[0-7]{,3})|          # octal byte
                                (?:x[0-9a-fA-F]{1,2})|  # hex byte
                                (?:u[0-9a-fA-F]{1,4})|  # unicode
                                (?:U[0-9a-fA-F]{1,8})|  # wide unicode -- NOTE
                                (?:\n\s*)|              # newline plus whitespace
                                [abfnrtv]|              # control
                                .                       # anything else is literal
                            )
                        )
                } [string range $word $idx end] -> match]} {
                    lappend parts $match
                    incr idx [string length $match]
                    #set match [subst $match]    ;# let Tcl figure it out
                } else {
                    error "Don't know how to parse this backslash substitution.  This is bad."
                }
            }
            \$ {
                ;#debug log {$ substitution starting at $idx}
                if {[regexp -expanded {
                        ^(\$
                            (?:
                                \{[^\}]*\}|     # braced var ref
                                (?:             # unbraced var ref
                                    (?:
                                        [a-zA-Z0-9_]+|  # name part
                                        ::+             # namespace separator
                                    )*
                                )(\(?)          # array ref?
                            )
                        )
                } [string range $word $idx end] -> match array]} {
                    if {$array ne ""} {
                        set rest [string range $word $idx end]
                        set match ""
                        foreach part [split $rest ")"] {
                            append match $part
                            if {[info complete $match\)]} {
                                break
                                #regexp {(\\*)$} $match -> bs  ;# count trailing backslashes
                                #if {[string length $bs] % 2 == 0} break
                            }
                            append match ")"
                        }
                        if {[string index $word [expr {$idx+[string length $match]}]] ne ")"} {
                            error "Expecting ) at $idx, token started with \"[string range $match 0 20]...\""
                        }
                        append match ")"
                    }
                    lappend parts $match
                    incr idx [string length $match]
                } else {
                    error "Error parsing variable substitution.  This is bad."
                }
            }
            \[ {
                ;#debug log {\[ substitution starting at $idx}
                incr idx 1
                set rest [string range $word $idx end]
                set match {}
                foreach part [split $rest "]"] {
                    append match $part
                    if {[info complete $match]} {
                        regexp {(\\*)$} $match -> bs  ;# count trailing backslashes
                        if {[string length $bs] % 2 == 0} break
                    }
                    append match "]"
                }
                incr idx [string length $match]
                if {[string index $word $idx] ne "]"} {
                    error "Expecting \] at $idx, command started with \"[string range $match 0 20]...\""
                }
                incr idx
                lappend parts "\[$match\]"
            }
            default {
                ;#debug log {verbatim substitution starting at $idx}
                # no substitutions:
                if {[regexp -expanded {^([^\\\[\$]*)} [string range $word $idx end] -> match]} {
                    lappend parts $match
                    incr idx [string length $match]
                }
            }
        }
    }
    return $parts
}