Posted to tcl by colin at Wed Jan 05 07:00:03 GMT 2011view pretty

# zen - parse and manipulate zencode

package provide zen 1.0

if {[catch {package require Debug}]} {
    #proc Debug.zen {args} {}
    proc Debug.zen {args} {puts stderr zen@[uplevel subst $args]}
} else {
    Debug define zen 10
}

oo::class create Zen {
    method token {string {type wordchar}} {
	set failed 0
	set match [string is $type -strict -failindex failed $string]
	Debug.zen {token: string is $type until: $failed over '$string'}
	if {$match} {
	    return [list $string ""]	;# whole string matches
	} elseif {$failed} {
	    return [list [string range $string 0 $failed-1] [string range $string $failed end]]
	} else {
	    return [list "" $string]
	}
    }

    method tokenize {match_var rest_var {type wordchar}} {
	upvar 1 $match_var match
	upvar 1 $rest_var rest

	lassign [my token $rest $type] match rest
	set match [string trim $match]
	set rest [string trim $rest]

	Debug.zen {tokenize $type: '$match' '[string range $rest 0 10]'}
	return [expr {$match ne ""}]
    }

    method punct {rest_var} {
	upvar 1 $rest_var rest
	set rest [string trim $rest]
	set punct [string index $rest 0]
	Debug.zen {is '$punct' punct?}
	if {$punct in {. # * \[ | > + (}} {
	    set rest [string range $rest 1 end]
	    Debug.zen {punct '$punct' '[string range $rest 0 10]'}
	    return $punct
	} else {
	    Debug.zen {punct failed over '$rest'}
	    return ""
	}
    }

    method upto {what match_var rest_var} {
	upvar 1 $match_var match
	upvar 1 $rest_var rest

	set index [string first $what $rest]
	if {$index < 0} {
	    # no match
	    return 0
	} else {
	    set match [string trim [string range $rest 0 $index-1]]
	    set rest [string trim [string range $rest $index+1 end]]
	    return 1
	}
    }

    method compound {rest_var} {
	upvar 1 $rest_var rest
	set $rest [string trim $rest]
	Debug.zen {compound '$rest'}
	if {$rest eq ""} {
	    return ""
	}

	# look for leading punctuation
	set punct [my punct rest]
	Debug.zen {compound punct: '$punct'}

	switch -- $punct {
	    "" {
		error "Can't parse '$rest' - no copula or compound punctuation"
	    }
	    . {
		# class
		if {![my tokenize class rest]} {
		    error "trailing '.' with no identifier"
		}
		return [list class $class]
	    }

	    # {
		# id
		if {![my tokenize id rest]} {
		    error "trailing '#' with no identifier"
		}
		return [list id $id]
	    }

	    * {
		# multiplier
		if {[my tokenize mult rest integer]} {
		    return [list mult $mult]
		} else {
		    return [list mult ""]
		}
	    }

	    \[ {
		# attribute
		if {[my upto \] match rest]} {
		    return [list attr $match]
		} else {
		    error "no matching \] parsing '$rest'"
		}
	    }

	    | -
	    > -
	    + {
		# connector - not compound
		set rest ${punct}$rest
		return ""
	    }

	    \( {
		error "misplaced '(' in $rest"
	    }
	}
    }

    method compounding {rest_var} {
	upvar 1 $rest_var rest
	set rest [string trim $rest]

	Debug.zen {compounding '$rest'}
	if {$rest eq ""} {
	    return ""
	}

	set result {}
	while {1} {
	    set compound [my compound rest]
	    if {$compound eq ""} break
	    lappend result {*}$compound
	}

	Debug.zen {compounded '$result' remaining '$rest'}
	return $result
    }

    method id {rest_var} {
	upvar 1 $rest_var rest
	set rest [string trim $rest]
	Debug.zen {looking for id in: '$rest'}
	if {$rest eq ""} {
	    return ""
	}

	# look for leading word
	my tokenize word rest
	if {$word ne ""} {
	    Debug.zen {leading word: '$word'}
	    set result [list word $word {*}[my compounding rest]]
	    Debug.zen {id is: '$result' remaining: $rest}
	    return $result
	} else {
	    # look for id punctuation
	    set punct [my punct rest]
	    Debug.zen {id punct: '$punct'}

	    switch -- $punct {
		. {
		    return [list default . {*}[my compounding rest]]
		}
		\# {
		    return [list default \# {*}[my compounding rest]]
		}

		* {
		    return [list default * {*}[my compounding rest]]
		}

		\( {
		    return [my copula rest]
		}

		> -
		+ -
		\[ -
		\| {
		    error "naked '$punct' in '$rest'.  Expecting an identifier"
		}

		default {
		    error "unknown punctuation '$punct' in '$rest'"
		}
	    }
	}
    }

    # copula - having found an id/subexpr on the left,
    # find a copula (+,>) and an id on the right
    method copula {rest_var} {
	upvar 1 $rest_var rest
	set rest [string trim $rest]
	Debug.zen {looking for copula in '$rest'}
	if {$rest eq ""} {
	    return ""
	}

	# look for leading punctuation
	set punct [my punct rest]
	Debug.zen {copula punct: '$punct'}
	switch -- $punct {
	    > {
		return child
	    }

	    + {
		return sib
	    }

	    default {
		error "unknown punctuation '$punct' in '$rest'"
	    }
	}
    }

    method parser {rest_var} {
	upvar 1 $rest_var rest
	set rest [string trim $rest]
	Debug.zen {parser over: rest: '$rest'}
	if {$rest eq ""} {
	    return ""
	}
	
	# get lhs id/phrase
	set result [list [my id rest]]
	Debug.zen {parse lhs: '$result', rest: '$rest'}

	while {$rest ne ""} {
	    Debug.zen {parse looking for copula in '$rest'}
	    set copula [my copula rest]
	    Debug.zen {parse copula: '$copula', rest: '$rest'}

	    switch -- $copula {
		child -
		sib {
		    # get rhs id/phrase
		    set rhs [my id rest]
		    Debug.zen {parsed $copula rhs: '$rhs', rest: '$rest'}

		    lappend result $copula $rhs
		}

		"" {
		    Debug.zen {parsed: $result}
		    return $result
		}

		default {
		    error "unknown copula '$copyla'"
		}
	    }
	}

	Debug.zen {completed: $result}
	return $result
    }

    method parse {rest} {
	set result [my parser rest]
	Debug.zen {parse intermediate: '$result'}
	set cmd [list \[my generate]
	set level 1
	foreach {el op} $result {
	    if {$op eq "child"} {
		if {[dict exists $el mult]} {
		    set mult [dict get $el mult]
		    dict unset el mult
		    lappend cmd \[my child \[mult $mult \[my $el\]
		    incr level
		} else {
		    lappend cmd \[my child \[my $el\]
		}
		incr level
	    } else {
		if {[dict exists $el mult]} {
		    set mult [dict get $el mult]
		    dict unset el mult
		    lappend cmd \[my mult $mult \[my $el\]\]
		} else {
		    lappend cmd \[my $el\]
		}
	    }
	}
	set cmd [join $cmd]
	append cmd [string repeat \] $level]
	return $cmd
    }

    destructor {}
    constructor {args} {
	variable {*}$args
    }
}

if {[info exists argv0] && ($argv0 eq [info script])} {
    package require tcltest
    namespace import ::tcltest::*

    variable SETUP {Zen create zen}
    variable CLEANUP {zen destroy}

    test complex-1 {} -setup $SETUP -body {
	zen parse {div#page>div.logo+ul#navigation>li*5>a}
    } -cleanup $CLEANUP -result {[my generate [my child [my word div id page] [my word div class logo] [my child [my word ul id navigation] [my child [mult 5 [my word li] [my word a]]]]]]}

    set count 0
    foreach {from to} {
	div#name {[my generate [my word div id name]]}
	div.class {[my generate [my word div class class]]}
	div.one.two {[my generate [my word div class one class two]]}
	div#name.one.two {[my generate [my word div id name class one class two]]}
	head>link {[my generate [my child [my word head] [my word link]]]}
	table>tr>td {[my generate [my child [my word table] [my child [my word tr] [my word td]]]]}
	ul#name>li.item {[my generate [my child [my word ul id name] [my word li class item]]]}
	p+p {[my generate [my word p] [my word p]]}
	div#name>p.one+p.two {[my generate [my child [my word div id name] [my word p class one] [my word p class two]]]}
	p[title] {[my generate [my word p attr title]]}
	td[colspan=2] {[my generate [my word td attr colspan=2]]}
	{span[title="Hello" rel]} {[my generate [my word span attr {title="Hello" rel}]]}
	p.title|e .
	p*3 {[my generate [my mult 3 [my word p]]]}
	ul#name>li.item*3 {[my generate [my child [my word ul id name] [my mult 3 [my word li class item]]]]}
	p.name-$*3 .
	select>option#item-$*3 .
	ul+ .
	table+ .
    } {
	incr count
	test simple-$count {} -setup $SETUP -body [list zen parse $from] -cleanup $CLEANUP -result $to
    }

    # To see test statistics (Total/Passed/Skipped/Failed), best put this line in the end:
    cleanupTests
}