Posted to tcl by colin at Tue Feb 19 07:14:08 GMT 2013view pretty
#!/usr/bin/env tclsh #lappend ::auto_path [pwd] package require itcl package require ylex package require yeti # # Set up the macros and tokenizing rules for CSS # # http://www.w3.org/TR/CSS21/grammar.html # namespace eval css { proc MkScanner {} { variable sg [yeti::ylex #auto -name ::css::Scanner] $sg configure -case 0 $sg macro h {[0-9a-f]} $sg macro nl {\n|\r\n|\r|\f} $sg macro s {[ \t\r\n\f]+} $sg macro w {<s>?} $sg macro nonascii {[\240-\377]} $sg macro unicode {\\<h>{1,6}(\r\n|[ \t\r\n\f])?} $sg macro escape {<unicode>|\\[^\r\n\f0-9a-f]} $sg macro nmstart {[_a-z]|<nonascii>|<escape>} $sg macro nmchar {[_a-z0-9-]|<nonascii>|<escape>} $sg macro ident {-?<nmstart><nmchar>*} $sg macro string1 {\"([^\n\r\f\\"]|\\<nl>|<escape>)*\"} $sg macro string2 {\'([^\n\r\f\\']|\\<nl>|<escape>)*\'} $sg macro string {<string1>|<string2>} $sg macro badstring1 {\"([^\n\r\f\\"]|\\<nl>|<escape>)*\\?} $sg macro badstring2 {\'([^\n\r\f\\']|\\<nl>|<escape>)*\\?} $sg macro badstring {<badstring1>|<badstring2>} $sg macro badcomment1 {\/\*[^*]*\*+([^/*][^*]*\*+)*} $sg macro badcomment2 {\/\*[^*]*(\*+[^/*][^*]*)*} $sg macro badcomment {<badcomment1>|<badcomment2>} $sg macro baduri1 {url\(<w>([!#$%&*-\[\]-~]|<nonascii>|<escape>)*<w>} $sg macro baduri2 {url\(<w><string><w>} $sg macro baduri3 {url\(<w><badstring>} $sg macro baduri {<baduri1>|<baduri2>|<baduri3>} $sg macro comment {\/\*[^*]*\*+([^/*][^*]*\*+)*\/} $sg macro name {<nmchar>+} $sg macro num {[0-9]+|[0-9]*\.[0-9]+} $sg macro url {([!#$%&*-~]|<nonascii>|<escape>)*} $sg macro meta {[$]<ident>} $sg macro A {a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?} $sg macro C {c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?} $sg macro D {d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?} $sg macro E {e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?} $sg macro G {g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g} $sg macro H {h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h} $sg macro I {i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i} $sg macro K {k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k} $sg macro L {l|\\0{0,4}(4c|6c)(\r\n|[ \t\r\n\f])?|\\l} $sg macro M {m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m} $sg macro N {n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n} $sg macro O {o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o} $sg macro P {p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p} $sg macro R {r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r} $sg macro S {s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s} $sg macro T {t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t} $sg macro U {u|\\0{0,4}(55|75)(\r\n|[ \t\r\n\f])?|\\u} $sg macro X {x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x} $sg macro Z {z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z} $sg add {<s>} {return S;} $sg add {\/\*[^*]*\*+([^/*][^*]*\*+)*\/} {}; # ignore comments $sg add {<badcomment>} {}; # unclosed comment at EOF $sg add {<!--} { return CDO } $sg add -- {-->} { return CDC } $sg add {~=} { return INCLUDES } $sg add {\|=} { return DASHMATCH } $sg add {<string>} { return [list STRING $yytext] } $sg add {<badstring>} { return BAD_STRING } $sg add {<ident>} { return [list IDENT $yytext] } $sg add {\#<name>} { return [list HASH $yytext] } $sg add {@<I><M><P><O><R><T>} { return IMPORT_SYM } $sg add {@<P><A><G><E>} { return PAGE_SYM } $sg add {@<M><E><D><I><A>} { return MEDIA_SYM } $sg add {@charset } { return CHARSET_SYM } $sg add {!(<w>|<comment>)*<I><M><P><O><R><T><A><N><T>} { return [list IMPORTANT_SYM $yytext] } $sg add {<num><E><M>} { return [list EMS $yytext] } $sg add {<num><E><X>} { return [list EXS $yytext] } $sg add {<num><P><X>} { return [list LENGTH $yytext] } $sg add {<num><C><M>} { return [list LENGTH $yytext] } $sg add {<num><M><M>} { return [list LENGTH $yytext] } $sg add {<num><I><N>} { return [list LENGTH $yytext] } $sg add {<num><P><T>} { return [list LENGTH $yytext] } $sg add {<num><P><C>} { return [list LENGTH $yytext] } $sg add {<num><D><E><G>} { return [list ANGLE $yytext] } $sg add {<num><R><A><D>} { return [list ANGLE $yytext] } $sg add {<num><G><R><A><D>} { return [list ANGLE $yytext] } $sg add {<num><M><S>} { return [list TIME $yytext] } $sg add {<num><S>} { return [list TIME $yytext] } $sg add {<num><H><Z>} { return [list FREQ $yytext] } $sg add {<num><K><H><Z>} { return [list FREQ $yytext] } $sg add {<num><ident>} { return [list DIMENSION $yytext] } $sg add {<num>%} { return [list PERCENTAGE $yytext] } $sg add {<num>} { return [list NUMBER $yytext] } $sg add {url\(<w><string><w>\)} { return [list URI $yytext] } $sg add {url\(<w><url><w>\)} { return [list URI $yytext] } $sg add {<baduri>} { return BAD_URI } $sg add {(<ident>)\(} { return [list FUNCTION $1] } $sg add {\{} { return LBRACE } $sg add {\}} { return RBRACE } $sg add {\(} { return LPAREN } $sg add {\)} { return RPAREN } $sg add {\[} { return LBRACKET } $sg add {\]} { return RBRACKET } $sg add {\*} { return STAR } $sg add {;+} { return ";" } $sg add {.} { return $yytext } } proc MkParser {} { # # Set up the parsing rules # variable pg [yeti::yeti #auto -name ::css::Parser] $pg add start { stylesheet } { return $1 } # stylesheet # : [ CHARSET_SYM STRING ';' ]? # [S|CDO|CDC]* [ import [ CDO S* | CDC S* ]* ]* # [ [ ruleset | media | page ] [ CDO S* | CDC S* ]* ]* # ; $pg add { stylesheet { CHARSET_SYM STRING ; stylesheet } { return [concat [list charset $2] $4] } | { ruleset stylesheet } { return [concat [list ruleset $1] $2] } | { import stylesheet } { return [concat [list import $1] $2] } | { media stylesheet } { return [list media $1 $2] } | { page stylesheet } { return [concat [list page $1] $2] } | { S* stylesheet } { return $2 } | {} - } # import # : IMPORT_SYM S* # [STRING|URI] S* media_list? ';' S* # ; $pg add { import { IMPORT_SYM S* STRING S* media_list ; S* } { return [list $3 $5] } | { IMPORT_SYM S* STRING S* ; S* } { return $3 } | { IMPORT_SYM S* URI S* media_list ; S* } { return [list $3 $5] } | { IMPORT_SYM S* URI S* ; S* } { return $3 } } # media # : MEDIA_SYM S* media_list '{' S* ruleset* '}' S* # ; $pg add media { MEDIA_SYM S* media_list LBRACE S* ruleset_seq RBRACE S* } { return [list $3 $6] } $pg add { ruleset_seq { ruleset ruleset_seq } { return [concat [list ruleset $1] $2] } | { ruleset } { return $1 } | {} - } # media_list # : medium [ COMMA S* medium]* # ; $pg add { media_list { medium , S* media_list } { return [concat $1 {*}$4] } | { medium } { return $1 } } # medium # : IDENT S* # ; $pg add medium { IDENT S* } { return $1 } # page # : PAGE_SYM S* pseudo_page? # '{' S* declaration? [ ';' S* declaration? ]* '}' S* # ; $pg add { page { PAGE_SYM S* pseudo_page LBRACE S* declaration_seq RBRACE S* } { return [list $3 $6] } | { PAGE_SYM S* LBRACE S* declaration_seq RBRACE S* } { return [list $2 $5] } } # pseudo_page # : ':' IDENT S* # ; $pg add pseudo_page { : IDENT S* } { return :$2 } # operator # : '/' S* | ',' S* # ; $pg add { operator { / S* } { return "/" } | { , S* } { return "," } | { = S* } { return "=" } } # combinator # : '+' S* # | '>' S* # ; $pg add { combinator { + S* } { return "+" } | { > S* } { return ">" } } # unary_operator # : '-' | '+' # ; $pg add { unary_operator { - } { return "-" } | { + } { return "+" } } # property # : IDENT S* # ; $pg add property { IDENT S* } { return $1 } # ruleset # : selector [ ',' S* selector ]* # '{' S* declaration? [ ';' S* declaration? ]* '}' S* # ; $pg add ruleset { selector_seq S* LBRACE S* declaration_seq RBRACE S* } { return [list $1 $5] } $pg add { selector_seq { selector , S* selector_seq } { return [list $1 {*}$4] } | { selector } { return [list $1] } | {} - } $pg add { declaration_seq { declaration ; S* declaration_seq } { return [list $1 {*}$4] } | { declaration } { return $1 } | {} - } # selector # : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]? # ; $pg add { selector { simple_selector } { return $1 } | { simple_selector combinator selector } { return [concat $1 $2 {*}$3] } | { simple_selector S S* combinator selector } { return [concat $1 $4 {*}$5] } | { simple_selector S S* selector } { return [concat $1 {*}$4] } | {} - } # simple_selector # : element_name [ HASH | class | attrib | pseudo ]* # | [ HASH | class | attrib | pseudo ]+ # ; $pg add { simple_selector { element_name selector_sub } { return [join [list $1 {*}$2] ""] } | { element_name } { return $1 } | { selector_sub } { return $1 } } $pg add { selector_sub { HASH selector_sub } { return [concat $1 {*}$2] } | { class selector_sub } { return [concat $1 {*}$2] } | { attrib selector_sub } { return [concat $1 {*}$2] } | { pseudo selector_sub } { return [concat $1 {*}$2] } | { HASH } { return $1 } | { class } { return $1 } | { attrib } { return $1 } | { pseudo } { return $1 } } # class # : '.' IDENT # ; $pg add class { . IDENT } { return .$2 } # element_name # : IDENT | '*' # ; $pg add { element_name { IDENT } { return $1 } | { STAR } { return $1 } } # attrib # : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S* # [ IDENT | STRING ] S* ]? ']' # ; $pg add { attrib { LBRACKET S* IDENT S* RBRACKET } { return $3 } | { LBRACKET S* IDENT S* = S* IDENT S* RBRACKET } { return [list $3 = $7] } | { LBRACKET S* IDENT S* = S* STRING S* RBRACKET } { return [list $3 = $7] } | { LBRACKET S* IDENT S* INCLUDES S* IDENT S* RBRACKET } { return [list $3 $5 $7] } | { LBRACKET S* IDENT S* INCLUDES S* STRING S* RBRACKET } { return [list $3 $5 $7] } | { LBRACKET S* IDENT S* DASHMATCH S* IDENT S* RBRACKET } { return [list $3 $5 $7] } | { LBRACKET S* IDENT S* DASHMATCH S* STRING S* RBRACKET } { return [list $3 $5 $7] } } # pseudo # : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ] # ; $pg add { pseudo { : IDENT } { return :$2 } | { : FUNCTION S* RPAREN } { return :$2 } | { : FUNCTION S* IDENT S* RPARENT } { return [list :$2 $4] } } # declaration # : property ':' S* expr prio? # ; $pg add { declaration { property : S* expr } { return [list $1 $4] } | { property : S* expr prio } { return [list $1 $4 $5] } } # prio # : IMPORTANT_SYM S* # ; $pg add prio { IMPORTANT_SYM S* } { return $1 } # expr # : term [ operator? term ]* # ; $pg add { expr { term } { return $1 } | { term expr } { return [list $1 {*}$2] } | { term operator expr } { return [list $1 $2 {*}$3] } } # term # : unary_operator? # [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* | # TIME S* | FREQ S* ] # | STRING S* | IDENT S* | URI S* | hexcolor | function # ; $pg add { term { unary_operator term_numeric } { return $1$2 } | { term_numeric } { return $1 } | { STRING S* } { return $1 } | { IDENT S* } { return $1 } | { URI S* } { return $1 } | { hexcolor } { return $1 } | { function } { return $1 } } $pg add { term_numeric { NUMBER S* } { return $1 } | { PERCENTAGE S* } { return $1 } | { LENGTH S* } { return $1 } | { EMS S* } { return $1 } | { EXS S* } { return $1 } | { ANGLE S* } { return $1 } | { TIME S* } { return $1 } | { FREQ S* } { return $1 } } # function # : FUNCTION S* expr ')' S* # ; $pg add function { FUNCTION S* expr RPAREN S* } { return [list $1 $3] } # /* # * There is a constraint on the color that it must # * have either 3 or 6 hex-digits (i.e., [0-9a-fA-F]) # * after the "#"; e.g., "#000" is OK, but "#abcd" is not. # */ # hexcolor # : HASH S* # ; $pg add hexcolor { HASH S* } { return $1 } $pg add { S* { S S* } { return S } | { S } { return S } | {} {} } #$pg configure -verbose 3 } proc Timer {} { lappend result MkScanner [time {MkScanner; ::css::Scanner destroy}] lappend result MkParser [time {MkParser; ::css::Parser destroy}] variable sg; lappend result SG [time {eval [$sg dump]}] variable pg; lappend result PG [time {eval [$pg dump]}] return $result } # # Now, build and eval the scanner and parser # MkScanner; eval [$sg dump] MkParser; eval [$pg dump] # # identity - turn parse tree into CSS # proc identity { css { indent 0 } } { set in0 [string repeat " " $indent] set in1 [string repeat " " [expr {$indent+2}]] foreach { type block } $css { switch $type { ruleset { lassign $block name rules puts "$in0[join $name ", "] \{ " foreach rule $rules { lassign $rule name value puts "$in1$name: $value;" } puts "$in0\}" } media { lassign $block type rules puts "$in0@media $type \{" ppcss $rules 2 puts "$in0\}" } } } } Scanner ::css::scanner Parser ::css::parser -scanner ::css::scanner proc parse {text} { scanner start $text parser reset return [parser parse] } namespace export -clear * namespace ensemble create -subcommands {} } # # And, the test code.. # if {[info exists argv0] && [file normalize $argv0] eq [file normalize [info script]]} { # # .. this is a simple pretty-printer for lists # proc pplist { l { indent 0 } } { if { !$indent } { puts "\{" pplist $l [expr {$indent+1}] puts "\}" } else { foreach v $l { set i [string repeat " " $indent] if { [llength $v] <= 1 } { puts "$i$v" } else { puts "$i\{" pplist $v [expr {$indent+1}] puts "$i\}" } } } } if {[llength $argv] == 0} { puts "Usage: $argv0 (demo|dump|parse)" puts "" puts "The \"demo\" mode runs the parser on a small test-case. The \"dump\"" puts "mode dumps out the lexer and scanner code. The \"parse\" mode runs" puts "the parser on stdin." } else { set demo { @charset "utf-8"; body, h1 { margin: 0px; font-family: "URW Palladio L", Palatino, "Times New Roman", Times, serif; color: #000; } input { background-color: #111; filter:alpha(opacity=40); } .button:moop, .button:hover { border: solid thin red !important; } .button img { float: left ; } .button img.nopad { padding: 0px; } } switch [lindex $argv 0] { demo { # # Demo mode - use a simple built-in test case # # # Run the scanner # puts [::css parse $demo] } time { puts PARSE:[time {::css parse $demo}] puts Scanner:[time {::css::Scanner #auto}] puts Parser:[time {::css::Parser #auto -scanner ::css::scanner}] puts [::css::Timer] } dump { # # Dump the scanner and lexer code # puts [::css::Scanner dump] puts [::css::Parser dump] } parse { # # Parse on stdin # while { ![eof stdin] } { append demo [gets stdin] } # # Run the scanner # pplist [css parse $demo] } } } }