Posted to tcl by colin at Sun Feb 17 22:10:33 GMT 2013view raw
- #!/usr/bin/env tclsh
- package require itcl
- package require ylex
- package require yeti
- #
- # Set up the macros and tokenizing rules for CSS
- #
- # http://www.w3.org/TR/CSS21/grammar.html
- #
- namespace eval css {
- variable sg [yeti::ylex #auto -name ::css::Scanner]
- $sg configure -case 0
- $sg macro h {[0-9a-f]}
- $sg macro nl {\n|\r\n|\r|\f}
- $sg macro s {[ \t\r\n\f]+}
- $sg macro w {<s>?}
- $sg macro nonascii {[\240-\377]}
- $sg macro unicode {\\<h>{1,6}(\r\n|[ \t\r\n\f])?}
- $sg macro escape {<unicode>|\\[^\r\n\f0-9a-f]}
- $sg macro nmstart {[_a-z]|<nonascii>|<escape>}
- $sg macro nmchar {[_a-z0-9-]|<nonascii>|<escape>}
- $sg macro string1 {\"([^\n\r\f\\"]|\\<nl>|<escape>)*\"}
- $sg macro string2 {\'([^\n\r\f\\']|\\<nl>|<escape>)*\'}
- $sg macro string {<string1>|<string2>}
- $sg macro badstring1 {\"([^\n\r\f\\"]|\\<nl>|<escape>)*\\?}
- $sg macro badstring2 {\'([^\n\r\f\\']|\\<nl>|<escape>)*\\?}
- $sg macro badstring {<badstring1>|<badstring2>}
- $sg macro badcomment1 {\/\*[^*]*\*+([^/*][^*]*\*+)*}
- $sg macro badcomment2 {\/\*[^*]*(\*+[^/*][^*]*)*}
- $sg macro badcomment {<badcomment1>|<badcomment2>}
- $sg macro baduri1 {url\(<w>([!#$%&*-\[\]-~]|<nonascii>|<escape>)*<w>}
- $sg macro baduri2 {url\(<w><string><w>}
- $sg macro baduri3 {url\(<w><badstring>}
- $sg macro baduri {<baduri1>|<baduri2>|<baduri3>}
- $sg macro comment {\/\*[^*]*\*+([^/*][^*]*\*+)*\/}
- $sg macro ident {-?<nmstart><nmchar>*}
- $sg macro name {<nmchar>+}
- $sg macro num {[0-9]+|[0-9]*\.[0-9]+}
- $sg macro url {([!#$%&*-~]|<nonascii>|<escape>)*}
- $sg macro A {a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?}
- $sg macro C {c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?}
- $sg macro D {d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?}
- $sg macro E {e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?}
- $sg macro G {g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g}
- $sg macro H {h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h}
- $sg macro I {i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i}
- $sg macro K {k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k}
- $sg macro L {l|\\0{0,4}(4c|6c)(\r\n|[ \t\r\n\f])?|\\l}
- $sg macro M {m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m}
- $sg macro N {n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n}
- $sg macro O {o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o}
- $sg macro P {p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p}
- $sg macro R {r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r}
- $sg macro S {s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s}
- $sg macro T {t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t}
- $sg macro U {u|\\0{0,4}(55|75)(\r\n|[ \t\r\n\f])?|\\u}
- $sg macro X {x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x}
- $sg macro Z {z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z}
- $sg add {<s>} {return S;}
- $sg add {\/\*[^*]*\*+([^/*][^*]*\*+)*\/} {}; # ignore comments
- $sg add {<badcomment>} {}; # unclosed comment at EOF
- $sg add {<!--} { return CDO }
- $sg add -- {-->} { return CDC }
- $sg add {~=} { return INCLUDES }
- $sg add {\|=} { return DASHMATCH }
- $sg add {<string>} { return [list STRING $yytext] }
- $sg add {<badstring>} { return BAD_STRING }
- $sg add {<ident>} { return [list IDENT $yytext] }
- $sg add {\#<name>} { return [list HASH $yytext] }
- $sg add {@<I><M><P><O><R><T>} { return IMPORT_SYM }
- $sg add {@<P><A><G><E>} { return PAGE_SYM }
- $sg add {@<M><E><D><I><A>} { return MEDIA_SYM }
- $sg add {@charset } { return CHARSET_SYM }
- $sg add {!(<w>|<comment>)*<I><M><P><O><R><T><A><N><T>} { return [list IMPORTANT_SYM $yytext] }
- $sg add {<num><E><M>} { return [list EMS $yytext] }
- $sg add {<num><E><X>} { return [list EXS $yytext] }
- $sg add {<num><P><X>} { return [list LENGTH $yytext] }
- $sg add {<num><C><M>} { return [list LENGTH $yytext] }
- $sg add {<num><M><M>} { return [list LENGTH $yytext] }
- $sg add {<num><I><N>} { return [list LENGTH $yytext] }
- $sg add {<num><P><T>} { return [list LENGTH $yytext] }
- $sg add {<num><P><C>} { return [list LENGTH $yytext] }
- $sg add {<num><D><E><G>} { return [list ANGLE $yytext] }
- $sg add {<num><R><A><D>} { return [list ANGLE $yytext] }
- $sg add {<num><G><R><A><D>} { return [list ANGLE $yytext] }
- $sg add {<num><M><S>} { return [list TIME $yytext] }
- $sg add {<num><S>} { return [list TIME $yytext] }
- $sg add {<num><H><Z>} { return [list FREQ $yytext] }
- $sg add {<num><K><H><Z>} { return [list FREQ $yytext] }
- $sg add {<num><ident>} { return [list DIMENSION $yytext] }
- $sg add {<num>%} { return [list PERCENTAGE $yytext] }
- $sg add {<num>} { return [list NUMBER $yytext] }
- $sg add {url\(<w><string><w>\)} { return [list URI $yytext] }
- $sg add {url\(<w><url><w>\)} { return [list URI $yytext] }
- $sg add {<baduri>} { return BAD_URI }
- $sg add {(<ident>)\(} { return [list FUNCTION $1] }
- $sg add {\{} { return LBRACE }
- $sg add {\}} { return RBRACE }
- $sg add {\(} { return LPAREN }
- $sg add {\)} { return RPAREN }
- $sg add {\[} { return LBRACKET }
- $sg add {\]} { return RBRACKET }
- $sg add {\*} { return STAR }
- $sg add {;+} { return ";" }
- $sg add {.} { return $yytext }
- #
- # Set up the parsing rules
- #
- variable pg [yeti::yeti #auto -name ::css::Parser]
- $pg add start { stylesheet } { return $1 }
- # stylesheet
- # : [ CHARSET_SYM STRING ';' ]?
- # [S|CDO|CDC]* [ import [ CDO S* | CDC S* ]* ]*
- # [ [ ruleset | media | page ] [ CDO S* | CDC S* ]* ]*
- # ;
- $pg add {
- stylesheet { CHARSET_SYM STRING ; stylesheet } { return [concat [list charset $2] $4] }
- | { ruleset stylesheet } { return [concat [list ruleset $1] $2] }
- | { import stylesheet } { return [concat [list import $1] $2] }
- | { media stylesheet } { return [list media $1 $2] }
- | { page stylesheet } { return [concat [list page $1] $2] }
- | { S* stylesheet } { return $2 }
- | {} -
- }
- # import
- # : IMPORT_SYM S*
- # [STRING|URI] S* media_list? ';' S*
- # ;
- $pg add {
- import { IMPORT_SYM S* STRING S* media_list ; S* } { return [list $3 $5] }
- | { IMPORT_SYM S* STRING S* ; S* } { return $3 }
- | { IMPORT_SYM S* URI S* media_list ; S* } { return [list $3 $5] }
- | { IMPORT_SYM S* URI S* ; S* } { return $3 }
- }
- # media
- # : MEDIA_SYM S* media_list '{' S* ruleset* '}' S*
- # ;
- $pg add media { MEDIA_SYM S* media_list LBRACE S* ruleset_seq RBRACE S* } { return [list $3 $6] }
- $pg add {
- ruleset_seq { ruleset ruleset_seq } { return [concat [list ruleset $1] $2] }
- | { ruleset } { return $1 }
- | {} -
- }
- # media_list
- # : medium [ COMMA S* medium]*
- # ;
- $pg add {
- media_list { medium , S* media_list } { return [concat $1 {*}$4] }
- | { medium } { return $1 }
- }
- # medium
- # : IDENT S*
- # ;
- $pg add medium { IDENT S* } { return $1 }
- # page
- # : PAGE_SYM S* pseudo_page?
- # '{' S* declaration? [ ';' S* declaration? ]* '}' S*
- # ;
- $pg add {
- page { PAGE_SYM S* pseudo_page LBRACE S* declaration_seq RBRACE S* } { return [list $3 $6] }
- | { PAGE_SYM S* LBRACE S* declaration_seq RBRACE S* } { return [list $2 $5] }
- }
- # pseudo_page
- # : ':' IDENT S*
- # ;
- $pg add pseudo_page { : IDENT S* } { return :$2 }
- # operator
- # : '/' S* | ',' S*
- # ;
- $pg add {
- operator { / S* } { return "/" }
- | { , S* } { return "," }
- | { = S* } { return "=" }
- }
- # combinator
- # : '+' S*
- # | '>' S*
- # ;
- $pg add {
- combinator { + S* } { return "+" }
- | { > S* } { return ">" }
- }
- # unary_operator
- # : '-' | '+'
- # ;
- $pg add {
- unary_operator { - } { return "-" }
- | { + } { return "+" }
- }
- # property
- # : IDENT S*
- # ;
- $pg add property { IDENT S* } { return $1 }
- # ruleset
- # : selector [ ',' S* selector ]*
- # '{' S* declaration? [ ';' S* declaration? ]* '}' S*
- # ;
- $pg add ruleset { selector_seq S* LBRACE S* declaration_seq RBRACE S* } { return [list $1 $5] }
- $pg add {
- selector_seq { selector , S* selector_seq } { return [list $1 {*}$4] }
- | { selector } { return [list $1] }
- | {} -
- }
- $pg add {
- declaration_seq { declaration ; S* declaration_seq } { return [list $1 {*}$4] }
- | { declaration } { return $1 }
- | {} -
- }
- # selector
- # : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
- # ;
- $pg add {
- selector { simple_selector } { return $1 }
- | { simple_selector combinator selector } { return [concat $1 $2 {*}$3] }
- | { simple_selector S S* combinator selector } { return [concat $1 $4 {*}$5] }
- | { simple_selector S S* selector } { return [concat $1 {*}$4] }
- | {} -
- }
- # simple_selector
- # : element_name [ HASH | class | attrib | pseudo ]*
- # | [ HASH | class | attrib | pseudo ]+
- # ;
- $pg add {
- simple_selector { element_name selector_sub } { return [join [list $1 {*}$2] ""] }
- | { element_name } { return $1 }
- | { selector_sub } { return $1 }
- }
- $pg add {
- selector_sub { HASH selector_sub } { return [concat $1 {*}$2] }
- | { class selector_sub } { return [concat $1 {*}$2] }
- | { attrib selector_sub } { return [concat $1 {*}$2] }
- | { pseudo selector_sub } { return [concat $1 {*}$2] }
- | { HASH } { return $1 }
- | { class } { return $1 }
- | { attrib } { return $1 }
- | { pseudo } { return $1 }
- }
- # class
- # : '.' IDENT
- # ;
- $pg add class { . IDENT } { return .$2 }
- # element_name
- # : IDENT | '*'
- # ;
- $pg add {
- element_name { IDENT } { return $1 }
- | { STAR } { return $1 }
- }
- # attrib
- # : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
- # [ IDENT | STRING ] S* ]? ']'
- # ;
- $pg add {
- attrib { LBRACKET S* IDENT S* RBRACKET } { return $3 }
- | { LBRACKET S* IDENT S* = S* IDENT S* RBRACKET } { return [list $3 = $7] }
- | { LBRACKET S* IDENT S* = S* STRING S* RBRACKET } { return [list $3 = $7] }
- | { LBRACKET S* IDENT S* INCLUDES S* IDENT S* RBRACKET } { return [list $3 $5 $7] }
- | { LBRACKET S* IDENT S* INCLUDES S* STRING S* RBRACKET } { return [list $3 $5 $7] }
- | { LBRACKET S* IDENT S* DASHMATCH S* IDENT S* RBRACKET } { return [list $3 $5 $7] }
- | { LBRACKET S* IDENT S* DASHMATCH S* STRING S* RBRACKET } { return [list $3 $5 $7] }
- }
- # pseudo
- # : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
- # ;
- $pg add {
- pseudo { : IDENT } { return :$2 }
- | { : FUNCTION S* RPAREN } { return :$2 }
- | { : FUNCTION S* IDENT S* RPARENT } { return [list :$2 $4] }
- }
- # declaration
- # : property ':' S* expr prio?
- # ;
- $pg add {
- declaration { property : S* expr } { return [list $1 $4] }
- | { property : S* expr prio } { return [list $1 $4 $5] }
- }
- # prio
- # : IMPORTANT_SYM S*
- # ;
- $pg add prio { IMPORTANT_SYM S* } { return $1 }
- # expr
- # : term [ operator? term ]*
- # ;
- $pg add {
- expr { term } { return $1 }
- | { term expr } { return [list $1 {*}$2] }
- | { term operator expr } { return [list $1 $2 {*}$3] }
- }
- # term
- # : unary_operator?
- # [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* |
- # TIME S* | FREQ S* ]
- # | STRING S* | IDENT S* | URI S* | hexcolor | function
- # ;
- $pg add {
- term { unary_operator term_numeric } { return $1$2 }
- | { term_numeric } { return $1 }
- | { STRING S* } { return $1 }
- | { IDENT S* } { return $1 }
- | { URI S* } { return $1 }
- | { hexcolor } { return $1 }
- | { function } { return $1 }
- }
- $pg add {
- term_numeric { NUMBER S* } { return $1 }
- | { PERCENTAGE S* } { return $1 }
- | { LENGTH S* } { return $1 }
- | { EMS S* } { return $1 }
- | { EXS S* } { return $1 }
- | { ANGLE S* } { return $1 }
- | { TIME S* } { return $1 }
- | { FREQ S* } { return $1 }
- }
- # function
- # : FUNCTION S* expr ')' S*
- # ;
- $pg add function { FUNCTION S* expr RPAREN S* } { return [list $1 $3] }
- # /*
- # * There is a constraint on the color that it must
- # * have either 3 or 6 hex-digits (i.e., [0-9a-fA-F])
- # * after the "#"; e.g., "#000" is OK, but "#abcd" is not.
- # */
- # hexcolor
- # : HASH S*
- # ;
- $pg add hexcolor { HASH S* } { return $1 }
- $pg add {
- S* { S S* } { return S }
- | { S } { return S }
- | {} {}
- }
- #
- # Now, build and eval the scanner and parser
- #
- eval [$sg dump]
- eval [$pg dump]
- #
- # identity - turn parse tree into CSS
- #
- proc identity { css { indent 0 } } {
- set in0 [string repeat " " $indent]
- set in1 [string repeat " " [expr {$indent+2}]]
- foreach { type block } $css {
- switch $type {
- ruleset {
- lassign $block name rules
- puts "$in0[join $name ", "] \{ "
- foreach rule $rules {
- lassign $rule name value
- puts "$in1$name: $value;"
- }
- puts "$in0\}"
- }
- media {
- lassign $block type rules
- puts "$in0@media $type \{"
- ppcss $rules 2
- puts "$in0\}"
- }
- }
- }
- }
- Scanner ::css::scanner
- Parser ::css::parser -scanner ::css::scanner
- proc parse {text} {
- scanner start $text
- parser reset
- return [parser parse]
- }
- namespace export -clear *
- namespace ensemble create -subcommands {}
- }
- #
- # And, the test code..
- #
- if {[info exists argv0] && [file normalize $argv0] eq [file normalize [info script]]} {
- #
- # .. this is a simple pretty-printer for lists
- #
- proc pplist { l { indent 0 } } {
- if { !$indent } {
- puts "\{"
- pplist $l [expr {$indent+1}]
- puts "\}"
- } else {
- foreach v $l {
- set i [string repeat " " $indent]
- if { [llength $v] <= 1 } {
- puts "$i$v"
- } else {
- puts "$i\{"
- pplist $v [expr {$indent+1}]
- puts "$i\}"
- }
- }
- }
- }
- if {[llength $argv] == 0} {
- puts "Usage: $argv0 (demo|dump|parse)"
- puts ""
- puts "The \"demo\" mode runs the parser on a small test-case. The \"dump\""
- puts "mode dumps out the lexer and scanner code. The \"parse\" mode runs"
- puts "the parser on stdin."
- } else {
- switch [lindex $argv 0] {
- demo {
- #
- # Demo mode - use a simple built-in test case
- #
- set demo {
- @charset "utf-8";
- body, h1 {
- margin: 0px;
- font-family: "URW Palladio L", Palatino, "Times New Roman", Times, serif;
- color: #000;
- }
- input {
- background-color: #111;
- filter:alpha(opacity=40);
- }
- .button:moop, .button:hover {
- border: solid thin red !important;
- }
- .button img { float: left ; }
- .button img.nopad { padding: 0px; }
- }
- #
- # Run the scanner
- #
- puts [::css parse $demo]
- }
- dump {
- #
- # Dump the scanner and lexer code
- #
- puts [::css::Scanner dump]
- puts [::css::Parser dump]
- }
- parse {
- #
- # Parse on stdin
- #
- while { ![eof stdin] } {
- append demo [gets stdin]
- }
- #
- # Run the scanner
- #
- pplist [css parse $demo]
- }
- }
- }
- }