Posted to tcl by colin at Tue Feb 19 07:14:08 GMT 2013view raw

  1. #!/usr/bin/env tclsh
  2. #lappend ::auto_path [pwd]
  3. package require itcl
  4. package require ylex
  5. package require yeti
  6.  
  7. #
  8. # Set up the macros and tokenizing rules for CSS
  9. #
  10. # http://www.w3.org/TR/CSS21/grammar.html
  11. #
  12. namespace eval css {
  13.  
  14. proc MkScanner {} {
  15. variable sg [yeti::ylex #auto -name ::css::Scanner]
  16. $sg configure -case 0
  17.  
  18. $sg macro h {[0-9a-f]}
  19. $sg macro nl {\n|\r\n|\r|\f}
  20. $sg macro s {[ \t\r\n\f]+}
  21. $sg macro w {<s>?}
  22. $sg macro nonascii {[\240-\377]}
  23. $sg macro unicode {\\<h>{1,6}(\r\n|[ \t\r\n\f])?}
  24. $sg macro escape {<unicode>|\\[^\r\n\f0-9a-f]}
  25. $sg macro nmstart {[_a-z]|<nonascii>|<escape>}
  26. $sg macro nmchar {[_a-z0-9-]|<nonascii>|<escape>}
  27. $sg macro ident {-?<nmstart><nmchar>*}
  28. $sg macro string1 {\"([^\n\r\f\\"]|\\<nl>|<escape>)*\"}
  29. $sg macro string2 {\'([^\n\r\f\\']|\\<nl>|<escape>)*\'}
  30. $sg macro string {<string1>|<string2>}
  31. $sg macro badstring1 {\"([^\n\r\f\\"]|\\<nl>|<escape>)*\\?}
  32. $sg macro badstring2 {\'([^\n\r\f\\']|\\<nl>|<escape>)*\\?}
  33. $sg macro badstring {<badstring1>|<badstring2>}
  34. $sg macro badcomment1 {\/\*[^*]*\*+([^/*][^*]*\*+)*}
  35. $sg macro badcomment2 {\/\*[^*]*(\*+[^/*][^*]*)*}
  36. $sg macro badcomment {<badcomment1>|<badcomment2>}
  37. $sg macro baduri1 {url\(<w>([!#$%&*-\[\]-~]|<nonascii>|<escape>)*<w>}
  38. $sg macro baduri2 {url\(<w><string><w>}
  39. $sg macro baduri3 {url\(<w><badstring>}
  40. $sg macro baduri {<baduri1>|<baduri2>|<baduri3>}
  41. $sg macro comment {\/\*[^*]*\*+([^/*][^*]*\*+)*\/}
  42. $sg macro name {<nmchar>+}
  43. $sg macro num {[0-9]+|[0-9]*\.[0-9]+}
  44. $sg macro url {([!#$%&*-~]|<nonascii>|<escape>)*}
  45. $sg macro meta {[$]<ident>}
  46.  
  47. $sg macro A {a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?}
  48. $sg macro C {c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?}
  49. $sg macro D {d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?}
  50. $sg macro E {e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?}
  51. $sg macro G {g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g}
  52. $sg macro H {h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h}
  53. $sg macro I {i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i}
  54. $sg macro K {k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k}
  55. $sg macro L {l|\\0{0,4}(4c|6c)(\r\n|[ \t\r\n\f])?|\\l}
  56. $sg macro M {m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m}
  57. $sg macro N {n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n}
  58. $sg macro O {o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o}
  59. $sg macro P {p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p}
  60. $sg macro R {r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r}
  61. $sg macro S {s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s}
  62. $sg macro T {t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t}
  63. $sg macro U {u|\\0{0,4}(55|75)(\r\n|[ \t\r\n\f])?|\\u}
  64. $sg macro X {x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x}
  65. $sg macro Z {z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z}
  66.  
  67. $sg add {<s>} {return S;}
  68.  
  69. $sg add {\/\*[^*]*\*+([^/*][^*]*\*+)*\/} {}; # ignore comments
  70. $sg add {<badcomment>} {}; # unclosed comment at EOF
  71.  
  72. $sg add {<!--} { return CDO }
  73. $sg add -- {-->} { return CDC }
  74. $sg add {~=} { return INCLUDES }
  75. $sg add {\|=} { return DASHMATCH }
  76.  
  77. $sg add {<string>} { return [list STRING $yytext] }
  78. $sg add {<badstring>} { return BAD_STRING }
  79.  
  80. $sg add {<ident>} { return [list IDENT $yytext] }
  81.  
  82. $sg add {\#<name>} { return [list HASH $yytext] }
  83.  
  84. $sg add {@<I><M><P><O><R><T>} { return IMPORT_SYM }
  85. $sg add {@<P><A><G><E>} { return PAGE_SYM }
  86. $sg add {@<M><E><D><I><A>} { return MEDIA_SYM }
  87. $sg add {@charset } { return CHARSET_SYM }
  88.  
  89. $sg add {!(<w>|<comment>)*<I><M><P><O><R><T><A><N><T>} { return [list IMPORTANT_SYM $yytext] }
  90.  
  91. $sg add {<num><E><M>} { return [list EMS $yytext] }
  92. $sg add {<num><E><X>} { return [list EXS $yytext] }
  93. $sg add {<num><P><X>} { return [list LENGTH $yytext] }
  94. $sg add {<num><C><M>} { return [list LENGTH $yytext] }
  95. $sg add {<num><M><M>} { return [list LENGTH $yytext] }
  96. $sg add {<num><I><N>} { return [list LENGTH $yytext] }
  97. $sg add {<num><P><T>} { return [list LENGTH $yytext] }
  98. $sg add {<num><P><C>} { return [list LENGTH $yytext] }
  99. $sg add {<num><D><E><G>} { return [list ANGLE $yytext] }
  100. $sg add {<num><R><A><D>} { return [list ANGLE $yytext] }
  101. $sg add {<num><G><R><A><D>} { return [list ANGLE $yytext] }
  102. $sg add {<num><M><S>} { return [list TIME $yytext] }
  103. $sg add {<num><S>} { return [list TIME $yytext] }
  104. $sg add {<num><H><Z>} { return [list FREQ $yytext] }
  105. $sg add {<num><K><H><Z>} { return [list FREQ $yytext] }
  106. $sg add {<num><ident>} { return [list DIMENSION $yytext] }
  107.  
  108. $sg add {<num>%} { return [list PERCENTAGE $yytext] }
  109. $sg add {<num>} { return [list NUMBER $yytext] }
  110.  
  111. $sg add {url\(<w><string><w>\)} { return [list URI $yytext] }
  112. $sg add {url\(<w><url><w>\)} { return [list URI $yytext] }
  113. $sg add {<baduri>} { return BAD_URI }
  114.  
  115. $sg add {(<ident>)\(} { return [list FUNCTION $1] }
  116.  
  117. $sg add {\{} { return LBRACE }
  118. $sg add {\}} { return RBRACE }
  119. $sg add {\(} { return LPAREN }
  120. $sg add {\)} { return RPAREN }
  121. $sg add {\[} { return LBRACKET }
  122. $sg add {\]} { return RBRACKET }
  123. $sg add {\*} { return STAR }
  124. $sg add {;+} { return ";" }
  125.  
  126. $sg add {.} { return $yytext }
  127. }
  128.  
  129. proc MkParser {} {
  130. #
  131. # Set up the parsing rules
  132. #
  133. variable pg [yeti::yeti #auto -name ::css::Parser]
  134. $pg add start { stylesheet } { return $1 }
  135.  
  136.  
  137. # stylesheet
  138. # : [ CHARSET_SYM STRING ';' ]?
  139. # [S|CDO|CDC]* [ import [ CDO S* | CDC S* ]* ]*
  140. # [ [ ruleset | media | page ] [ CDO S* | CDC S* ]* ]*
  141. # ;
  142. $pg add {
  143. stylesheet { CHARSET_SYM STRING ; stylesheet } { return [concat [list charset $2] $4] }
  144. | { ruleset stylesheet } { return [concat [list ruleset $1] $2] }
  145. | { import stylesheet } { return [concat [list import $1] $2] }
  146. | { media stylesheet } { return [list media $1 $2] }
  147. | { page stylesheet } { return [concat [list page $1] $2] }
  148. | { S* stylesheet } { return $2 }
  149. | {} -
  150. }
  151.  
  152. # import
  153. # : IMPORT_SYM S*
  154. # [STRING|URI] S* media_list? ';' S*
  155. # ;
  156. $pg add {
  157. import { IMPORT_SYM S* STRING S* media_list ; S* } { return [list $3 $5] }
  158. | { IMPORT_SYM S* STRING S* ; S* } { return $3 }
  159. | { IMPORT_SYM S* URI S* media_list ; S* } { return [list $3 $5] }
  160. | { IMPORT_SYM S* URI S* ; S* } { return $3 }
  161. }
  162.  
  163. # media
  164. # : MEDIA_SYM S* media_list '{' S* ruleset* '}' S*
  165. # ;
  166. $pg add media { MEDIA_SYM S* media_list LBRACE S* ruleset_seq RBRACE S* } { return [list $3 $6] }
  167.  
  168. $pg add {
  169. ruleset_seq { ruleset ruleset_seq } { return [concat [list ruleset $1] $2] }
  170. | { ruleset } { return $1 }
  171. | {} -
  172. }
  173.  
  174. # media_list
  175. # : medium [ COMMA S* medium]*
  176. # ;
  177. $pg add {
  178. media_list { medium , S* media_list } { return [concat $1 {*}$4] }
  179. | { medium } { return $1 }
  180. }
  181.  
  182. # medium
  183. # : IDENT S*
  184. # ;
  185. $pg add medium { IDENT S* } { return $1 }
  186.  
  187. # page
  188. # : PAGE_SYM S* pseudo_page?
  189. # '{' S* declaration? [ ';' S* declaration? ]* '}' S*
  190. # ;
  191. $pg add {
  192. page { PAGE_SYM S* pseudo_page LBRACE S* declaration_seq RBRACE S* } { return [list $3 $6] }
  193. | { PAGE_SYM S* LBRACE S* declaration_seq RBRACE S* } { return [list $2 $5] }
  194. }
  195.  
  196. # pseudo_page
  197. # : ':' IDENT S*
  198. # ;
  199. $pg add pseudo_page { : IDENT S* } { return :$2 }
  200.  
  201. # operator
  202. # : '/' S* | ',' S*
  203. # ;
  204. $pg add {
  205. operator { / S* } { return "/" }
  206. | { , S* } { return "," }
  207. | { = S* } { return "=" }
  208. }
  209.  
  210. # combinator
  211. # : '+' S*
  212. # | '>' S*
  213. # ;
  214. $pg add {
  215. combinator { + S* } { return "+" }
  216. | { > S* } { return ">" }
  217. }
  218.  
  219. # unary_operator
  220. # : '-' | '+'
  221. # ;
  222. $pg add {
  223. unary_operator { - } { return "-" }
  224. | { + } { return "+" }
  225. }
  226.  
  227. # property
  228. # : IDENT S*
  229. # ;
  230. $pg add property { IDENT S* } { return $1 }
  231.  
  232. # ruleset
  233. # : selector [ ',' S* selector ]*
  234. # '{' S* declaration? [ ';' S* declaration? ]* '}' S*
  235. # ;
  236. $pg add ruleset { selector_seq S* LBRACE S* declaration_seq RBRACE S* } { return [list $1 $5] }
  237.  
  238. $pg add {
  239. selector_seq { selector , S* selector_seq } { return [list $1 {*}$4] }
  240. | { selector } { return [list $1] }
  241. | {} -
  242. }
  243. $pg add {
  244. declaration_seq { declaration ; S* declaration_seq } { return [list $1 {*}$4] }
  245. | { declaration } { return $1 }
  246. | {} -
  247. }
  248.  
  249.  
  250. # selector
  251. # : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
  252. # ;
  253. $pg add {
  254. selector { simple_selector } { return $1 }
  255. | { simple_selector combinator selector } { return [concat $1 $2 {*}$3] }
  256. | { simple_selector S S* combinator selector } { return [concat $1 $4 {*}$5] }
  257. | { simple_selector S S* selector } { return [concat $1 {*}$4] }
  258. | {} -
  259. }
  260.  
  261. # simple_selector
  262. # : element_name [ HASH | class | attrib | pseudo ]*
  263. # | [ HASH | class | attrib | pseudo ]+
  264. # ;
  265. $pg add {
  266. simple_selector { element_name selector_sub } { return [join [list $1 {*}$2] ""] }
  267. | { element_name } { return $1 }
  268. | { selector_sub } { return $1 }
  269. }
  270. $pg add {
  271. selector_sub { HASH selector_sub } { return [concat $1 {*}$2] }
  272. | { class selector_sub } { return [concat $1 {*}$2] }
  273. | { attrib selector_sub } { return [concat $1 {*}$2] }
  274. | { pseudo selector_sub } { return [concat $1 {*}$2] }
  275. | { HASH } { return $1 }
  276. | { class } { return $1 }
  277. | { attrib } { return $1 }
  278. | { pseudo } { return $1 }
  279. }
  280.  
  281. # class
  282. # : '.' IDENT
  283. # ;
  284. $pg add class { . IDENT } { return .$2 }
  285.  
  286. # element_name
  287. # : IDENT | '*'
  288. # ;
  289. $pg add {
  290. element_name { IDENT } { return $1 }
  291. | { STAR } { return $1 }
  292. }
  293.  
  294. # attrib
  295. # : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
  296. # [ IDENT | STRING ] S* ]? ']'
  297. # ;
  298. $pg add {
  299. attrib { LBRACKET S* IDENT S* RBRACKET } { return $3 }
  300. | { LBRACKET S* IDENT S* = S* IDENT S* RBRACKET } { return [list $3 = $7] }
  301. | { LBRACKET S* IDENT S* = S* STRING S* RBRACKET } { return [list $3 = $7] }
  302. | { LBRACKET S* IDENT S* INCLUDES S* IDENT S* RBRACKET } { return [list $3 $5 $7] }
  303. | { LBRACKET S* IDENT S* INCLUDES S* STRING S* RBRACKET } { return [list $3 $5 $7] }
  304. | { LBRACKET S* IDENT S* DASHMATCH S* IDENT S* RBRACKET } { return [list $3 $5 $7] }
  305. | { LBRACKET S* IDENT S* DASHMATCH S* STRING S* RBRACKET } { return [list $3 $5 $7] }
  306. }
  307.  
  308. # pseudo
  309. # : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
  310. # ;
  311. $pg add {
  312. pseudo { : IDENT } { return :$2 }
  313. | { : FUNCTION S* RPAREN } { return :$2 }
  314. | { : FUNCTION S* IDENT S* RPARENT } { return [list :$2 $4] }
  315. }
  316.  
  317. # declaration
  318. # : property ':' S* expr prio?
  319. # ;
  320. $pg add {
  321. declaration { property : S* expr } { return [list $1 $4] }
  322. | { property : S* expr prio } { return [list $1 $4 $5] }
  323. }
  324.  
  325. # prio
  326. # : IMPORTANT_SYM S*
  327. # ;
  328. $pg add prio { IMPORTANT_SYM S* } { return $1 }
  329.  
  330. # expr
  331. # : term [ operator? term ]*
  332. # ;
  333. $pg add {
  334. expr { term } { return $1 }
  335. | { term expr } { return [list $1 {*}$2] }
  336. | { term operator expr } { return [list $1 $2 {*}$3] }
  337. }
  338.  
  339. # term
  340. # : unary_operator?
  341. # [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* |
  342. # TIME S* | FREQ S* ]
  343. # | STRING S* | IDENT S* | URI S* | hexcolor | function
  344. # ;
  345. $pg add {
  346. term { unary_operator term_numeric } { return $1$2 }
  347. | { term_numeric } { return $1 }
  348. | { STRING S* } { return $1 }
  349. | { IDENT S* } { return $1 }
  350. | { URI S* } { return $1 }
  351. | { hexcolor } { return $1 }
  352. | { function } { return $1 }
  353. }
  354.  
  355. $pg add {
  356. term_numeric { NUMBER S* } { return $1 }
  357. | { PERCENTAGE S* } { return $1 }
  358. | { LENGTH S* } { return $1 }
  359. | { EMS S* } { return $1 }
  360. | { EXS S* } { return $1 }
  361. | { ANGLE S* } { return $1 }
  362. | { TIME S* } { return $1 }
  363. | { FREQ S* } { return $1 }
  364. }
  365.  
  366. # function
  367. # : FUNCTION S* expr ')' S*
  368. # ;
  369. $pg add function { FUNCTION S* expr RPAREN S* } { return [list $1 $3] }
  370.  
  371. # /*
  372. # * There is a constraint on the color that it must
  373. # * have either 3 or 6 hex-digits (i.e., [0-9a-fA-F])
  374. # * after the "#"; e.g., "#000" is OK, but "#abcd" is not.
  375. # */
  376. # hexcolor
  377. # : HASH S*
  378. # ;
  379. $pg add hexcolor { HASH S* } { return $1 }
  380.  
  381. $pg add {
  382. S* { S S* } { return S }
  383. | { S } { return S }
  384. | {} {}
  385. }
  386. #$pg configure -verbose 3
  387. }
  388.  
  389. proc Timer {} {
  390. lappend result MkScanner [time {MkScanner; ::css::Scanner destroy}]
  391. lappend result MkParser [time {MkParser; ::css::Parser destroy}]
  392. variable sg; lappend result SG [time {eval [$sg dump]}]
  393. variable pg; lappend result PG [time {eval [$pg dump]}]
  394. return $result
  395. }
  396.  
  397. #
  398. # Now, build and eval the scanner and parser
  399. #
  400. MkScanner; eval [$sg dump]
  401. MkParser; eval [$pg dump]
  402.  
  403. #
  404. # identity - turn parse tree into CSS
  405. #
  406. proc identity { css { indent 0 } } {
  407. set in0 [string repeat " " $indent]
  408. set in1 [string repeat " " [expr {$indent+2}]]
  409.  
  410. foreach { type block } $css {
  411.  
  412. switch $type {
  413. ruleset {
  414. lassign $block name rules
  415. puts "$in0[join $name ", "] \{ "
  416. foreach rule $rules {
  417. lassign $rule name value
  418. puts "$in1$name: $value;"
  419. }
  420. puts "$in0\}"
  421. }
  422. media {
  423. lassign $block type rules
  424. puts "$in0@media $type \{"
  425. ppcss $rules 2
  426. puts "$in0\}"
  427. }
  428.  
  429. }
  430.  
  431. }
  432. }
  433. Scanner ::css::scanner
  434. Parser ::css::parser -scanner ::css::scanner
  435.  
  436. proc parse {text} {
  437. scanner start $text
  438. parser reset
  439. return [parser parse]
  440. }
  441.  
  442. namespace export -clear *
  443. namespace ensemble create -subcommands {}
  444. }
  445.  
  446. #
  447. # And, the test code..
  448. #
  449. if {[info exists argv0] && [file normalize $argv0] eq [file normalize [info script]]} {
  450. #
  451. # .. this is a simple pretty-printer for lists
  452. #
  453. proc pplist { l { indent 0 } } {
  454.  
  455. if { !$indent } {
  456. puts "\{"
  457. pplist $l [expr {$indent+1}]
  458. puts "\}"
  459. } else {
  460.  
  461. foreach v $l {
  462.  
  463. set i [string repeat " " $indent]
  464.  
  465. if { [llength $v] <= 1 } {
  466. puts "$i$v"
  467. } else {
  468. puts "$i\{"
  469. pplist $v [expr {$indent+1}]
  470. puts "$i\}"
  471. }
  472.  
  473. }
  474.  
  475. }
  476.  
  477. }
  478.  
  479. if {[llength $argv] == 0} {
  480.  
  481. puts "Usage: $argv0 (demo|dump|parse)"
  482. puts ""
  483. puts "The \"demo\" mode runs the parser on a small test-case. The \"dump\""
  484. puts "mode dumps out the lexer and scanner code. The \"parse\" mode runs"
  485. puts "the parser on stdin."
  486.  
  487. } else {
  488.  
  489. set demo {
  490. @charset "utf-8";
  491. body, h1 {
  492. margin: 0px;
  493. font-family: "URW Palladio L", Palatino, "Times New Roman", Times, serif;
  494. color: #000;
  495. }
  496. input {
  497. background-color: #111;
  498. filter:alpha(opacity=40);
  499. }
  500. .button:moop, .button:hover {
  501. border: solid thin red !important;
  502. }
  503. .button img { float: left ; }
  504. .button img.nopad { padding: 0px; }
  505. }
  506.  
  507. switch [lindex $argv 0] {
  508. demo {
  509.  
  510. #
  511. # Demo mode - use a simple built-in test case
  512. #
  513. #
  514. # Run the scanner
  515. #
  516. puts [::css parse $demo]
  517.  
  518. }
  519.  
  520. time {
  521. puts PARSE:[time {::css parse $demo}]
  522. puts Scanner:[time {::css::Scanner #auto}]
  523. puts Parser:[time {::css::Parser #auto -scanner ::css::scanner}]
  524. puts [::css::Timer]
  525. }
  526.  
  527. dump {
  528. #
  529. # Dump the scanner and lexer code
  530. #
  531. puts [::css::Scanner dump]
  532. puts [::css::Parser dump]
  533. }
  534.  
  535. parse {
  536. #
  537. # Parse on stdin
  538. #
  539. while { ![eof stdin] } {
  540. append demo [gets stdin]
  541. }
  542.  
  543. #
  544. # Run the scanner
  545. #
  546. pplist [css parse $demo]
  547. }
  548.  
  549. }
  550.  
  551. }
  552. }