Posted to tcl by colin at Sun Feb 17 22:10:33 GMT 2013view raw

  1. #!/usr/bin/env tclsh
  2. package require itcl
  3. package require ylex
  4. package require yeti
  5.  
  6. #
  7. # Set up the macros and tokenizing rules for CSS
  8. #
  9. # http://www.w3.org/TR/CSS21/grammar.html
  10. #
  11. namespace eval css {
  12.  
  13. variable sg [yeti::ylex #auto -name ::css::Scanner]
  14. $sg configure -case 0
  15.  
  16. $sg macro h {[0-9a-f]}
  17. $sg macro nl {\n|\r\n|\r|\f}
  18. $sg macro s {[ \t\r\n\f]+}
  19. $sg macro w {<s>?}
  20. $sg macro nonascii {[\240-\377]}
  21. $sg macro unicode {\\<h>{1,6}(\r\n|[ \t\r\n\f])?}
  22. $sg macro escape {<unicode>|\\[^\r\n\f0-9a-f]}
  23. $sg macro nmstart {[_a-z]|<nonascii>|<escape>}
  24. $sg macro nmchar {[_a-z0-9-]|<nonascii>|<escape>}
  25. $sg macro string1 {\"([^\n\r\f\\"]|\\<nl>|<escape>)*\"}
  26. $sg macro string2 {\'([^\n\r\f\\']|\\<nl>|<escape>)*\'}
  27. $sg macro string {<string1>|<string2>}
  28. $sg macro badstring1 {\"([^\n\r\f\\"]|\\<nl>|<escape>)*\\?}
  29. $sg macro badstring2 {\'([^\n\r\f\\']|\\<nl>|<escape>)*\\?}
  30. $sg macro badstring {<badstring1>|<badstring2>}
  31. $sg macro badcomment1 {\/\*[^*]*\*+([^/*][^*]*\*+)*}
  32. $sg macro badcomment2 {\/\*[^*]*(\*+[^/*][^*]*)*}
  33. $sg macro badcomment {<badcomment1>|<badcomment2>}
  34. $sg macro baduri1 {url\(<w>([!#$%&*-\[\]-~]|<nonascii>|<escape>)*<w>}
  35. $sg macro baduri2 {url\(<w><string><w>}
  36. $sg macro baduri3 {url\(<w><badstring>}
  37. $sg macro baduri {<baduri1>|<baduri2>|<baduri3>}
  38. $sg macro comment {\/\*[^*]*\*+([^/*][^*]*\*+)*\/}
  39. $sg macro ident {-?<nmstart><nmchar>*}
  40. $sg macro name {<nmchar>+}
  41. $sg macro num {[0-9]+|[0-9]*\.[0-9]+}
  42. $sg macro url {([!#$%&*-~]|<nonascii>|<escape>)*}
  43.  
  44. $sg macro A {a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?}
  45. $sg macro C {c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?}
  46. $sg macro D {d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?}
  47. $sg macro E {e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?}
  48. $sg macro G {g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g}
  49. $sg macro H {h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h}
  50. $sg macro I {i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i}
  51. $sg macro K {k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k}
  52. $sg macro L {l|\\0{0,4}(4c|6c)(\r\n|[ \t\r\n\f])?|\\l}
  53. $sg macro M {m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m}
  54. $sg macro N {n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n}
  55. $sg macro O {o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o}
  56. $sg macro P {p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p}
  57. $sg macro R {r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r}
  58. $sg macro S {s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s}
  59. $sg macro T {t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t}
  60. $sg macro U {u|\\0{0,4}(55|75)(\r\n|[ \t\r\n\f])?|\\u}
  61. $sg macro X {x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x}
  62. $sg macro Z {z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z}
  63.  
  64. $sg add {<s>} {return S;}
  65.  
  66. $sg add {\/\*[^*]*\*+([^/*][^*]*\*+)*\/} {}; # ignore comments
  67. $sg add {<badcomment>} {}; # unclosed comment at EOF
  68.  
  69. $sg add {<!--} { return CDO }
  70. $sg add -- {-->} { return CDC }
  71. $sg add {~=} { return INCLUDES }
  72. $sg add {\|=} { return DASHMATCH }
  73.  
  74. $sg add {<string>} { return [list STRING $yytext] }
  75. $sg add {<badstring>} { return BAD_STRING }
  76.  
  77. $sg add {<ident>} { return [list IDENT $yytext] }
  78.  
  79. $sg add {\#<name>} { return [list HASH $yytext] }
  80.  
  81. $sg add {@<I><M><P><O><R><T>} { return IMPORT_SYM }
  82. $sg add {@<P><A><G><E>} { return PAGE_SYM }
  83. $sg add {@<M><E><D><I><A>} { return MEDIA_SYM }
  84. $sg add {@charset } { return CHARSET_SYM }
  85.  
  86. $sg add {!(<w>|<comment>)*<I><M><P><O><R><T><A><N><T>} { return [list IMPORTANT_SYM $yytext] }
  87.  
  88. $sg add {<num><E><M>} { return [list EMS $yytext] }
  89. $sg add {<num><E><X>} { return [list EXS $yytext] }
  90. $sg add {<num><P><X>} { return [list LENGTH $yytext] }
  91. $sg add {<num><C><M>} { return [list LENGTH $yytext] }
  92. $sg add {<num><M><M>} { return [list LENGTH $yytext] }
  93. $sg add {<num><I><N>} { return [list LENGTH $yytext] }
  94. $sg add {<num><P><T>} { return [list LENGTH $yytext] }
  95. $sg add {<num><P><C>} { return [list LENGTH $yytext] }
  96. $sg add {<num><D><E><G>} { return [list ANGLE $yytext] }
  97. $sg add {<num><R><A><D>} { return [list ANGLE $yytext] }
  98. $sg add {<num><G><R><A><D>} { return [list ANGLE $yytext] }
  99. $sg add {<num><M><S>} { return [list TIME $yytext] }
  100. $sg add {<num><S>} { return [list TIME $yytext] }
  101. $sg add {<num><H><Z>} { return [list FREQ $yytext] }
  102. $sg add {<num><K><H><Z>} { return [list FREQ $yytext] }
  103. $sg add {<num><ident>} { return [list DIMENSION $yytext] }
  104.  
  105. $sg add {<num>%} { return [list PERCENTAGE $yytext] }
  106. $sg add {<num>} { return [list NUMBER $yytext] }
  107.  
  108. $sg add {url\(<w><string><w>\)} { return [list URI $yytext] }
  109. $sg add {url\(<w><url><w>\)} { return [list URI $yytext] }
  110. $sg add {<baduri>} { return BAD_URI }
  111.  
  112. $sg add {(<ident>)\(} { return [list FUNCTION $1] }
  113.  
  114. $sg add {\{} { return LBRACE }
  115. $sg add {\}} { return RBRACE }
  116. $sg add {\(} { return LPAREN }
  117. $sg add {\)} { return RPAREN }
  118. $sg add {\[} { return LBRACKET }
  119. $sg add {\]} { return RBRACKET }
  120. $sg add {\*} { return STAR }
  121. $sg add {;+} { return ";" }
  122.  
  123. $sg add {.} { return $yytext }
  124.  
  125. #
  126. # Set up the parsing rules
  127. #
  128. variable pg [yeti::yeti #auto -name ::css::Parser]
  129. $pg add start { stylesheet } { return $1 }
  130.  
  131.  
  132. # stylesheet
  133. # : [ CHARSET_SYM STRING ';' ]?
  134. # [S|CDO|CDC]* [ import [ CDO S* | CDC S* ]* ]*
  135. # [ [ ruleset | media | page ] [ CDO S* | CDC S* ]* ]*
  136. # ;
  137. $pg add {
  138. stylesheet { CHARSET_SYM STRING ; stylesheet } { return [concat [list charset $2] $4] }
  139. | { ruleset stylesheet } { return [concat [list ruleset $1] $2] }
  140. | { import stylesheet } { return [concat [list import $1] $2] }
  141. | { media stylesheet } { return [list media $1 $2] }
  142. | { page stylesheet } { return [concat [list page $1] $2] }
  143. | { S* stylesheet } { return $2 }
  144. | {} -
  145. }
  146.  
  147. # import
  148. # : IMPORT_SYM S*
  149. # [STRING|URI] S* media_list? ';' S*
  150. # ;
  151. $pg add {
  152. import { IMPORT_SYM S* STRING S* media_list ; S* } { return [list $3 $5] }
  153. | { IMPORT_SYM S* STRING S* ; S* } { return $3 }
  154. | { IMPORT_SYM S* URI S* media_list ; S* } { return [list $3 $5] }
  155. | { IMPORT_SYM S* URI S* ; S* } { return $3 }
  156. }
  157.  
  158. # media
  159. # : MEDIA_SYM S* media_list '{' S* ruleset* '}' S*
  160. # ;
  161. $pg add media { MEDIA_SYM S* media_list LBRACE S* ruleset_seq RBRACE S* } { return [list $3 $6] }
  162.  
  163. $pg add {
  164. ruleset_seq { ruleset ruleset_seq } { return [concat [list ruleset $1] $2] }
  165. | { ruleset } { return $1 }
  166. | {} -
  167. }
  168.  
  169. # media_list
  170. # : medium [ COMMA S* medium]*
  171. # ;
  172. $pg add {
  173. media_list { medium , S* media_list } { return [concat $1 {*}$4] }
  174. | { medium } { return $1 }
  175. }
  176.  
  177. # medium
  178. # : IDENT S*
  179. # ;
  180. $pg add medium { IDENT S* } { return $1 }
  181.  
  182. # page
  183. # : PAGE_SYM S* pseudo_page?
  184. # '{' S* declaration? [ ';' S* declaration? ]* '}' S*
  185. # ;
  186. $pg add {
  187. page { PAGE_SYM S* pseudo_page LBRACE S* declaration_seq RBRACE S* } { return [list $3 $6] }
  188. | { PAGE_SYM S* LBRACE S* declaration_seq RBRACE S* } { return [list $2 $5] }
  189. }
  190.  
  191. # pseudo_page
  192. # : ':' IDENT S*
  193. # ;
  194. $pg add pseudo_page { : IDENT S* } { return :$2 }
  195.  
  196. # operator
  197. # : '/' S* | ',' S*
  198. # ;
  199. $pg add {
  200. operator { / S* } { return "/" }
  201. | { , S* } { return "," }
  202. | { = S* } { return "=" }
  203. }
  204.  
  205. # combinator
  206. # : '+' S*
  207. # | '>' S*
  208. # ;
  209. $pg add {
  210. combinator { + S* } { return "+" }
  211. | { > S* } { return ">" }
  212. }
  213.  
  214. # unary_operator
  215. # : '-' | '+'
  216. # ;
  217. $pg add {
  218. unary_operator { - } { return "-" }
  219. | { + } { return "+" }
  220. }
  221.  
  222. # property
  223. # : IDENT S*
  224. # ;
  225. $pg add property { IDENT S* } { return $1 }
  226.  
  227. # ruleset
  228. # : selector [ ',' S* selector ]*
  229. # '{' S* declaration? [ ';' S* declaration? ]* '}' S*
  230. # ;
  231. $pg add ruleset { selector_seq S* LBRACE S* declaration_seq RBRACE S* } { return [list $1 $5] }
  232.  
  233. $pg add {
  234. selector_seq { selector , S* selector_seq } { return [list $1 {*}$4] }
  235. | { selector } { return [list $1] }
  236. | {} -
  237. }
  238. $pg add {
  239. declaration_seq { declaration ; S* declaration_seq } { return [list $1 {*}$4] }
  240. | { declaration } { return $1 }
  241. | {} -
  242. }
  243.  
  244.  
  245. # selector
  246. # : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
  247. # ;
  248. $pg add {
  249. selector { simple_selector } { return $1 }
  250. | { simple_selector combinator selector } { return [concat $1 $2 {*}$3] }
  251. | { simple_selector S S* combinator selector } { return [concat $1 $4 {*}$5] }
  252. | { simple_selector S S* selector } { return [concat $1 {*}$4] }
  253. | {} -
  254. }
  255.  
  256. # simple_selector
  257. # : element_name [ HASH | class | attrib | pseudo ]*
  258. # | [ HASH | class | attrib | pseudo ]+
  259. # ;
  260. $pg add {
  261. simple_selector { element_name selector_sub } { return [join [list $1 {*}$2] ""] }
  262. | { element_name } { return $1 }
  263. | { selector_sub } { return $1 }
  264. }
  265. $pg add {
  266. selector_sub { HASH selector_sub } { return [concat $1 {*}$2] }
  267. | { class selector_sub } { return [concat $1 {*}$2] }
  268. | { attrib selector_sub } { return [concat $1 {*}$2] }
  269. | { pseudo selector_sub } { return [concat $1 {*}$2] }
  270. | { HASH } { return $1 }
  271. | { class } { return $1 }
  272. | { attrib } { return $1 }
  273. | { pseudo } { return $1 }
  274. }
  275.  
  276. # class
  277. # : '.' IDENT
  278. # ;
  279. $pg add class { . IDENT } { return .$2 }
  280.  
  281. # element_name
  282. # : IDENT | '*'
  283. # ;
  284. $pg add {
  285. element_name { IDENT } { return $1 }
  286. | { STAR } { return $1 }
  287. }
  288.  
  289. # attrib
  290. # : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
  291. # [ IDENT | STRING ] S* ]? ']'
  292. # ;
  293. $pg add {
  294. attrib { LBRACKET S* IDENT S* RBRACKET } { return $3 }
  295. | { LBRACKET S* IDENT S* = S* IDENT S* RBRACKET } { return [list $3 = $7] }
  296. | { LBRACKET S* IDENT S* = S* STRING S* RBRACKET } { return [list $3 = $7] }
  297. | { LBRACKET S* IDENT S* INCLUDES S* IDENT S* RBRACKET } { return [list $3 $5 $7] }
  298. | { LBRACKET S* IDENT S* INCLUDES S* STRING S* RBRACKET } { return [list $3 $5 $7] }
  299. | { LBRACKET S* IDENT S* DASHMATCH S* IDENT S* RBRACKET } { return [list $3 $5 $7] }
  300. | { LBRACKET S* IDENT S* DASHMATCH S* STRING S* RBRACKET } { return [list $3 $5 $7] }
  301. }
  302.  
  303. # pseudo
  304. # : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
  305. # ;
  306. $pg add {
  307. pseudo { : IDENT } { return :$2 }
  308. | { : FUNCTION S* RPAREN } { return :$2 }
  309. | { : FUNCTION S* IDENT S* RPARENT } { return [list :$2 $4] }
  310. }
  311.  
  312. # declaration
  313. # : property ':' S* expr prio?
  314. # ;
  315. $pg add {
  316. declaration { property : S* expr } { return [list $1 $4] }
  317. | { property : S* expr prio } { return [list $1 $4 $5] }
  318. }
  319.  
  320. # prio
  321. # : IMPORTANT_SYM S*
  322. # ;
  323. $pg add prio { IMPORTANT_SYM S* } { return $1 }
  324.  
  325. # expr
  326. # : term [ operator? term ]*
  327. # ;
  328. $pg add {
  329. expr { term } { return $1 }
  330. | { term expr } { return [list $1 {*}$2] }
  331. | { term operator expr } { return [list $1 $2 {*}$3] }
  332. }
  333.  
  334. # term
  335. # : unary_operator?
  336. # [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* |
  337. # TIME S* | FREQ S* ]
  338. # | STRING S* | IDENT S* | URI S* | hexcolor | function
  339. # ;
  340. $pg add {
  341. term { unary_operator term_numeric } { return $1$2 }
  342. | { term_numeric } { return $1 }
  343. | { STRING S* } { return $1 }
  344. | { IDENT S* } { return $1 }
  345. | { URI S* } { return $1 }
  346. | { hexcolor } { return $1 }
  347. | { function } { return $1 }
  348. }
  349.  
  350. $pg add {
  351. term_numeric { NUMBER S* } { return $1 }
  352. | { PERCENTAGE S* } { return $1 }
  353. | { LENGTH S* } { return $1 }
  354. | { EMS S* } { return $1 }
  355. | { EXS S* } { return $1 }
  356. | { ANGLE S* } { return $1 }
  357. | { TIME S* } { return $1 }
  358. | { FREQ S* } { return $1 }
  359. }
  360.  
  361. # function
  362. # : FUNCTION S* expr ')' S*
  363. # ;
  364. $pg add function { FUNCTION S* expr RPAREN S* } { return [list $1 $3] }
  365.  
  366. # /*
  367. # * There is a constraint on the color that it must
  368. # * have either 3 or 6 hex-digits (i.e., [0-9a-fA-F])
  369. # * after the "#"; e.g., "#000" is OK, but "#abcd" is not.
  370. # */
  371. # hexcolor
  372. # : HASH S*
  373. # ;
  374. $pg add hexcolor { HASH S* } { return $1 }
  375.  
  376. $pg add {
  377. S* { S S* } { return S }
  378. | { S } { return S }
  379. | {} {}
  380. }
  381.  
  382.  
  383. #
  384. # Now, build and eval the scanner and parser
  385. #
  386. eval [$sg dump]
  387. eval [$pg dump]
  388.  
  389. #
  390. # identity - turn parse tree into CSS
  391. #
  392. proc identity { css { indent 0 } } {
  393. set in0 [string repeat " " $indent]
  394. set in1 [string repeat " " [expr {$indent+2}]]
  395.  
  396. foreach { type block } $css {
  397.  
  398. switch $type {
  399. ruleset {
  400. lassign $block name rules
  401. puts "$in0[join $name ", "] \{ "
  402. foreach rule $rules {
  403. lassign $rule name value
  404. puts "$in1$name: $value;"
  405. }
  406. puts "$in0\}"
  407. }
  408. media {
  409. lassign $block type rules
  410. puts "$in0@media $type \{"
  411. ppcss $rules 2
  412. puts "$in0\}"
  413. }
  414.  
  415. }
  416.  
  417. }
  418. }
  419. Scanner ::css::scanner
  420. Parser ::css::parser -scanner ::css::scanner
  421.  
  422. proc parse {text} {
  423. scanner start $text
  424. parser reset
  425. return [parser parse]
  426. }
  427.  
  428. namespace export -clear *
  429. namespace ensemble create -subcommands {}
  430. }
  431.  
  432. #
  433. # And, the test code..
  434. #
  435. if {[info exists argv0] && [file normalize $argv0] eq [file normalize [info script]]} {
  436. #
  437. # .. this is a simple pretty-printer for lists
  438. #
  439. proc pplist { l { indent 0 } } {
  440.  
  441. if { !$indent } {
  442. puts "\{"
  443. pplist $l [expr {$indent+1}]
  444. puts "\}"
  445. } else {
  446.  
  447. foreach v $l {
  448.  
  449. set i [string repeat " " $indent]
  450.  
  451. if { [llength $v] <= 1 } {
  452. puts "$i$v"
  453. } else {
  454. puts "$i\{"
  455. pplist $v [expr {$indent+1}]
  456. puts "$i\}"
  457. }
  458.  
  459. }
  460.  
  461. }
  462.  
  463. }
  464.  
  465. if {[llength $argv] == 0} {
  466.  
  467. puts "Usage: $argv0 (demo|dump|parse)"
  468. puts ""
  469. puts "The \"demo\" mode runs the parser on a small test-case. The \"dump\""
  470. puts "mode dumps out the lexer and scanner code. The \"parse\" mode runs"
  471. puts "the parser on stdin."
  472.  
  473. } else {
  474.  
  475. switch [lindex $argv 0] {
  476. demo {
  477.  
  478. #
  479. # Demo mode - use a simple built-in test case
  480. #
  481. set demo {
  482. @charset "utf-8";
  483. body, h1 {
  484. margin: 0px;
  485. font-family: "URW Palladio L", Palatino, "Times New Roman", Times, serif;
  486. color: #000;
  487. }
  488. input {
  489. background-color: #111;
  490. filter:alpha(opacity=40);
  491. }
  492. .button:moop, .button:hover {
  493. border: solid thin red !important;
  494. }
  495. .button img { float: left ; }
  496. .button img.nopad { padding: 0px; }
  497. }
  498.  
  499. #
  500. # Run the scanner
  501. #
  502. puts [::css parse $demo]
  503.  
  504. }
  505.  
  506. dump {
  507. #
  508. # Dump the scanner and lexer code
  509. #
  510. puts [::css::Scanner dump]
  511. puts [::css::Parser dump]
  512. }
  513.  
  514. parse {
  515.  
  516. #
  517. # Parse on stdin
  518. #
  519. while { ![eof stdin] } {
  520. append demo [gets stdin]
  521. }
  522.  
  523. #
  524. # Run the scanner
  525. #
  526. pplist [css parse $demo]
  527.  
  528. }
  529.  
  530. }
  531.  
  532. }
  533. }