Posted to tcl by aspect at Sat Mar 04 04:12:44 GMT 2017view raw

  1. # -I/--proto_path
  2. #
  3. # The protobuf syntax is close enough to Tcl that we can get away with a very simple unknown command.
  4. # it will blow up with $ or [] in comments, and multi-line commands won't work without \\, but close
  5. # enough.
  6. #
  7. # Parsing the proto3 declaration we want to create an Encoder and a Decoder object. Enc methods are
  8. # named for fields, and emit encoded & tagged fields. Dec methods are named for field id's, and
  9. # consume a bytestring to return values.
  10. namespace eval proto3 {
  11. proc // args {# comment}
  12. proc syntax {= version} {}
  13. proc package {name} {};# prefixes message names, with .
  14. proc import {{public ""} filename} {}
  15. proc option {name = value} {} ;# in enum: allow_alias
  16. proc message {name body} {}
  17. proc enum {name body} {}
  18. proc map {key value name = id args} {} ;# actually map<key, value> name = id. Cannot be rep'd
  19. proc oneof {name body} {} ;# union. Cannot contain [repeated]
  20. proc repeated {type name = args} {}
  21. # proc required {type name = args} {}
  22. # proc optional {type name = args} {}
  23. #proto2:
  24. # proc group {name = id body} {}
  25. foreach type {
  26. bool
  27. string bytes
  28. double float
  29. int32 int64
  30. sint32 sint64
  31. uint32 uint64
  32. fixed32 fixed64
  33. sfixed32 sfixed64
  34. } {
  35. proc $type {name = id args} {
  36. }
  37. }
  38.  
  39. namespace eval enc {}
  40. namespace eval dec {}
  41.  
  42. variable typemap
  43. set typemap {}
  44. lset typemap 0 {int32, int64, uint32, uint64, sint32, sint64, bool, enum} ;# varint
  45. lset typemap 1 {fixed64, sfixed64, double} ;# 64-bit
  46. lset typemap 2 {string, bytes, embedded messages, packed repeated fields} ;# length-value
  47. lset typemap 3 {start_group} ;# deprecated
  48. lset typemap 4 {end_group} ;# deprecated
  49. lset typemap 5 {fixed32, sfixed32, float} ;# 32-bit
  50. # keys: varint [expr {($id << 3) | $type}]
  51. # fixed integers: 64 or 32 bits, little-endian byte order.
  52. # repeats: unpacked by repeating keys in kvkvkv stream. 0 or more. May not be consecutive.
  53. # packed repeats: single value with type 2 (length-value), with values concatenated
  54. #
  55. # unsolicited repeats: parser takes last, except embedded messages which are merged
  56. # "when a message is serialized its known fields SHOULD be written sequentially by field number"
  57.  
  58. proc enc::varint {n} {
  59. # {int32 int64} -1: "always 10 bytes long"
  60. # {sint32}: (n << 1) ^ (n >> 31) [2nd part is sign-fill]
  61. # {sint64}: (n << 1) ^ (n >> 63)
  62. set bytes {}
  63. while {$n > 0x7f} {
  64. append bytes [binary format B [expr {($n & 0x7f) | 0x80}]]
  65. set n [expr {$n >> 7}]
  66. }
  67. append bytes [binary format B $n]
  68. }
  69.  
  70. proc dec::varint {bytes _idx} {
  71. upvar 1 $_idx idx
  72. set n 0
  73. set shift 0
  74. while 1 {
  75. binary scan c [lindex $bytes $idx] b
  76. incr idx
  77. incr n [expr {($b & 0x7f) << $shift}]
  78. incr shift 7
  79. if {!$b&0x80} {
  80. break
  81. }
  82. }
  83. }
  84.  
  85. proc dec::dewire {bytes {idx 0}} {
  86. set blen [string length $bytes]
  87. while {$idx < $blen} {
  88. set tag [varint $bytes idx] ;# needs to incr idx!
  89. set type [expr {$tag & 0x5}]
  90. set id [expr {$tag & 0xfa}]
  91. switch -exact $type {
  92. 0 { ;# int32, int64, uint32, uint64, sint32, sint64, bool, enum
  93. set bits [varint $bytes idx]
  94. }
  95. 1 { ;# fixed64, sfixed64, double
  96. set bits [string range $bytes $idx $idx+8]
  97. incr idx 8
  98. }
  99. 2 { ;# string, bytes, embedded messages, packed repeated fields
  100. set len [varint $bytes idx]
  101. set bits [string range $bytes $idx $idx+$len]
  102. incr idx $len
  103. }
  104. 5 { ;# fixed32, sfixed32, float
  105. set bits [string range $bytes $idx $idx+4]
  106. incr idx 4
  107. }
  108. }
  109. debits $id $bits
  110. }
  111. }
  112.  
  113. proc dec::debits {id bits} {
  114. set type [dict get $Proto types $id]
  115. switch $type {
  116. bool { binary scan $bits b val }
  117. int32 { binary scan $bits i val }
  118. uint32 { binary scan $bits iu val }
  119. sint32 { binary scan $bits iu val ; set val [expr {($val << 1) || ($val >> 31)}] }
  120. int64 { binary scan $bits w val }
  121. uint64 { binary scan $bits wu val }
  122. sint64 { binary scan $bits wu val ; set val [expr {($val << 1) || ($val >> 63)}] }
  123. float { binary scan $bits q val }
  124. double { binary scan $bits r val }
  125. string { set val [encoding convertfrom utf-8 $bits] }
  126. bytes { set val $bits }
  127. enum { throw UNIMPLEMENTED }
  128. message { throw UNIMPLEMENTED }
  129. packed* { throw UNIMPLEMENTED }
  130. fixed32 { throw UNIMPLEMENTED }
  131. sfixed32 { throw UNIMPLEMENTED }
  132. fixed64 { throw UNIMPLEMENTED }
  133. sfixed64 { throw UNIMPLEMENTED }
  134. }
  135. return $val
  136. }
  137.  
  138.  
  139. }
  140.