Posted to tcl by aspect at Sat Mar 04 04:12:44 GMT 2017view raw
- # -I/--proto_path
- #
- # The protobuf syntax is close enough to Tcl that we can get away with a very simple unknown command.
- # it will blow up with $ or [] in comments, and multi-line commands won't work without \\, but close
- # enough.
- #
- # Parsing the proto3 declaration we want to create an Encoder and a Decoder object. Enc methods are
- # named for fields, and emit encoded & tagged fields. Dec methods are named for field id's, and
- # consume a bytestring to return values.
- namespace eval proto3 {
- proc // args {# comment}
- proc syntax {= version} {}
- proc package {name} {};# prefixes message names, with .
- proc import {{public ""} filename} {}
- proc option {name = value} {} ;# in enum: allow_alias
- proc message {name body} {}
- proc enum {name body} {}
- proc map {key value name = id args} {} ;# actually map<key, value> name = id. Cannot be rep'd
- proc oneof {name body} {} ;# union. Cannot contain [repeated]
- proc repeated {type name = args} {}
- # proc required {type name = args} {}
- # proc optional {type name = args} {}
- #proto2:
- # proc group {name = id body} {}
- foreach type {
- bool
- string bytes
- double float
- int32 int64
- sint32 sint64
- uint32 uint64
- fixed32 fixed64
- sfixed32 sfixed64
- } {
- proc $type {name = id args} {
- }
- }
- namespace eval enc {}
- namespace eval dec {}
- variable typemap
- set typemap {}
- lset typemap 0 {int32, int64, uint32, uint64, sint32, sint64, bool, enum} ;# varint
- lset typemap 1 {fixed64, sfixed64, double} ;# 64-bit
- lset typemap 2 {string, bytes, embedded messages, packed repeated fields} ;# length-value
- lset typemap 3 {start_group} ;# deprecated
- lset typemap 4 {end_group} ;# deprecated
- lset typemap 5 {fixed32, sfixed32, float} ;# 32-bit
- # keys: varint [expr {($id << 3) | $type}]
- # fixed integers: 64 or 32 bits, little-endian byte order.
- # repeats: unpacked by repeating keys in kvkvkv stream. 0 or more. May not be consecutive.
- # packed repeats: single value with type 2 (length-value), with values concatenated
- #
- # unsolicited repeats: parser takes last, except embedded messages which are merged
- # "when a message is serialized its known fields SHOULD be written sequentially by field number"
- proc enc::varint {n} {
- # {int32 int64} -1: "always 10 bytes long"
- # {sint32}: (n << 1) ^ (n >> 31) [2nd part is sign-fill]
- # {sint64}: (n << 1) ^ (n >> 63)
- set bytes {}
- while {$n > 0x7f} {
- append bytes [binary format B [expr {($n & 0x7f) | 0x80}]]
- set n [expr {$n >> 7}]
- }
- append bytes [binary format B $n]
- }
- proc dec::varint {bytes _idx} {
- upvar 1 $_idx idx
- set n 0
- set shift 0
- while 1 {
- binary scan c [lindex $bytes $idx] b
- incr idx
- incr n [expr {($b & 0x7f) << $shift}]
- incr shift 7
- if {!$b&0x80} {
- break
- }
- }
- }
- proc dec::dewire {bytes {idx 0}} {
- set blen [string length $bytes]
- while {$idx < $blen} {
- set tag [varint $bytes idx] ;# needs to incr idx!
- set type [expr {$tag & 0x5}]
- set id [expr {$tag & 0xfa}]
- switch -exact $type {
- 0 { ;# int32, int64, uint32, uint64, sint32, sint64, bool, enum
- set bits [varint $bytes idx]
- }
- 1 { ;# fixed64, sfixed64, double
- set bits [string range $bytes $idx $idx+8]
- incr idx 8
- }
- 2 { ;# string, bytes, embedded messages, packed repeated fields
- set len [varint $bytes idx]
- set bits [string range $bytes $idx $idx+$len]
- incr idx $len
- }
- 5 { ;# fixed32, sfixed32, float
- set bits [string range $bytes $idx $idx+4]
- incr idx 4
- }
- }
- debits $id $bits
- }
- }
- proc dec::debits {id bits} {
- set type [dict get $Proto types $id]
- switch $type {
- bool { binary scan $bits b val }
- int32 { binary scan $bits i val }
- uint32 { binary scan $bits iu val }
- sint32 { binary scan $bits iu val ; set val [expr {($val << 1) || ($val >> 31)}] }
- int64 { binary scan $bits w val }
- uint64 { binary scan $bits wu val }
- sint64 { binary scan $bits wu val ; set val [expr {($val << 1) || ($val >> 63)}] }
- float { binary scan $bits q val }
- double { binary scan $bits r val }
- string { set val [encoding convertfrom utf-8 $bits] }
- bytes { set val $bits }
- enum { throw UNIMPLEMENTED }
- message { throw UNIMPLEMENTED }
- packed* { throw UNIMPLEMENTED }
- fixed32 { throw UNIMPLEMENTED }
- sfixed32 { throw UNIMPLEMENTED }
- fixed64 { throw UNIMPLEMENTED }
- sfixed64 { throw UNIMPLEMENTED }
- }
- return $val
- }
- }