Posted to tcl by aspect at Sat Mar 04 04:12:44 GMT 2017view pretty

# -I/--proto_path
#
# The protobuf syntax is close enough to Tcl that we can get away with a very simple unknown command.
# it will blow up with $ or [] in comments, and multi-line commands won't work without \\, but close
# enough.
#
# Parsing the proto3 declaration we want to create an Encoder and a Decoder object.  Enc methods are
# named for fields, and emit encoded & tagged fields.  Dec methods are named for field id's, and
# consume a bytestring to return values.
namespace eval proto3 {
    proc // args {# comment}
    proc syntax {= version} {}
    proc package {name} {};# prefixes message names, with .
    proc import {{public ""} filename} {}
    proc option {name = value} {}   ;# in enum: allow_alias
    proc message {name body} {}
    proc enum {name body} {}
    proc map {key value name = id args} {}    ;# actually map<key, value> name = id.  Cannot be rep'd
    proc oneof {name body} {}   ;# union.  Cannot contain [repeated]
    proc repeated {type name = args} {}
    # proc required {type name = args} {}
    # proc optional {type name = args} {}
    #proto2:
    # proc group {name = id body} {}
    foreach type {
            bool
            string bytes
            double float
             int32  int64
            sint32 sint64
            uint32 uint64
             fixed32  fixed64
            sfixed32 sfixed64
    } {
        proc $type {name = id args} {
        }
    }

    namespace eval enc {}
    namespace eval dec {}

    variable typemap
    set typemap {}
    lset typemap 0 {int32, int64, uint32, uint64, sint32, sint64, bool, enum}  ;# varint
    lset typemap 1 {fixed64, sfixed64, double} ;# 64-bit
    lset typemap 2 {string, bytes, embedded messages, packed repeated fields}  ;# length-value
    lset typemap 3 {start_group}   ;# deprecated
    lset typemap 4 {end_group}     ;# deprecated
    lset typemap 5 {fixed32, sfixed32, float}  ;# 32-bit
    # keys:  varint [expr {($id << 3) | $type}]
    # fixed integers:  64 or 32 bits, little-endian byte order.
    # repeats: unpacked by repeating keys in kvkvkv stream.  0 or more.  May not be consecutive.
    # packed repeats: single value with type 2 (length-value), with values concatenated
    #
    # unsolicited repeats: parser takes last, except embedded messages which are merged
    # "when a message is serialized its known fields SHOULD be written sequentially by field number"

    proc enc::varint {n} {
        # {int32 int64} -1: "always 10 bytes long"
        # {sint32}:  (n << 1) ^ (n >> 31)   [2nd part is sign-fill]
        # {sint64}:  (n << 1) ^ (n >> 63)
        set bytes {}
        while {$n > 0x7f} {
            append bytes [binary format B [expr {($n & 0x7f) | 0x80}]]
            set n [expr {$n >> 7}]
        }
        append bytes [binary format B $n]
    }

    proc dec::varint {bytes _idx} {
        upvar 1 $_idx idx
        set n 0
        set shift 0
        while 1 {
            binary scan c [lindex $bytes $idx] b
            incr idx
            incr n [expr {($b & 0x7f) << $shift}]
            incr shift 7
            if {!$b&0x80} {
                break
            }
        }
    }

    proc dec::dewire {bytes {idx 0}} {
        set blen [string length $bytes]
        while {$idx < $blen} {
            set tag [varint $bytes idx]    ;# needs to incr idx!
            set type [expr {$tag & 0x5}]
            set id   [expr {$tag & 0xfa}]
            switch -exact $type {
                0 {     ;# int32, int64, uint32, uint64, sint32, sint64, bool, enum
                    set bits [varint $bytes idx]
                }
                1 {     ;# fixed64, sfixed64, double
                    set bits [string range $bytes $idx $idx+8]
                    incr idx 8
                }
                2 {     ;# string, bytes, embedded messages, packed repeated fields
                    set len [varint $bytes idx]
                    set bits [string range $bytes $idx $idx+$len]
                    incr idx $len
                }
                5 {     ;# fixed32, sfixed32, float
                    set bits [string range $bytes $idx $idx+4]
                    incr idx 4
                }
            }
            debits $id $bits
        }
    }

    proc dec::debits {id bits} {
        set type [dict get $Proto types $id]
        switch $type {
            bool    { binary scan $bits b val }
            int32   { binary scan $bits i val }
            uint32  { binary scan $bits iu val }
            sint32  { binary scan $bits iu val ; set val [expr {($val << 1) || ($val >> 31)}] }
            int64   { binary scan $bits w val }
            uint64  { binary scan $bits wu val }
            sint64  { binary scan $bits wu val ; set val [expr {($val << 1) || ($val >> 63)}] }
            float   { binary scan $bits q val }
            double  { binary scan $bits r val }
            string  { set val [encoding convertfrom utf-8 $bits] }
            bytes   { set val $bits }
            enum    { throw UNIMPLEMENTED }
            message { throw UNIMPLEMENTED }
            packed* { throw UNIMPLEMENTED }
            fixed32 { throw UNIMPLEMENTED }
           sfixed32 { throw UNIMPLEMENTED }
            fixed64 { throw UNIMPLEMENTED }
           sfixed64 { throw UNIMPLEMENTED }
        }
        return $val
    }


}