Posted to tcl by DasBrain at Thu Aug 08 03:26:28 GMT 2024view raw
- namespace eval ::cesu {}
- proc ::cesu::formatQuery args {
- package require http
- set result ""
- set sep ""
- foreach i $args {
- append result $sep [::cesu::mapReply $i]
- if {$sep eq "="} {
- set sep &
- } else {
- set sep =
- }
- }
- return $result
- }
- proc ::cesu::mapReply string {
- variable ::http::formMap
- set string [encoding convertto utf-8 $string]
- set string [::cesu::cesu2utf $string]
- return [string map $formMap $string]
- }
- proc ::cesu::cesu2utf str {
- if {[regexp {\xED([\xA0-\xAF])([\x80-\xBF])\xED([\xB0-\xBF])([\x80-\xBF])} str]} {
- set str [string map {\ \\ \[ \\\[ \] \\\]} $str]
- return [subst -novariables [regsub -all {^\xED([\xA0-\xAF])([\x80-\xBF])\xED([\xB0-\xBF])([\x80-\xBF])$} $str {[::cesu::cesu2utfR \1 \2 \3 \4]} ]]
- } else {
- return $str
- }
- }
- proc ::cesu::cesu2utfR {1 2 3 4} {
- # UTF-8: 11110xxx 10xx xxxx 10xx xxxx 10xxxxxx
- # CESU-8: 11101101 1010 yy yy 10xxxx xx 11101101 1011xxxx 10xxxxxx
- binary scan $1 c 1
- binary scan $2 c 2
- binary scan $3 c 3
- puts [list $1 $2 $3]
- #binary scan $4 c 4
- incr 1
- return [binary format ccca \
- [expr {0xF0 | (($1 & 0xC) >> 2)}] \
- [expr {0x80 | (($1 & 0x3) << 4) | (($2 & 0x3C) >> 2)}] \
- [expr {0x80 | (($2 & 0x3) << 4) | ($3 & 0xF)}] \
- $4]
- }
- proc cesu::cesu2utfC char {
- # UTF-8: 11110xxx 10xx xxxx 10xx xxxx 10xxxxxx
- # CESU-8: 11101101 1010 yy yy 10xxxx xx 11101101 1011xxxx 10xxxxxx
- if {[regexp {^\xED([\xA0-\xAF])([\x80-\xBF])\xED([\xB0-\xBF])([\x80-\xBF])$} $char -> 1 2 3 4]} {
- binary scan $1 c 1
- binary scan $2 c 2
- binary scan $3 c 3
- puts [list $1 $2 $3]
- #binary scan $4 c 4
- incr 1
- return [binary format ccca \
- [expr {0xF0 | (($1 & 0xC) >> 2)}] \
- [expr {0x80 | (($1 & 0x3) << 4) | (($2 & 0x3C) >> 2)}] \
- [expr {0x80 | (($2 & 0x3) << 4) | ($3 & 0xF)}] \
- $4]
- } else {
- puts "Invalid sequence: $char"
- return $char
- }
- }