Posted to tcl by DasBrain at Thu Aug 08 03:26:28 GMT 2024view raw

  1. namespace eval ::cesu {}
  2.  
  3. proc ::cesu::formatQuery args {
  4. package require http
  5. set result ""
  6. set sep ""
  7. foreach i $args {
  8. append result $sep [::cesu::mapReply $i]
  9. if {$sep eq "="} {
  10. set sep &
  11. } else {
  12. set sep =
  13. }
  14. }
  15. return $result
  16. }
  17.  
  18. proc ::cesu::mapReply string {
  19. variable ::http::formMap
  20.  
  21. set string [encoding convertto utf-8 $string]
  22. set string [::cesu::cesu2utf $string]
  23. return [string map $formMap $string]
  24. }
  25.  
  26.  
  27.  
  28. proc ::cesu::cesu2utf str {
  29. if {[regexp {\xED([\xA0-\xAF])([\x80-\xBF])\xED([\xB0-\xBF])([\x80-\xBF])} str]} {
  30. set str [string map {\ \\ \[ \\\[ \] \\\]} $str]
  31. return [subst -novariables [regsub -all {^\xED([\xA0-\xAF])([\x80-\xBF])\xED([\xB0-\xBF])([\x80-\xBF])$} $str {[::cesu::cesu2utfR \1 \2 \3 \4]} ]]
  32. } else {
  33. return $str
  34. }
  35. }
  36.  
  37. proc ::cesu::cesu2utfR {1 2 3 4} {
  38. # UTF-8: 11110xxx 10xx xxxx 10xx xxxx 10xxxxxx
  39. # CESU-8: 11101101 1010 yy yy 10xxxx xx 11101101 1011xxxx 10xxxxxx
  40. binary scan $1 c 1
  41. binary scan $2 c 2
  42. binary scan $3 c 3
  43. puts [list $1 $2 $3]
  44. #binary scan $4 c 4
  45. incr 1
  46.  
  47. return [binary format ccca \
  48. [expr {0xF0 | (($1 & 0xC) >> 2)}] \
  49. [expr {0x80 | (($1 & 0x3) << 4) | (($2 & 0x3C) >> 2)}] \
  50. [expr {0x80 | (($2 & 0x3) << 4) | ($3 & 0xF)}] \
  51. $4]
  52. }
  53.  
  54. proc cesu::cesu2utfC char {
  55. # UTF-8: 11110xxx 10xx xxxx 10xx xxxx 10xxxxxx
  56. # CESU-8: 11101101 1010 yy yy 10xxxx xx 11101101 1011xxxx 10xxxxxx
  57. if {[regexp {^\xED([\xA0-\xAF])([\x80-\xBF])\xED([\xB0-\xBF])([\x80-\xBF])$} $char -> 1 2 3 4]} {
  58. binary scan $1 c 1
  59. binary scan $2 c 2
  60. binary scan $3 c 3
  61. puts [list $1 $2 $3]
  62. #binary scan $4 c 4
  63. incr 1
  64.  
  65. return [binary format ccca \
  66. [expr {0xF0 | (($1 & 0xC) >> 2)}] \
  67. [expr {0x80 | (($1 & 0x3) << 4) | (($2 & 0x3C) >> 2)}] \
  68. [expr {0x80 | (($2 & 0x3) << 4) | ($3 & 0xF)}] \
  69. $4]
  70.  
  71. } else {
  72. puts "Invalid sequence: $char"
  73. return $char
  74. }
  75. }
  76.