Posted to tcl by kbk at Mon Apr 16 15:41:55 GMT 2007 (view raw)

  # Obtain the base pattern from makeUtf8Regexp (defined outside this
  # listing) and report how long the generated pattern is.
  set utf8re [makeUtf8Regexp]
  puts "utf8re is [string length $utf8re] chars"
  # utf8str is the base pattern with a trailing "+" quantifier; fixBadUtf8
  # reads it to find runs of acceptable input.
  # NOTE(review): "+" binds only to the last atom of utf8re, so this assumes
  # makeUtf8Regexp returns a single grouped or bracketed atom -- confirm.
  set utf8str ${utf8re}+
  4.  
  # fixBadUtf8 --
  #
  #   Returns a copy of data in which every character that is not covered by
  #   a match of the pattern in the variable utf8str is replaced by one
  #   U+FFFD (REPLACEMENT CHARACTER); matched runs are copied through as-is.
  #
  # Arguments:
  #   data    The string to scan.
  #
  # Results:
  #   The repaired string. It has the same number of characters as data.
  #
  # Notes:
  #   utf8str is resolved with [variable], i.e. in the namespace this proc
  #   is defined in, and must be set before the first call.
  #   NOTE(review): the replacement is the character U+FFFD, not its UTF-8
  #   byte sequence; if data holds raw bytes, confirm how callers decode the
  #   mixed result.
  proc fixBadUtf8 {data} {
      variable utf8str

      # With -all -inline, regexp returns one index pair for the whole match
      # followed by one pair per capturing group. Ask the engine how many
      # groups the pattern has so that only whole-match pairs are consumed;
      # treating a group pair as a match would duplicate text in the output.
      set stride [expr {[lindex [regexp -about -- $utf8str] 0] + 1}]

      # "--" ends switch parsing so a pattern starting with "-" is not
      # mistaken for an option.
      set matches [regexp -all -indices -inline -- $utf8str $data]
      set count [llength $matches]

      set retval {}
      set i 0    ;# index of the first character not yet emitted
      for {set k 0} {$k < $count} {incr k $stride} {
          # foreach/break unpacks the pair; kept instead of lassign so the
          # code still runs on Tcl 8.4.
          foreach {start end} [lindex $matches $k] break
          # One U+FFFD per unmatched character in the gap before this match,
          # then the matched run itself.
          append retval [string repeat \ufffd [expr {$start-$i}]] \
              [string range $data $start $end]
          set i [expr {$end+1}]
      }
      # Anything after the last match is unmatched as well.
      append retval [string repeat \ufffd [expr {[string length $data]-$i}]]
      return $retval
  }