Posted to tcl by miguel at Sat Apr 23 16:18:25 GMT 2011 (view raw)

  # Copy a page-oriented input file to a new file, rewriting index entries
  # on the way through.
  #
  # The input is read in chunks of $chunkSize and cut into pages at
  # $pagemarker.  The index patterns in idxlist are tried strictly in order:
  # the current pattern is applied to each page until it has matched at
  # least once and then stops matching, at which point the next pattern
  # takes over.  Once every pattern has been consumed the rest of the input
  # is copied through unchanged.

  set inputFname /foo/bar/sum.pdf
  set outputFname /foo/bar/sum2.pdf
  set indicesFname /foo/bar/idxes
  set chunkSize 4000
  # Page separator.  [split] treats every character of its second argument
  # as a separator, so this must stay a single character.
  set pagemarker \f; #or whatever it is

  # Load the raw index description (the variable holding the file name is
  # indicesFname).
  set f [open $indicesFname]
  set idxdata [read $f]
  close $f
  # build a list of indices and replacements in idxlist/idxrepl
  # NOTE(review): the two [....] below are unfinished placeholders; they must
  # be replaced with code that parses $idxdata before this script can run.
  set idxlist [....]
  set idxrepl [....]
  unset idxdata

  if {[llength $idxlist] != [llength $idxrepl]} {
      error "idxlist and idxrepl must have the same number of entries"
  }

  # Apply the current index substitution to one page and return the result.
  #
  # Works on the caller's state variables:
  #   i        position of the current entry in idxlist/idxrepl
  #   re       regular expression built from the current entry
  #   idxused  1 once the current entry has matched at least one page
  #   done     1 once every entry has been consumed
  #
  # At most one substitution is made per page (regsub without -all).
  proc substitutePage {page} {
      upvar 1 idxlist idxlist idxrepl idxrepl \
              i i re re idxused idxused done done

      while {!$done} {
          if {[regsub $re $page ([lindex $idxrepl $i])sh page]} {
              set idxused 1
              break
          }
          if {!$idxused} {
              # The current index has not shown up yet; leave the page alone.
              break
          }
          # The current index was seen earlier but not on this page: move on
          # to the next index and retry the same page with it.
          if {[incr i] == [llength $idxlist]} {
              set done 1
          } else {
              set re ([lindex $idxlist $i])sh
              set idxused 0
          }
      }
      return $page
  }

  set in [open $inputFname]
  set out [open $outputFname w]
  # Copy bytes verbatim: no encoding conversion or end-of-line translation.
  fconfigure $in -translation binary
  fconfigure $out -translation binary

  set inbuf {}
  set i 0
  set idxused 0
  # With no indices there is nothing to substitute; go straight to the copy.
  set done [expr {[llength $idxlist] == 0}]
  # NOTE(review): the parentheses here are regex grouping, so the pattern
  # matches "<index>sh", not a literal "(<index>)sh" -- confirm that this is
  # what the index entries expect.
  set re ([lindex $idxlist $i])sh

  while {!$done} {
      append inbuf [read $in $chunkSize]
      set atEof [eof $in]

      # The last element is either an incomplete page (more input follows)
      # or the final page of the file (at EOF); keep it apart from the pages
      # that are known to be followed by a page marker.
      set pages [split $inbuf $pagemarker]
      set inbuf [lindex $pages end]
      set pages [lrange $pages 0 end-1]

      foreach page $pages {
          puts -nonewline $out [substitutePage $page]$pagemarker
      }

      if {$atEof} {
          # Final page: no page marker followed it in the input.
          puts -nonewline $out [substitutePage $inbuf]
          set inbuf {}
          break
      }
  }

  # If the indices ran out in mid-file, inbuf still holds the start of a page
  # that was read but not yet written; emit it before copying the rest.
  puts -nonewline $out $inbuf
  fcopy $in $out

  close $in
  close $out