Posted to tcl by aspect at Sun Jul 20 14:31:39 GMT 2014view raw

  1. package require http
  2. package require tdom
  3.  
  4. proc geturl {url} {
  5. try {
  6. puts "Getting $url ..."
  7. set url [string map {\ %20} $url] ;# erk
  8. set tok [::http::geturl $url]
  9. upvar #0 $tok state
  10. if {$state(status) ne "ok"} {
  11. error $state(status)
  12. }
  13. return $state(body)
  14. } finally {
  15. ::http::cleanup $tok
  16. }
  17. }
  18.  
  19. proc write_file {filename data} {
  20. puts "Saving $filename ..."
  21. if {[file exists $filename]} {
  22. error "$filename exists!"
  23. }
  24. set fd [open $filename w]
  25. chan configure $fd -translation binary
  26. puts -nonewline $fd $data
  27. close $fd
  28. }
  29.  
  30. set html [geturl http://blogs.msdn.com/b/mssmallbiz/archive/2014/07/07/largest-collection-of-free-microsoft-ebooks-ever-including-windows-8-1-windows-8-windows-7-office-2013-office-365-office-2010-sharepoint-2013-dynamics-crm-powershell-exchange-server-lync-2013-system-center-azure-cloud-sql.aspx]
  31. set dom [dom parse -html $html]
  32.  
  33. set urls [$dom selectNodes {//a[.="PDF"]/@href}]
  34. set urls [lmap x $urls {lindex $x 1}]
  35.  
  36. foreach url $urls {
  37. while {1} { ;# follow redirections, the lazy man's way
  38. set html [geturl $url]
  39. if {![string match {<html*} $html]} {break}
  40. set dom [dom parse -html $html]
  41. set nodes [$dom selectNodes {//a/@href}]
  42. if {[llength $nodes] != 1} {
  43. error "I don't know how to follow redirects from $url .."
  44. }
  45. set url [lindex $nodes 0 1]
  46. }
  47. set filename [file tail $url]
  48. write_file $filename $html
  49. }
  50.