Posted to tcl by aspect at Sun Jul 20 14:31:39 GMT 2014 (view pretty)
package require Tcl 8.6 ;# try/finally and lmap are used below
package require http
package require tdom

# geturl --
#
#   Fetch a URL over HTTP and return the response body.
#
# Arguments:
#   url   The URL to fetch. Literal spaces are percent-encoded before the
#         request is made.
#
# Results:
#   The response body as returned by the http package.
#
# Errors:
#   Raises an error when the transfer does not finish with status "ok", or
#   when the server answers with an HTTP code of 400 or above. 3xx answers
#   are returned to the caller, which inspects the body itself.
proc geturl {url} {
    puts "Getting $url ..."
    set url [string map [list " " %20] $url] ;# erk

    # Obtain the token before entering try: if ::http::geturl itself throws
    # there is no token to clean up, and referencing an unset variable in
    # the finally clause would mask the real error.
    set tok [::http::geturl $url]
    try {
        set status [::http::status $tok]
        if {$status ne "ok"} {
            set detail [::http::error $tok]
            if {$detail ne ""} {
                error "fetching $url failed: $status: $detail"
            }
            error "fetching $url failed: $status"
        }
        # An error page must not be mistaken for the requested document.
        set code [::http::ncode $tok]
        if {[string is integer -strict $code] && $code >= 400} {
            error "fetching $url failed: [::http::code $tok]"
        }
        return [::http::data $tok]
    } finally {
        ::http::cleanup $tok
    }
}

# write_file --
#
#   Write data verbatim (binary translation, no trailing newline) to a new
#   file.
#
# Arguments:
#   filename   Path of the file to create.
#   data       Content to write.
#
# Errors:
#   Raises an error if filename already exists; existing files are never
#   overwritten.
proc write_file {filename data} {
    puts "Saving $filename ..."
    if {[file exists $filename]} {
        error "$filename exists!"
    }
    set fd [open $filename w]
    try {
        chan configure $fd -translation binary
        puts -nonewline $fd $data
    } finally {
        # Close the channel even when configuring or writing fails.
        close $fd
    }
}

# --- Main script -----------------------------------------------------------
# Fetch the index page, collect the target of every link whose text is
# exactly "PDF", and save each target into the current directory.

set html [geturl http://blogs.msdn.com/b/mssmallbiz/archive/2014/07/07/largest-collection-of-free-microsoft-ebooks-ever-including-windows-8-1-windows-8-windows-7-office-2013-office-365-office-2010-sharepoint-2013-dynamics-crm-powershell-exchange-server-lync-2013-system-center-azure-cloud-sql.aspx]

set dom [dom parse -html $html]
try {
    # Attribute nodes come back as {name value} pairs; keep only the value.
    set urls [lmap x [$dom selectNodes {//a[.="PDF"]/@href}] {lindex $x 1}]
} finally {
    # tDOM documents are not garbage collected; release them explicitly.
    $dom delete
}

# Upper bound on HTML redirect pages followed per download, so a redirect
# cycle cannot hang the script.
set max_hops 10

foreach url $urls {
    set hops 0
    while {1} { ;# follow redirections, the lazy man's way
        set html [geturl $url]
        # Anything that does not start with "<html" is taken to be the
        # document itself.
        if {![string match {<html*} $html]} {break}
        if {[incr hops] > $max_hops} {
            error "Too many redirects (more than $max_hops) while fetching $url"
        }
        set dom [dom parse -html $html]
        try {
            set nodes [$dom selectNodes {//a/@href}]
        } finally {
            $dom delete
        }
        # A redirect page is expected to contain exactly one link.
        if {[llength $nodes] != 1} {
            error "I don't know how to follow redirects from $url .."
        }
        set url [lindex $nodes 0 1]
    }
    # Name the file after the last path component of the final URL.
    set filename [file tail $url]
    write_file $filename $html
}