# Posted to tcl by aspect at Sun Jul 20 14:31:39 GMT 2014 (view raw)
package require Tcl 8.6
package require http
package require tdom
# geturl --
#
#   Fetch a URL with the http package and return the response body.
#
# Arguments:
#   url   The URL to fetch. Literal spaces are percent-encoded as %20;
#         no other escaping is performed.
#
# Results:
#   The body of the response. The HTTP status code is not examined, so the
#   body of a redirect or error page is returned like any other body.
#
# Errors:
#   Propagates any error raised by ::http::geturl itself. Raises an error
#   whose message is the transaction status when that status is not "ok".
#
# Side effects:
#   Prints a progress line to stdout.
proc geturl {url} {
    puts "Getting $url ..."

    # The map must be a two-element list: a single space and its escape.
    # Quoting the space keeps it a separate list element.
    set url [string map {" " %20} $url]

    # Start the transaction before entering the try block: if geturl itself
    # throws there is no token to clean up, and the original error must reach
    # the caller rather than being masked by a failing finally clause.
    set tok [::http::geturl $url]
    try {
        set status [::http::status $tok]
        if {$status ne "ok"} {
            error $status
        }
        return [::http::data $tok]
    } finally {
        # Always release the state array held by the http package.
        ::http::cleanup $tok
    }
}
# write_file --
#
#   Write data verbatim (binary translation) to a new file.
#
# Arguments:
#   filename   Path of the file to create. It must not already exist.
#   data       The content to write; no trailing newline is added.
#
# Results:
#   An empty string.
#
# Errors:
#   Raises "<filename> exists!" when the file is already present, and
#   propagates any error from opening, writing or closing the file.
#
# Side effects:
#   Prints a progress line to stdout and creates the file.
proc write_file {filename data} {
    puts "Saving $filename ..."
    if {[file exists $filename]} {
        error "$filename exists!"
    }

    # EXCL makes creation atomic: if the file appears between the check above
    # and this call, open fails instead of truncating someone else's file.
    set fd [open $filename {WRONLY CREAT EXCL}]
    try {
        chan configure $fd -translation binary
        puts -nonewline $fd $data
    } finally {
        # Close the channel even when configuring or writing fails.
        close $fd
    }
}
# Main script: download the index page, collect the href of every link whose
# text is exactly "PDF", then fetch each target and save it in the current
# directory under the last path component of its final URL.

set html [geturl http://blogs.msdn.com/b/mssmallbiz/archive/2014/07/07/largest-collection-of-free-microsoft-ebooks-ever-including-windows-8-1-windows-8-windows-7-office-2013-office-365-office-2010-sharepoint-2013-dynamics-crm-powershell-exchange-server-lync-2013-system-center-azure-cloud-sql.aspx]
set dom [dom parse -html $html]
try {
    # Each attribute match is indexed as a {name value} pair; keep the value.
    set urls [lmap attr [$dom selectNodes {//a[.="PDF"]/@href}] {lindex $attr 1}]
} finally {
    # Only plain strings were extracted, so the document can be freed now.
    $dom delete
}

# Upper bound on HTML "redirect" pages followed per download, so a redirect
# cycle ends with an error instead of looping forever.
set max_hops 10

foreach url $urls {
    set hops 0
    while {1} { ;# follow redirections, the lazy man's way
        set html [geturl $url]
        # Anything that does not start with "<html" is taken to be the file.
        if {![string match {<html*} $html]} {break}

        if {[incr hops] > $max_hops} {
            error "Too many redirects while fetching $url"
        }

        set dom [dom parse -html $html]
        try {
            set nodes [$dom selectNodes {//a/@href}]
        } finally {
            $dom delete
        }

        # A redirect page is expected to contain exactly one link.
        if {[llength $nodes] != 1} {
            error "I don't know how to follow redirects from $url .."
        }
        set url [lindex $nodes 0 1]
    }
    set filename [file tail $url]
    write_file $filename $html
}