# Posted to tcl by aspect at Sun Jul 20 14:31:39 GMT 2014 (view pretty)

package require http
package require tdom

# geturl --
#
#   Fetch a URL over HTTP and return the response body.
#
# Arguments:
#   url   The address to fetch. Literal spaces are replaced with %20;
#         no other characters are escaped.
#
# Results:
#   The body of the response. Raises an error whose message is the
#   transfer status if the transaction did not complete with status
#   "ok". The HTTP response code itself is not inspected, so the body
#   of a redirect or error page is returned as-is.
proc geturl {url} {
    puts "Getting $url ..."
    # Crude escaping: only spaces are handled, which is all the links
    # this script encounters need.
    set url [string map {\  %20} $url]

    # Acquire the token *before* entering try: if ::http::geturl itself
    # throws there is no token to clean up, and referencing an unset
    # variable in the finally clause would mask the real error.
    set tok [::http::geturl $url]
    try {
        set status [::http::status $tok]
        if {$status ne "ok"} {
            error $status
        }
        return [::http::data $tok]
    } finally {
        # Always release the per-request state array.
        ::http::cleanup $tok
    }
}

# write_file --
#
#   Write data verbatim (binary translation, no trailing newline) to a
#   new file.
#
# Arguments:
#   filename   Path of the file to create.
#   data       Content to write.
#
# Results:
#   None. Raises "<filename> exists!" if the file is already present;
#   existing files are never overwritten.
proc write_file {filename data} {
    puts "Saving $filename ..."
    if {[file exists $filename]} {
        error "$filename exists!"
    }
    # EXCL makes creation atomic: if another process creates the file
    # between the check above and this open, we fail instead of
    # truncating it.
    set fd [open $filename {WRONLY CREAT EXCL}]
    try {
        chan configure $fd -translation binary
        puts -nonewline $fd $data
    } finally {
        # Close the channel even when the write fails, so the handle
        # is not leaked.
        close $fd
    }
}

# Fetch the index page and collect the target of every link whose text
# is exactly "PDF".
set html [geturl http://blogs.msdn.com/b/mssmallbiz/archive/2014/07/07/largest-collection-of-free-microsoft-ebooks-ever-including-windows-8-1-windows-8-windows-7-office-2013-office-365-office-2010-sharepoint-2013-dynamics-crm-powershell-exchange-server-lync-2013-system-center-azure-cloud-sql.aspx]
set dom [dom parse -html $html]
try {
    # Selecting attribute nodes yields {name value} pairs; keep only the
    # value (the href itself).
    set urls [lmap x [$dom selectNodes {//a[.="PDF"]/@href}] {lindex $x 1}]
} finally {
    # tDOM documents live until explicitly deleted; the extracted hrefs
    # are plain strings, so the tree is no longer needed.
    $dom delete
}

# Download every collected link, saving each under the last path
# component of its final URL.
set max_hops 10 ;# upper bound on HTML "redirect" pages followed per link
foreach url $urls {
    set hops 0
    while {1} { ;# follow redirections, the lazy man's way
        set html [geturl $url]
        # Anything that does not start with <html is taken to be the
        # actual document rather than a redirect page.
        if {![string match {<html*} $html]} {break}
        # Guard against redirect cycles, which would otherwise loop
        # forever.
        if {[incr hops] > $max_hops} {
            error "Too many redirects (more than $max_hops) while fetching $url"
        }
        set dom [dom parse -html $html]
        try {
            set nodes [$dom selectNodes {//a/@href}]
        } finally {
            # Release the parsed tree on every hop; the attribute pairs
            # returned above are plain strings and remain valid.
            $dom delete
        }
        # A redirect page is expected to contain exactly one link: the
        # new location.
        if {[llength $nodes] != 1} {
            error "I don't know how to follow redirects from $url .."
        }
        set url [lindex $nodes 0 1]
    }
    set filename [file tail $url]
    write_file $filename $html
}