Posted to tcl by dbohdan at Fri Apr 24 16:07:02 GMT 2015view pretty

#!/usr/bin/env tclsh
# Sqawk, an SQL Awk.
# Copyright (C) 2015 Danyil Bohdan
# License: MIT
package require Tcl 8.5
package require cmdline
package require snit 2
package require sqlite3
package require textutil

namespace eval ::sqawk {
    variable version 0.5.0
}
namespace eval ::sqawk::script {
    variable debug 0
}

::snit::type ::sqawk::table {
    option -database
    option -dbtable
    option -keyprefix
    option -maxnf

    destructor {
        [$self cget -database] eval "DROP TABLE [$self cget -dbtable]"
    }

    # Create a database table for the table object.
    method initialize {} {
        set fields {}
        set keyPrefix [$self cget -keyprefix]
        set command {
            CREATE TABLE [$self cget -dbtable] (
                ${keyPrefix}nr INTEGER PRIMARY KEY,
                ${keyPrefix}nf INTEGER,
                [join $fields ","]
            )
        }
        set maxNF [$self cget -maxnf]
        for {set i 0} {$i <= $maxNF} {incr i} {
            lappend fields "$keyPrefix$i INTEGER"
        }
        [$self cget -database] eval [subst $command]
    }

    # Insert each from the list $rows into the table.
    method insert-rows rows {
        set db [$self cget -database]
        set keyPrefix [$self cget -keyprefix]
        set tableName [$self cget -dbtable]

        set commands {}

        set rowInsertCommand {
            INSERT INTO $tableName ([join $insertColumnNames ,])
            VALUES ([join $insertValues ,]);
        }

        $db transaction {
            foreach row $rows {
                set insertColumnNames "${keyPrefix}nf,${keyPrefix}0"
                set insertValues {$nf,$row}
                set i 1
                set nf [llength $row]
                foreach field $row {
                    set $keyPrefix$i $field
                    lappend insertColumnNames "$keyPrefix$i"
                    lappend insertValues "\$$keyPrefix$i"
                    incr i
                }
                $db eval [subst $rowInsertCommand]
            }
        }
    }
}

# If key $key is absent in dictionary variable $dictVarName set it to $value.
proc ::sqawk::dict-ensure-default {dictVarName key value} {
    upvar 1 $dictVarName dictionary
    set dictionary [dict merge [list $key $value] $dictionary]
}

::snit::type ::sqawk::sqawk {
    variable tables {}
    variable defaultTableNames [split {abcdefghijklmnopqrstuvwxyz} ""]

    option -database
    option -ofs
    option -ors

    destructor {
        dict for {_ tableObj} q$tables {
            $tableObj destroy
        }
    }

    # Read data from the file specified in the dictionary $fileData into a new
    # database table.
    method read-file fileData {
        # Default table name ("a", "b", "c", ..., "z").
        set defaultTableName [lindex $defaultTableNames [dict size $tables]]
        ::sqawk::dict-ensure-default fileData table $defaultTableName
        # Default keyprefix (equal to table name).
        ::sqawk::dict-ensure-default fileData prefix [dict get $fileData table]
        ::sqawk::dict-ensure-default fileData regexp 1

        array set metadata $fileData

        # Make a new table.
        set newTable [::sqawk::table create %AUTO%]
        $newTable configure -database [$self cget -database]
        $newTable configure -dbtable $metadata(table)
        $newTable configure -keyprefix $metadata(prefix)
        $newTable configure -maxnf $metadata(NF)
        $newTable initialize

        # Read data.
        if {$metadata(filename) eq "-"} {
            set ch stdin
        } else {
            set ch [open $metadata(filename)]
        }
        if {$metadata(regexp)} {
            set records [::textutil::splitx [read $ch] $metadata(RS)]

            # Insert rows in the table.
            if {[lindex $records end] eq ""} {
                set records [lrange $records 0 end-1]
            }
            set rows {}
            foreach record $records {
                lappend rows [::textutil::splitx $record $metadata(FS)]
            }
            $newTable insert-rows $rows
        }
        close $ch

        dict set tables $metadata(table) $newTable
        return $newTable
    }

    # Perform query $query and output the result to $channel.
    method perform-query {query {channel stdout}} {
        # For each row returned...
        [$self cget -database] eval $query results {
            set output {}
            set keys $results(*)
            foreach key $keys {
                lappend output $results($key)
            }
            set outputRecord [join $output [$self cget -ofs]][$self cget -ors]
            puts -nonewline $channel $outputRecord
        }
    }
}

# Remove and return $n elements from the list stored in the variable $varName.
proc ::sqawk::script::lshift! {varName {n 1}} {
    upvar 1 $varName list
    set result [lrange $list 0 $n-1]
    set list [lrange $list $n end]
    return $result
}

# Return a subdictionary of $dictionary with only the keys in $keyList.
proc ::sqawk::script::filter-keys {dictionary keyList {mustExist 1}} {
    set result {}
    foreach key $keyList {
        if {!$mustExist && ![dict exists $dictionary $key]} {
            continue
        }
        dict set result $key [dict get $dictionary $key]
    }
    return $result
}

# Process $argv into per-file options.
proc ::sqawk::script::process-options {argv} {
    set options {
        {FS.arg {[ \t]+} "Input field separator for all files (regexp)"}
        {RS.arg {\n} "Input record separator for all files (regexp by default)"}
        {OFS.arg { } "Output field separator"}
        {ORS.arg {\n} "Output record separator"}
        {NF.arg 10 "Maximum NF value"}
        {v "Print version"}
        {1 "One field only. A shortcut for -FS '^$'"}
    }

    set usage {[options] script [[setting=value ...] filename ...]}
    set cmdOptions [::cmdline::getoptions argv $options $usage]

    # Report version.
    if {[dict get $cmdOptions v]} {
        puts $::sqawk::version
        exit 0
    }

    lassign [lshift! argv] script
    if {$script eq ""} {
        error "empty script"
    }

    if {[dict get $cmdOptions 1]} {
        dict set cmdOptions FS ^$
    }

    # Substitute slashes. (In FS, RS, FSx and RSx the regexp engine will
    # do this for us.)
    foreach option {OFS ORS} {
        dict set cmdOptions $option [subst -nocommands -novariables \
                [dict get $cmdOptions $option]]
    }

    # Global settings.
    set globalOptions [::sqawk::script::filter-keys $cmdOptions { OFS ORS }]

    # File settings.
    set fileCount 0
    set usedStdin 0
    set fileSettings {}
    set defaultFileSettings [::sqawk::script::filter-keys $cmdOptions {
        FS RS NF
    }]
    set currentFileSettings $defaultFileSettings
    while {[llength $argv] > 0} {
        lassign [lshift! argv] elem
        # setting=value
        if {[regexp {([^=]+)=(.*)} $elem _ key value]} {
            dict set currentFileSettings $key $value
        } else {
            # Filename.
            if {[file exists $elem] || ($elem eq "-")} {
                dict set currentFileSettings filename $elem
                lappend fileSettings $currentFileSettings
                set currentFileSettings $defaultFileSettings
                incr fileCount
            } else {
                error "can't find file \"$elem\""
            }
        }
    }
    # If no files are given add "-" (standard input) with the current settings
    # to fileSettings.
    if {$fileCount == 0} {
        dict set currentFileSettings filename -
        lappend fileSettings $currentFileSettings
    }

    return [list $script $globalOptions $fileSettings]
}

proc ::sqawk::script::create-database {database} {
    variable debug

    if {$debug} {
        file delete /tmp/sqawk.db
        ::sqlite3 $database /tmp/sqawk.db
    } else {
        ::sqlite3 $database :memory:
    }
}

proc ::sqawk::script::main {argv0 argv {databaseHandle db}} {
    set error [catch {
        lassign [::sqawk::script::process-options $argv] \
                script options fileSettings
    } errorMessage]
    if {$error} {
        puts "error: $errorMessage"
        exit 1
    }

    ::sqawk::script::create-database $databaseHandle
    set obj [::sqawk::sqawk create %AUTO%]
    $obj configure -database $databaseHandle
    $obj configure -ofs [dict get $options OFS]
    $obj configure -ors [dict get $options ORS]

    foreach file $fileSettings {
        $obj read-file $file
    }

    set error [catch { $obj perform-query $script } errorMessage errorOptions]
    if {$error} {
        # Ignore errors caused by stdout being closed during output (e.g., if
        # someone is piping the output to head(1)).
        if {[lrange [dict get $errorOptions -errorcode] 0 1] ne {POSIX EPIPE}} {
            return -options $errorOptions $errorMessage
        }
    }
    $obj destroy
}

# If this is the main script...
if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} {
    ::sqawk::script::main $argv0 $argv
}