Posted to tcl by dbohdan at Fri Apr 24 16:07:02 GMT 2015view raw
- #!/usr/bin/env tclsh
- # Sqawk, an SQL Awk.
- # Copyright (C) 2015 Danyil Bohdan
- # License: MIT
- package require Tcl 8.5
- package require cmdline
- package require snit 2
- package require sqlite3
- package require textutil
- namespace eval ::sqawk {
- variable version 0.5.0
- }
- namespace eval ::sqawk::script {
- variable debug 0
- }
- ::snit::type ::sqawk::table {
- option -database
- option -dbtable
- option -keyprefix
- option -maxnf
- destructor {
- [$self cget -database] eval "DROP TABLE [$self cget -dbtable]"
- }
- # Create a database table for the table object.
- method initialize {} {
- set fields {}
- set keyPrefix [$self cget -keyprefix]
- set command {
- CREATE TABLE [$self cget -dbtable] (
- ${keyPrefix}nr INTEGER PRIMARY KEY,
- ${keyPrefix}nf INTEGER,
- [join $fields ","]
- )
- }
- set maxNF [$self cget -maxnf]
- for {set i 0} {$i <= $maxNF} {incr i} {
- lappend fields "$keyPrefix$i INTEGER"
- }
- [$self cget -database] eval [subst $command]
- }
- # Insert each from the list $rows into the table.
- method insert-rows rows {
- set db [$self cget -database]
- set keyPrefix [$self cget -keyprefix]
- set tableName [$self cget -dbtable]
- set commands {}
- set rowInsertCommand {
- INSERT INTO $tableName ([join $insertColumnNames ,])
- VALUES ([join $insertValues ,]);
- }
- $db transaction {
- foreach row $rows {
- set insertColumnNames "${keyPrefix}nf,${keyPrefix}0"
- set insertValues {$nf,$row}
- set i 1
- set nf [llength $row]
- foreach field $row {
- set $keyPrefix$i $field
- lappend insertColumnNames "$keyPrefix$i"
- lappend insertValues "\$$keyPrefix$i"
- incr i
- }
- $db eval [subst $rowInsertCommand]
- }
- }
- }
- }
- # If key $key is absent in dictionary variable $dictVarName set it to $value.
- proc ::sqawk::dict-ensure-default {dictVarName key value} {
- upvar 1 $dictVarName dictionary
- set dictionary [dict merge [list $key $value] $dictionary]
- }
- ::snit::type ::sqawk::sqawk {
- variable tables {}
- variable defaultTableNames [split {abcdefghijklmnopqrstuvwxyz} ""]
- option -database
- option -ofs
- option -ors
- destructor {
- dict for {_ tableObj} q$tables {
- $tableObj destroy
- }
- }
- # Read data from the file specified in the dictionary $fileData into a new
- # database table.
- method read-file fileData {
- # Default table name ("a", "b", "c", ..., "z").
- set defaultTableName [lindex $defaultTableNames [dict size $tables]]
- ::sqawk::dict-ensure-default fileData table $defaultTableName
- # Default keyprefix (equal to table name).
- ::sqawk::dict-ensure-default fileData prefix [dict get $fileData table]
- ::sqawk::dict-ensure-default fileData regexp 1
- array set metadata $fileData
- # Make a new table.
- set newTable [::sqawk::table create %AUTO%]
- $newTable configure -database [$self cget -database]
- $newTable configure -dbtable $metadata(table)
- $newTable configure -keyprefix $metadata(prefix)
- $newTable configure -maxnf $metadata(NF)
- $newTable initialize
- # Read data.
- if {$metadata(filename) eq "-"} {
- set ch stdin
- } else {
- set ch [open $metadata(filename)]
- }
- if {$metadata(regexp)} {
- set records [::textutil::splitx [read $ch] $metadata(RS)]
- # Insert rows in the table.
- if {[lindex $records end] eq ""} {
- set records [lrange $records 0 end-1]
- }
- set rows {}
- foreach record $records {
- lappend rows [::textutil::splitx $record $metadata(FS)]
- }
- $newTable insert-rows $rows
- }
- close $ch
- dict set tables $metadata(table) $newTable
- return $newTable
- }
- # Perform query $query and output the result to $channel.
- method perform-query {query {channel stdout}} {
- # For each row returned...
- [$self cget -database] eval $query results {
- set output {}
- set keys $results(*)
- foreach key $keys {
- lappend output $results($key)
- }
- set outputRecord [join $output [$self cget -ofs]][$self cget -ors]
- puts -nonewline $channel $outputRecord
- }
- }
- }
- # Remove and return $n elements from the list stored in the variable $varName.
- proc ::sqawk::script::lshift! {varName {n 1}} {
- upvar 1 $varName list
- set result [lrange $list 0 $n-1]
- set list [lrange $list $n end]
- return $result
- }
- # Return a subdictionary of $dictionary with only the keys in $keyList.
- proc ::sqawk::script::filter-keys {dictionary keyList {mustExist 1}} {
- set result {}
- foreach key $keyList {
- if {!$mustExist && ![dict exists $dictionary $key]} {
- continue
- }
- dict set result $key [dict get $dictionary $key]
- }
- return $result
- }
- # Process $argv into per-file options.
- proc ::sqawk::script::process-options {argv} {
- set options {
- {FS.arg {[ \t]+} "Input field separator for all files (regexp)"}
- {RS.arg {\n} "Input record separator for all files (regexp by default)"}
- {OFS.arg { } "Output field separator"}
- {ORS.arg {\n} "Output record separator"}
- {NF.arg 10 "Maximum NF value"}
- {v "Print version"}
- {1 "One field only. A shortcut for -FS '^$'"}
- }
- set usage {[options] script [[setting=value ...] filename ...]}
- set cmdOptions [::cmdline::getoptions argv $options $usage]
- # Report version.
- if {[dict get $cmdOptions v]} {
- puts $::sqawk::version
- exit 0
- }
- lassign [lshift! argv] script
- if {$script eq ""} {
- error "empty script"
- }
- if {[dict get $cmdOptions 1]} {
- dict set cmdOptions FS ^$
- }
- # Substitute slashes. (In FS, RS, FSx and RSx the regexp engine will
- # do this for us.)
- foreach option {OFS ORS} {
- dict set cmdOptions $option [subst -nocommands -novariables \
- [dict get $cmdOptions $option]]
- }
- # Global settings.
- set globalOptions [::sqawk::script::filter-keys $cmdOptions { OFS ORS }]
- # File settings.
- set fileCount 0
- set usedStdin 0
- set fileSettings {}
- set defaultFileSettings [::sqawk::script::filter-keys $cmdOptions {
- FS RS NF
- }]
- set currentFileSettings $defaultFileSettings
- while {[llength $argv] > 0} {
- lassign [lshift! argv] elem
- # setting=value
- if {[regexp {([^=]+)=(.*)} $elem _ key value]} {
- dict set currentFileSettings $key $value
- } else {
- # Filename.
- if {[file exists $elem] || ($elem eq "-")} {
- dict set currentFileSettings filename $elem
- lappend fileSettings $currentFileSettings
- set currentFileSettings $defaultFileSettings
- incr fileCount
- } else {
- error "can't find file \"$elem\""
- }
- }
- }
- # If no files are given add "-" (standard input) with the current settings
- # to fileSettings.
- if {$fileCount == 0} {
- dict set currentFileSettings filename -
- lappend fileSettings $currentFileSettings
- }
- return [list $script $globalOptions $fileSettings]
- }
- proc ::sqawk::script::create-database {database} {
- variable debug
- if {$debug} {
- file delete /tmp/sqawk.db
- ::sqlite3 $database /tmp/sqawk.db
- } else {
- ::sqlite3 $database :memory:
- }
- }
- proc ::sqawk::script::main {argv0 argv {databaseHandle db}} {
- set error [catch {
- lassign [::sqawk::script::process-options $argv] \
- script options fileSettings
- } errorMessage]
- if {$error} {
- puts "error: $errorMessage"
- exit 1
- }
- ::sqawk::script::create-database $databaseHandle
- set obj [::sqawk::sqawk create %AUTO%]
- $obj configure -database $databaseHandle
- $obj configure -ofs [dict get $options OFS]
- $obj configure -ors [dict get $options ORS]
- foreach file $fileSettings {
- $obj read-file $file
- }
- set error [catch { $obj perform-query $script } errorMessage errorOptions]
- if {$error} {
- # Ignore errors caused by stdout being closed during output (e.g., if
- # someone is piping the output to head(1)).
- if {[lrange [dict get $errorOptions -errorcode] 0 1] ne {POSIX EPIPE}} {
- return -options $errorOptions $errorMessage
- }
- }
- $obj destroy
- }
- # If this is the main script...
- if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} {
- ::sqawk::script::main $argv0 $argv
- }