Posted to tcl by dbohdan at Fri Apr 24 16:07:02 GMT 2015view raw

  1. #!/usr/bin/env tclsh
  2. # Sqawk, an SQL Awk.
  3. # Copyright (C) 2015 Danyil Bohdan
  4. # License: MIT
  5. package require Tcl 8.5
  6. package require cmdline
  7. package require snit 2
  8. package require sqlite3
  9. package require textutil
  10.  
  11. namespace eval ::sqawk {
  12. variable version 0.5.0
  13. }
  14. namespace eval ::sqawk::script {
  15. variable debug 0
  16. }
  17.  
  18. ::snit::type ::sqawk::table {
  19. option -database
  20. option -dbtable
  21. option -keyprefix
  22. option -maxnf
  23.  
  24. destructor {
  25. [$self cget -database] eval "DROP TABLE [$self cget -dbtable]"
  26. }
  27.  
  28. # Create a database table for the table object.
  29. method initialize {} {
  30. set fields {}
  31. set keyPrefix [$self cget -keyprefix]
  32. set command {
  33. CREATE TABLE [$self cget -dbtable] (
  34. ${keyPrefix}nr INTEGER PRIMARY KEY,
  35. ${keyPrefix}nf INTEGER,
  36. [join $fields ","]
  37. )
  38. }
  39. set maxNF [$self cget -maxnf]
  40. for {set i 0} {$i <= $maxNF} {incr i} {
  41. lappend fields "$keyPrefix$i INTEGER"
  42. }
  43. [$self cget -database] eval [subst $command]
  44. }
  45.  
  46. # Insert each from the list $rows into the table.
  47. method insert-rows rows {
  48. set db [$self cget -database]
  49. set keyPrefix [$self cget -keyprefix]
  50. set tableName [$self cget -dbtable]
  51.  
  52. set commands {}
  53.  
  54. set rowInsertCommand {
  55. INSERT INTO $tableName ([join $insertColumnNames ,])
  56. VALUES ([join $insertValues ,]);
  57. }
  58.  
  59. $db transaction {
  60. foreach row $rows {
  61. set insertColumnNames "${keyPrefix}nf,${keyPrefix}0"
  62. set insertValues {$nf,$row}
  63. set i 1
  64. set nf [llength $row]
  65. foreach field $row {
  66. set $keyPrefix$i $field
  67. lappend insertColumnNames "$keyPrefix$i"
  68. lappend insertValues "\$$keyPrefix$i"
  69. incr i
  70. }
  71. $db eval [subst $rowInsertCommand]
  72. }
  73. }
  74. }
  75. }
  76.  
  77. # If key $key is absent in dictionary variable $dictVarName set it to $value.
  78. proc ::sqawk::dict-ensure-default {dictVarName key value} {
  79. upvar 1 $dictVarName dictionary
  80. set dictionary [dict merge [list $key $value] $dictionary]
  81. }
  82.  
  83. ::snit::type ::sqawk::sqawk {
  84. variable tables {}
  85. variable defaultTableNames [split {abcdefghijklmnopqrstuvwxyz} ""]
  86.  
  87. option -database
  88. option -ofs
  89. option -ors
  90.  
  91. destructor {
  92. dict for {_ tableObj} q$tables {
  93. $tableObj destroy
  94. }
  95. }
  96.  
  97. # Read data from the file specified in the dictionary $fileData into a new
  98. # database table.
  99. method read-file fileData {
  100. # Default table name ("a", "b", "c", ..., "z").
  101. set defaultTableName [lindex $defaultTableNames [dict size $tables]]
  102. ::sqawk::dict-ensure-default fileData table $defaultTableName
  103. # Default keyprefix (equal to table name).
  104. ::sqawk::dict-ensure-default fileData prefix [dict get $fileData table]
  105. ::sqawk::dict-ensure-default fileData regexp 1
  106.  
  107. array set metadata $fileData
  108.  
  109. # Make a new table.
  110. set newTable [::sqawk::table create %AUTO%]
  111. $newTable configure -database [$self cget -database]
  112. $newTable configure -dbtable $metadata(table)
  113. $newTable configure -keyprefix $metadata(prefix)
  114. $newTable configure -maxnf $metadata(NF)
  115. $newTable initialize
  116.  
  117. # Read data.
  118. if {$metadata(filename) eq "-"} {
  119. set ch stdin
  120. } else {
  121. set ch [open $metadata(filename)]
  122. }
  123. if {$metadata(regexp)} {
  124. set records [::textutil::splitx [read $ch] $metadata(RS)]
  125.  
  126. # Insert rows in the table.
  127. if {[lindex $records end] eq ""} {
  128. set records [lrange $records 0 end-1]
  129. }
  130. set rows {}
  131. foreach record $records {
  132. lappend rows [::textutil::splitx $record $metadata(FS)]
  133. }
  134. $newTable insert-rows $rows
  135. }
  136. close $ch
  137.  
  138. dict set tables $metadata(table) $newTable
  139. return $newTable
  140. }
  141.  
  142. # Perform query $query and output the result to $channel.
  143. method perform-query {query {channel stdout}} {
  144. # For each row returned...
  145. [$self cget -database] eval $query results {
  146. set output {}
  147. set keys $results(*)
  148. foreach key $keys {
  149. lappend output $results($key)
  150. }
  151. set outputRecord [join $output [$self cget -ofs]][$self cget -ors]
  152. puts -nonewline $channel $outputRecord
  153. }
  154. }
  155. }
  156.  
  157. # Remove and return $n elements from the list stored in the variable $varName.
  158. proc ::sqawk::script::lshift! {varName {n 1}} {
  159. upvar 1 $varName list
  160. set result [lrange $list 0 $n-1]
  161. set list [lrange $list $n end]
  162. return $result
  163. }
  164.  
  165. # Return a subdictionary of $dictionary with only the keys in $keyList.
  166. proc ::sqawk::script::filter-keys {dictionary keyList {mustExist 1}} {
  167. set result {}
  168. foreach key $keyList {
  169. if {!$mustExist && ![dict exists $dictionary $key]} {
  170. continue
  171. }
  172. dict set result $key [dict get $dictionary $key]
  173. }
  174. return $result
  175. }
  176.  
  177. # Process $argv into per-file options.
  178. proc ::sqawk::script::process-options {argv} {
  179. set options {
  180. {FS.arg {[ \t]+} "Input field separator for all files (regexp)"}
  181. {RS.arg {\n} "Input record separator for all files (regexp by default)"}
  182. {OFS.arg { } "Output field separator"}
  183. {ORS.arg {\n} "Output record separator"}
  184. {NF.arg 10 "Maximum NF value"}
  185. {v "Print version"}
  186. {1 "One field only. A shortcut for -FS '^$'"}
  187. }
  188.  
  189. set usage {[options] script [[setting=value ...] filename ...]}
  190. set cmdOptions [::cmdline::getoptions argv $options $usage]
  191.  
  192. # Report version.
  193. if {[dict get $cmdOptions v]} {
  194. puts $::sqawk::version
  195. exit 0
  196. }
  197.  
  198. lassign [lshift! argv] script
  199. if {$script eq ""} {
  200. error "empty script"
  201. }
  202.  
  203. if {[dict get $cmdOptions 1]} {
  204. dict set cmdOptions FS ^$
  205. }
  206.  
  207. # Substitute slashes. (In FS, RS, FSx and RSx the regexp engine will
  208. # do this for us.)
  209. foreach option {OFS ORS} {
  210. dict set cmdOptions $option [subst -nocommands -novariables \
  211. [dict get $cmdOptions $option]]
  212. }
  213.  
  214. # Global settings.
  215. set globalOptions [::sqawk::script::filter-keys $cmdOptions { OFS ORS }]
  216.  
  217. # File settings.
  218. set fileCount 0
  219. set usedStdin 0
  220. set fileSettings {}
  221. set defaultFileSettings [::sqawk::script::filter-keys $cmdOptions {
  222. FS RS NF
  223. }]
  224. set currentFileSettings $defaultFileSettings
  225. while {[llength $argv] > 0} {
  226. lassign [lshift! argv] elem
  227. # setting=value
  228. if {[regexp {([^=]+)=(.*)} $elem _ key value]} {
  229. dict set currentFileSettings $key $value
  230. } else {
  231. # Filename.
  232. if {[file exists $elem] || ($elem eq "-")} {
  233. dict set currentFileSettings filename $elem
  234. lappend fileSettings $currentFileSettings
  235. set currentFileSettings $defaultFileSettings
  236. incr fileCount
  237. } else {
  238. error "can't find file \"$elem\""
  239. }
  240. }
  241. }
  242. # If no files are given add "-" (standard input) with the current settings
  243. # to fileSettings.
  244. if {$fileCount == 0} {
  245. dict set currentFileSettings filename -
  246. lappend fileSettings $currentFileSettings
  247. }
  248.  
  249. return [list $script $globalOptions $fileSettings]
  250. }
  251.  
  252. proc ::sqawk::script::create-database {database} {
  253. variable debug
  254.  
  255. if {$debug} {
  256. file delete /tmp/sqawk.db
  257. ::sqlite3 $database /tmp/sqawk.db
  258. } else {
  259. ::sqlite3 $database :memory:
  260. }
  261. }
  262.  
  263. proc ::sqawk::script::main {argv0 argv {databaseHandle db}} {
  264. set error [catch {
  265. lassign [::sqawk::script::process-options $argv] \
  266. script options fileSettings
  267. } errorMessage]
  268. if {$error} {
  269. puts "error: $errorMessage"
  270. exit 1
  271. }
  272.  
  273. ::sqawk::script::create-database $databaseHandle
  274. set obj [::sqawk::sqawk create %AUTO%]
  275. $obj configure -database $databaseHandle
  276. $obj configure -ofs [dict get $options OFS]
  277. $obj configure -ors [dict get $options ORS]
  278.  
  279. foreach file $fileSettings {
  280. $obj read-file $file
  281. }
  282.  
  283. set error [catch { $obj perform-query $script } errorMessage errorOptions]
  284. if {$error} {
  285. # Ignore errors caused by stdout being closed during output (e.g., if
  286. # someone is piping the output to head(1)).
  287. if {[lrange [dict get $errorOptions -errorcode] 0 1] ne {POSIX EPIPE}} {
  288. return -options $errorOptions $errorMessage
  289. }
  290. }
  291. $obj destroy
  292. }
  293.  
  294. # If this is the main script...
  295. if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} {
  296. ::sqawk::script::main $argv0 $argv
  297. }
  298.