Posted to tcl by kbk at Sat Aug 01 21:04:07 GMT 2009view pretty

if {![package vsatisfies [info tclversion] 8.5]} {
    package require newclock
}

namespace eval rfc2822 {

    variable datepats {}
    
}

# AddDatePat --
#
#	Internal procedure that adds a date pattern to the pattern list
#
# Parameters:
#	wpat - Regexp pattern that matches the weekday
#	wgrp - Format group that matches the weekday
#	ypat - Regexp pattern that matches the year
#	ygrp - Format group that matches the year
#	spat - Regexp pattern that matches the seconds of the minute
#	sgrp - Format group that matches the seconds of the minute
#	zpat - Regexp pattern that matches the time zone
#	zgrp - Format group that matches the time zone
#
# Results:
#	None
#
# Side effects:
#	Adds a complete regexp and a complete [clock scan] pattern to
#	'datepats'

proc rfc2822::AddDatePat { wpat wgrp ypat ygrp spat sgrp zpat zgrp } {
	
	variable datepats
	set regexp {^[[:space:]]*}
	set pat {}
	append regexp $wpat {\d\d?[[:space:]]+[[:alpha:]]+[[:space:]]+} $ypat
	append pat $wgrp {%d %b } $ygrp
	append regexp {[[:space:]]+\d\d?:\d\d} $spat
	append pat { %H:%M} $sgrp
	append regexp $zpat
	append pat $zgrp
	append regexp {[[:space:]]*$}
	lappend datepats $regexp $pat
	return
    }
    
# InitDatePats --
#
#	Internal rocedure that initializes the set of date patterns allowed in
#	an RFC2822 date
#
# Parameters:
#	permissible - 1 if erroneous (but common) time zones are to be
#		      allowed, 0 if they are to be rejected
#
# Results:
#	None.
#
# Side effects:

proc rfc2822::InitDatePats { permissible } {
	
    # Produce formats for the observed variants of ISO2822 dates.  Permissible
    # variants come first in the list; impermissible ones come later.
    
    # The year may be two digits, or four. Four digit year is done first.
    
    foreach ypat {{\d\d\d\d} {\d\d}} ygrp {%Y %y} {
	
	# The seconds of the minute may be provided, or omitted.
	
	foreach spat {{:\d\d} {}} sgrp {:%S {}} {
	    
	    # The weekday may be provided or omitted. It is common but
	    # impermissible to omit the comma after the weekday name.
	    
	    foreach wpat {
		{(?:Mon|T(?:ue|hu)|Wed|Fri|S(?:at|un)),[[:space:]]+}
		{(?:Mon|T(?:ue|hu)|Wed|Fri|S(?:at|un))[[:space:]]+}
		{}
	    } wgrp {
		{%a, }
		{%a }
		{}
	    } wperm {
		1
		0
		1
	    } {
		
		# Time zone is defined as +/- hhmm, or as a named time zone.
		# Other common but buggy formats are GMT+-hh:mm, a time
		# zone name in quotation marks, and complete omission of
		# the time zone.
		
		foreach zpat {
		    {[[:space:]]+(?:[-+]\d\d\d\d|[[:alpha:]]+)} 
		    {[[:space:]]+GMT[-+]\d\d:\d\d}
		    {[[:space:]]+"[[:alpha:]]+"}
		    {}
		} zgrp {
		    { %Z}
		    { GMT%Z}
		    { "%Z"}
		    {}
		} zperm {
		    1
		    0
		    0
		    0
		} {
		    if { ($zperm && $wperm) == $permissible } {
			AddDatePat $wpat $wgrp $ypat $ygrp \
			    $spat $sgrp $zpat $zgrp
		    }
		}
	    }
	}
    }
    return
}

# Initialize the date patterns

namespace eval rfc2822 {
    InitDatePats 1
    InitDatePats 0
    rename AddDatePat {}
    rename InitDatePats {}
}

# rfc2822::parseDate --
#
#	Parses a date expressed in RFC2822 format
#
# Parameters:
#	date - The date to parse
#
# Results:
#	Returns the date expressed in seconds from the Epoch, or throws
#	an error if the date could not be parsed.

proc rfc2822::parseDate { date } {
    variable datepats

    # Strip comments and excess whitespace from the date field

    regsub -all -expanded {
	\(		# open parenthesis
        (:?
              [^()[.\.]]   # character other than ()\
              |\\.         # or backslash escape
        )*              # any number of times
        \)              # close paren
    } $date {} date
    set date [string trim $date]

    # Match the patterns in order of preference, returning the first success

    foreach {regexp pat} $datepats {
	if { [regexp -nocase $regexp $date] } {
	    return [clock scan $date -format $pat]
	}
    }

    return -code error -errorcode {RFC2822 BADDATE} \
	"expected an RFC2822 date, got \"$date\""

}

# Usage example

if {![info exists ::argv0] || [info script] ne $::argv0} return
puts [clock format \
	  [rfc2822::parseDate {Mon(day), 23 Aug(ust) 2004 01:23:45 UT}]]