#!/usr/bin/env tclsh
## -*- tcl -*-
# Extract and report oscon schedule

package require struct
package require csv
package require report
package require htmlparse
package require textutil
package require log

# Restrict logging to levels 'info' and higher.
log::lvSuppressLE debug

# 1. CSV structure filled by the parser = main data table
#    ----------------------------------------------------
#    Day Time/Start Time/End Track Tower Room Speaker Title
#
#    Matrices: "dmain" and "dmainr"
#
#    Difference: dmainr contains gratuitous newlines in the
#    speaker column which make for a better TXT report (less
#    wide).
#
#    This is also report 'main'.
#
# 2. Schedule report to see conflicts, CSV structure
#    ----------------------------------------------
#    Day Time Location-Columns, one per Room
#    (15min granularity) (Content: Speaker + Topic)
#
#    Matrices: "sched" and "schedr". Difference as for dmain(r)
#    and the location columns
#
#    This will be report 'sched'.

# main --
#
#	Entry point. Reads the output prefix and the list of HTML files
#	from the command line, parses every file into the main table,
#	derives the schedule table and writes all reports.
#
# Arguments:
#	None. Uses the globals 'argv' (input) and 'pfx' (set here, read
#	by the dump procedures).
#
# Results:
#	None. Exits with status -1 after printing the usage message if
#	the prefix or the file list is missing.

proc main {} {
    global pfx argv

    set pfx   [lindex $argv 0]
    set files [lrange $argv 1 end]

    if {($pfx == {}) || ([llength $files] == 0)} {
	usage
	exit -1
    }

    initialize
    foreach f $files {
	log::log info "Scanning \"$f\" ..."
	parse $f
    }

    gen_schedule
    dump_main
    dump_schedule
    postscript
    return
}

# usage --
#
#	Prints the command line syntax of the script to stdout.

proc usage {} {
    global argv0
    puts "usage: $argv0 prefix file..."
}

# initialize --
#
#	Creates the four matrices used by the script (dmain, dmainr,
#	sched, schedr), resets the 'rooms' and 'tracks' arrays and
#	puts the header row into the two main tables.

proc initialize {} {
    global rooms tracks

    ::struct::matrix::matrix dmain  ; # data 1
    ::struct::matrix::matrix dmainr ; # data 1r
    ::struct::matrix::matrix sched  ; # data 2
    ::struct::matrix::matrix schedr ; # data 2r

    array set rooms  {}
    array set tracks {}

    dmain  add columns 8
    dmain  add row {Day Start End Track Tower Room Speaker Title}
    dmainr add columns 8
    dmainr add row {Day Start End Track Tower Room Speaker Title}
    return
}

# parse --
#
#	Reads one HTML file, converts it into a tree ('t') and walks
#	that tree along hard-wired child-index paths to extract the
#	day, the sessions of that day and the talks of each session.
#	Every talk becomes one row in 'dmain' (speakers joined by
#	", ") and one row in 'dmainr' (speakers separated by newlines).
#
# Arguments:
#	htmlfile	Path of the HTML file to read.
#
# Results:
#	None. Side effects: rows added to dmain/dmainr, keys added to
#	the global arrays 'rooms' (key "tower/room") and 'tracks'.

proc parse {htmlfile} {
    global rooms tracks

    ::struct::tree::tree t

    log::log info "Reading \"$htmlfile\" ..."

    set html [read [set fh [open $htmlfile]]]
    close $fh

    log::log info "Parsing \"$htmlfile\" ..."

    htmlparse::2tree $html t
    htmlparse::removeVisualFluff t
    htmlparse::removeFormDefs t

    log::log info "Extracting information"

    #puts ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Navigate and extract the information

    #t walk root -command {print %t %n}
    #exit

    # The index paths used here and below are tied to the structure
    # of the input pages.
    set base [walk {1 1 0 1 1 0 1 0 1 0}]

    set day [walkf $base {0 0}]
    set day [escape [t get $day -key data]]

    log::log debug "Day = $day"

    # Only the first three characters of the day text are kept.
    set day [string range $day 0 2]

    # Walk through the sessions of that day.

    set sess [t next $base]
    while {$sess != {}} {
	set start [cvtdate [escape [t get [walkf $sess {0 0}] -key data]]]
	set track [string trim [escape [t get [walkf $sess {1 0}] -key data]]]
	set loc   [escape [t get [walkf $sess {1 1 0}] -key data]]
	set loc   [string trimright $loc "\n\r\t:"]

	log::log debug " $start - $track - $loc"

	# Separate Room/Tower information ...
	#
	# If the location does not have the expected form, 'room' and
	# 'tower' would otherwise be undefined (first session) or
	# silently keep the values of the previous session. Fall back
	# to the raw location text instead.

	if {![regexp {(.*) in the (.*) Tower} $loc -> room tower]} {
	    log::log warning "Unrecognized location \"$loc\", using it as room"
	    set room  $loc
	    set tower {}
	}
	set room  [string trim $room]
	set tower [string trim $tower]

	set rooms($tower/$room) .
	set tracks($track)      .

	set talk [walkf $sess {1 1 3}]
	while {$talk != {}} {
	    # The node holding the time range is followed by the node
	    # holding title and speaker.
	    set time    [escape [t get $talk -key data]]
	    set talk    [t next $talk]
	    set title   [escape [t get [walkf $talk {0 0 0}] -key data]]
	    set speaker [escape [t get [walkf $talk {0 2}] -key data]]

	    # Now we have everything to fill the main table ...
	    # (After a bit of munging of the strings we got)

	    foreach {start end} [split $time -] break
	    set start [cvtdate $start]
	    set end   [cvtdate $end]

	    # Normalize the speaker list: CR -> LF, collapse runs of
	    # LF, strip blanks around each LF.
	    regsub -all \r $speaker \n speaker
	    regsub -all \n+ $speaker \n speaker
	    regsub -all " *\n *" $speaker "\n" speaker

	    set speakerc [split $speaker "\n"]
	    set speakerc [join $speakerc ", "]

	    log::log debug " $start - $end - $speakerc - $title"
	    #puts >>$speakerc<<
	    #puts >>$speaker<<

	    # Day Time/Start Time/End Tower Room Speaker Title
	    dmainr add row [list $day $start $end $track $tower $room $speaker  $title]
	    dmain  add row [list $day $start $end $track $tower $room $speakerc $title]

	    # Forward to next talk. Two steps are taken; errors are
	    # deliberately ignored, so 'talk' stays empty once the end
	    # of the sibling list has been reached.

	    catch {set talk [t next $talk]}
	    catch {set talk [t next $talk]}
	}
	set sess [t next $sess]
    }

    t destroy
    return
}

# print --
#
#	Debugging helper, usable as tree walk callback (see the
#	commented 'walk' command in 'parse'). Logs one line per node at
#	level 'debug', indented by four blanks per depth level.
#
# Arguments:
#	t	Tree command.
#	n	Node to print.

proc print {t n} {
    set tp  [$t get $n -key type]
    set d   [$t depth $n]
    set idx ""
    catch {set idx [$t index $n]}

    # Doubling twice = indentation of 4 * depth.
    incr d $d
    incr d $d

    switch -exact -- $tp {
	a {
	    log::log debug "[textutil::strRepeat " " $d]$idx $tp ([$t get $n -key data]...)"
	}
	PCDATA {
	    log::log debug "[textutil::strRepeat " " $d]$idx $tp ([string range [$t get $n -key data] 0 20]...)"
	}
	default {
	    log::log debug "[textutil::strRepeat " " $d]$idx $tp"
	}
    }
}

# walkf --
#
#	Follows a path of child indices through the tree 't'.
#
# Arguments:
#	n	Node to start from.
#	p	List of child indices, applied one after the other.
#
# Results:
#	The node reached, or the empty string if some index along the
#	path did not select a child.

proc walkf {n p} {
    #log::log info "$n + $p ="
    foreach idx $p {
	if {$n == ""} {break}
	set n [lindex [t children $n] $idx]
	#log::log info "$idx :- $n"
    }
    return $n
}

# walk --
#
#	Like 'walkf', but always starts at the root of tree 't'.

proc walk {p} {
    return [walkf root $p]
}

# cvtdate --
#
#	Converts a time string understood by 'clock scan' into the
#	24-hour form "HH:MM".

proc cvtdate {date} {
    clock format [clock scan $date] -format "%H:%M"
}

# escape --
#
#	Resolves the HTML escapes in a piece of text.
#
# Arguments:
#	text	Text as stored in the parse tree.
#
# Results:
#	The text with all escapes mapped to their characters.

proc escape {text} {
    # Special escape for nbsp, convert into space and not the
    # character specified by the standard. This has to happen before
    # the generic mapping. A literal non-breaking space is treated the
    # same way.

    set text [string map [list "&nbsp;" " " "\u00a0" " "] $text]
    htmlparse::mapEscapes $text
}

# gen_schedule --
#
#	Sorts the main tables and derives the schedule tables 'sched'
#	and 'schedr' from 'dmain': one row per (day, start time), one
#	column per track. Overly wide columns of 'schedr' are wrapped.
#
# Results:
#	None. Side effect: the values in the global array 'tracks' are
#	replaced by the column index assigned to each track.

proc gen_schedule {} {
    global rooms tracks

    # Sort the data rows (the header in row 0 stays in place) by
    # start time, then by day in decreasing order.
    dmain  set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmain  get rect 0 1 end end]]]
    dmainr set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmainr get rect 0 1 end end]]]

    sched  add columns 2
    schedr add columns 2

    #sched add columns [array size rooms]
    #schedr add columns [array size rooms]
    sched  add columns [array size tracks]
    schedr add columns [array size tracks]

    #log::log info Tracks=[array size tracks]
    #log::log info Rooms.=[array size rooms]

    # Header row; remember the column of each track.
    set res [list Day Time]
    set c 2
    foreach k [lsort [array names tracks]] {
	lappend res $k
	set tracks($k) $c
	incr c
    }

    sched  add row $res
    schedr add row $res

    # Data in dmain is already sorted by day. By starting time only
    # partially, there are back references.
    # Just move them to the correct rooms and rows!

    #-- Day Time Location-Columns, one per Room --

    set n [dmain rows]
    set p 0
    array set rmap {}

    for {set r 1} {$r < $n} {incr r} {
	foreach {day start end track tower room speaker title} [dmain get row $r] break
	#[list $day $start $end $tower $room $speakerc $title]

	# rmap: "day,start" -> row index in sched/schedr.
	set key $day,$start
	if {![info exists rmap($key)]} {
	    log::log info "Track schedule $day $start"

	    sched  add row
	    schedr add row
	    incr p
	    set rmap($key) $p

	    sched  set cell 0 $p $day
	    sched  set cell 1 $p $start
	    schedr set cell 0 $p $day
	    schedr set cell 1 $p $start
	}

	sched  set cell $tracks($track) $rmap($key) "$tower; $room; $speaker; $title"
	schedr set cell $tracks($track) $rmap($key) "$tower $room\n$speaker\n$title"
    }

    # Squeeze the columns 2+ in the report matrix

    set cols [schedr columns]
    for {set c 2} {$c < $cols} {incr c} {
	if {[schedr columnwidth $c] > 21} {
	    log::log debug "Squeezing $c"

	    set col [schedr get column $c]
	    set res [list]
	    foreach item $col {
		lappend res [wrap $item 21]
	    }
	    schedr set column $c $res
	}
    }

    # Now sort by day (primary key) and starting time (secondary key).
    # (Meaning we have to sort by time first, and then the day)

    # sched setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [sched getrect 0 0 end end]]]
    # schedr setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [schedr getrect 0 0 end end]]]
    return
}

# dump_main --
#
#	Writes the main table as CSV, TXT and HTML report into files
#	named after the global prefix 'pfx'.

proc dump_main {} {
    global pfx

    log::log info "Writing talk information /CSV"

    set f [open ${pfx}.main.csv w]
    csv::writematrix dmain $f
    close $f

    log::log info "Writing talk information /TXT"

    # Compute width of report and squeeze the title column to fit
    # below 80 char/line

    # Day Time/Start Time/End Track Tower Room Speaker Title

    set total 0
    incr total [dmain columnwidth 0]
    incr total [dmain columnwidth 1]
    incr total [dmain columnwidth 2]
    incr total [dmain columnwidth 3]
    incr total [dmain columnwidth 4]
    incr total [dmain columnwidth 5]
    incr total [dmain columnwidth 6]

    #log::log info Total=$total

    # NOTE(review): gen_schedule uses the two-word methods
    # 'get column' / 'set column'; confirm that the one-word forms
    # used below exist in the struct::matrix version in use.
    # NOTE(review): the titles are adjusted in 'dmain', while the TXT
    # report below prints 'dmainr' - confirm this is intended.

    if {$total < 80} {
	set total [expr {80 - $total}]
	set titles [dmain getcolumn 7]
	set res [list]
	foreach t $titles {
	    lappend res [textutil::adjust $t -length $total]
	}
	dmain setcolumn 7 $res
    }

    ::report::report r [dmainr columns] style captionedtable 1
    set f [open ${pfx}.main.txt w]
    r printmatrix2channel dmainr $f
    close $f
    r destroy

    # Now the HTML report, use 'dmain' as base, actually formatting
    # into lines is done by the browser.

    log::log info "Writing talk information /HTML"

    ::report::report r [dmain columns] style html
    set f [open ${pfx}.main.html w]
    puts $f "