require 'trollop'
require "sup"
+PROGRESS_UPDATE_INTERVAL = 15 # seconds
+
class Float
def to_s; sprintf '%.2f', self; end
def to_time_s
IO.foreach opts[:restore] do |l|
l =~ /^(\S+) \((.*?)\)$/ or raise "Can't read dump line: #{l.inspect}"
mid, labels = $1, $2
- dump[mid] = labels.split(" ").map { |x| x.intern }
+ dump[mid] = labels.symbolistize
end
$stderr.puts "Read #{dump.size} entries from dump file."
dump
unless target == :new
if opts[:start_at]
- sources.each { |s| s.seek_to! opts[:start_at] }
+ Trollop::die :start_at, "can only be used on one source" unless sources.size == 1
+ sources.first.seek_to! opts[:start_at]
+ sources.first.correct_offset! if sources.first.respond_to?(:correct_offset!)
else
sources.each { |s| s.reset! }
end
num_added = num_updated = num_scanned = num_restored = 0
last_info_time = start_time = Time.now
- Redwood::PollManager.add_messages_from source, :force_overwrite => true do |m, offset, entry|
+ Redwood::PollManager.add_messages_from source, :force_overwrite => true do |m_old, m, offset|
num_scanned += 1
seen[m.id] = true
+ if Time.now - last_info_time > PROGRESS_UPDATE_INTERVAL
+ last_info_time = Time.now
+ elapsed = last_info_time - start_time
+ start = opts[:start_at] || source.start_offset
+ pctdone = 100.0 * (source.cur_offset - start).to_f / (source.end_offset - start).to_f
+ remaining = (100.0 - pctdone) * (elapsed.to_f / pctdone)
+ $stderr.printf "## read %dm (about %.0f%%) @ %.1fm/s. %s elapsed, about %s remaining\n", num_scanned, pctdone, num_scanned / elapsed, elapsed.to_time_s, remaining.to_time_s
+ end
+
## skip if we're operating only on changed messages, the message
## is in the index, and it's unchanged from what the source is
## reporting.
- next if target == :changed && entry && entry[:source_id].to_i == source.id && entry[:source_info].to_i == offset
+ next if target == :changed && m_old && m_old.source.id == source.id && m_old.source_info == offset
## get the state currently in the index
- index_state =
- if entry
- entry[:label].split(/\s+/).map { |x| x.intern }
- else
- nil
- end
+ index_state = m_old.labels.dup if m_old
## skip if we're operating on restored messages, and this one
## ain't.
## to default source state modification flags.
m.labels -= [:inbox] if opts[:archive]
m.labels -= [:unread] if opts[:read]
- m.labels += opts[:extra_labels].split(/\s*,\s*/).map { |x| x.intern } if opts[:extra_labels]
+ m.labels += opts[:extra_labels].strip.split(/\s*,\s*/).map { |x| x.intern } if opts[:extra_labels]
## assign message labels based on the operation we're performing
case op
when :asis
- m.labels = index_state if index_state
+ m.labels = ((m.labels - [:unread, :inbox]) + index_state).uniq if index_state
when :restore
## if the entry exists on disk
if restored_state[m.id]
## nothin! use default source labels
end
- if Time.now - last_info_time > 60
+ if Time.now - last_info_time > PROGRESS_UPDATE_INTERVAL
last_info_time = Time.now
elapsed = last_info_time - start_time
pctdone = source.respond_to?(:pct_done) ? source.pct_done : 100.0 * (source.cur_offset.to_f - source.start_offset).to_f / (source.end_offset - source.start_offset).to_f
remaining = (100.0 - pctdone) * (elapsed.to_f / pctdone)
- $stderr.puts "## #{num_scanned} (#{pctdone}%) read; #{elapsed.to_time_s} elapsed; #{remaining.to_time_s} remaining"
+ $stderr.printf "## read %dm (about %.0f%%) @ %.1fm/s. %s elapsed, about %s remaining\n", num_scanned, pctdone, num_scanned / elapsed, elapsed.to_time_s, remaining.to_time_s
end
if index_state.nil?
- puts "Adding message #{source}##{offset} with state {#{m.labels * ', '}}" if opts[:verbose]
+ puts "Adding message #{source}##{offset} from #{m.from} with state {#{m.labels * ', '}}" if opts[:verbose]
num_added += 1
else
- puts "Updating message #{source}##{offset}, source #{entry[:source_id]} => #{source.id}, offset #{entry[:source_info]} => #{offset}, state {#{index_state * ', '}} => {#{m.labels * ', '}}" if opts[:verbose]
+ puts "Updating message #{source}##{offset}, source #{m_old.source.id} => #{source.id}, offset #{m_old.source_info} => #{offset}, state {#{index_state * ', '}} => {#{m.labels * ', '}}" if opts[:verbose]
num_updated += 1
end
## delete any messages in the index that claim they're from one of
## these sources, but that we didn't see.
- ##
- ## kinda crappy code here, because we delve directly into the Ferret
- ## API.
- ##
- ## TODO: move this to Index, i suppose.
-
-
- if target == :all || target == :changed
+ if (target == :all || target == :changed)
$stderr.puts "Deleting missing messages from the index..."
num_del, num_scanned = 0, 0
sources.each do |source|
raise "no source id for #{source}" unless source.id
- q = "+source_id:#{source.id}"
- q += " +source_info: >= #{opts[:start_at]}" if opts[:start_at]
- index.index.search_each(q, :limit => :all) do |docid, score|
+ index.each_message :source_id => source.id do |m|
num_scanned += 1
- mid = index.index[docid][:message_id]
- unless seen[mid]
- puts "Deleting #{mid}" if opts[:verbose]
- index.index.delete docid unless opts[:dry_run]
+ unless seen[m.id]
+ next unless m.source_info >= opts[:start_at] if opts[:start_at]
+ puts "Deleting #{m.id}" if opts[:verbose]
+ index.delete m.id unless opts[:dry_run]
num_del += 1
end
end
if opts[:optimize]
$stderr.puts "Optimizing index..."
- optt = time { index.index.optimize unless opts[:dry_run] }
+ optt = time { index.optimize unless opts[:dry_run] }
$stderr.puts "Optimized index of size #{index.size} in #{optt}s."
end
rescue Redwood::FatalSourceError => e