require 'trollop'
require "sup"
+PROGRESS_UPDATE_INTERVAL = 15 # seconds
+
class Float
def to_s; sprintf '%.2f', self; end
+ def to_time_s
+ infinite? ? "unknown" : super
+ end
end
class Numeric
sup-sync [options] <source>*
where <source>* is zero or more source URIs. If no sources are given,
-sync from all usual sources. All supported source URI schemes can
-be seen by running "sup-add --help".
+sync from all usual sources. Supported source URI schemes can be seen
+by running "sup-add --help".
Options controlling WHICH messages sup-sync operates on:
EOS
## Start up Redwood and create the index object that the rest of the
## script works on.
Redwood::start
index = Redwood::Index.new
-index.load
## restored_state holds the label state read from the --restore dump file,
## keyed by message id (an empty hash when no dump is in play).
restored_state =
if opts[:restore]
## Every dump line must look like "<message-id> (<labels>)"; any other
## line aborts the run.
## NOTE(review): `dump` is initialised outside the visible lines.
IO.foreach opts[:restore] do |l|
l =~ /^(\S+) \((.*?)\)$/ or raise "Can't read dump line: #{l.inspect}"
mid, labels = $1, $2
## symbolistize is defined elsewhere; presumably it turns the label string
## into an array of symbols, like the split/intern line it replaces --
## TODO confirm.
- dump[mid] = labels.split(" ").map { |x| x.intern }
+ dump[mid] = labels.symbolistize
end
$stderr.puts "Read #{dump.size} entries from dump file."
dump
{}
end
-sources = ARGV.map do |uri|
- index.source_for uri or Trollop::die "Unknown source: #{uri}. Did you add it with sup-add first?"
-end
-
-sources = index.usual_sources if sources.empty?
-sources = index.sources if opts[:all_sources]
-
-unless target == :new
- if opts[:start_at]
- sources.each { |s| s.seek_to! opts[:start_at] }
- else
- sources.each { |s| s.reset! }
- end
-end
-
## ids of every message encountered while scanning; the deletion pass
## further down removes indexed messages whose id is not in here.
seen = {}
## Take the index lock before loading or modifying the index; it is
## released again in the ensure clause at the end of the script.
## (Presumably lock_or_die exits when the lock is already held -- confirm.)
+index.lock_or_die
begin
+ index.load
+
## Resolve each command-line URI to a source known to the index; an
## unknown URI is a usage error.
+ sources = ARGV.map do |uri|
+ index.source_for uri or Trollop::die "Unknown source: #{uri}. Did you add it with sup-add first?"
+ end
+
## No URIs given: fall back to the usual sources. --all-sources overrides
## whatever was selected above.
+ sources = index.usual_sources if sources.empty?
+ sources = index.sources if opts[:all_sources]
+
## Unless only new messages are wanted, rewind the sources: either seek
## the single given source to --start-at, or reset every source.
+ unless target == :new
+ if opts[:start_at]
## A start offset is only meaningful for one specific source.
+ Trollop::die :start_at, "can only be used on one source" unless sources.size == 1
+ sources.first.seek_to! opts[:start_at]
## Only some source types implement correct_offset!, hence the check.
+ sources.first.correct_offset! if sources.first.respond_to?(:correct_offset!)
+ else
+ sources.each { |s| s.reset! }
+ end
+ end
+
## Scan every selected source, deciding for each message whether to skip
## it and which labels it should carry, and keep per-source counters for
## the summary printed at the end of the loop body.
sources.each do |source|
$stderr.puts "Scanning #{source}..."
num_added = num_updated = num_scanned = num_restored = 0
last_info_time = start_time = Time.now
## The block receives m_old (nil when there is no previous state, judging
## by the `if m_old` checks below; presumably the copy already in the
## index), the message m as read from the source, and its offset.
- Redwood::PollManager.add_messages_from source do |m, offset, entry|
+ Redwood::PollManager.add_messages_from source, :force_overwrite => true do |m_old, m, offset|
num_scanned += 1
seen[m.id] = true
## Progress report, at most once per PROGRESS_UPDATE_INTERVAL seconds.
## The remaining time is a linear extrapolation from the share of the
## offset range covered so far.
## NOTE(review): a second progress report exists further down in this
## loop body; since last_info_time is reset here, that later one will
## rarely fire -- confirm which of the two is intended.
+ if Time.now - last_info_time > PROGRESS_UPDATE_INTERVAL
+ last_info_time = Time.now
+ elapsed = last_info_time - start_time
+ start = opts[:start_at] || source.start_offset
+ pctdone = 100.0 * (source.cur_offset - start).to_f / (source.end_offset - start).to_f
+ remaining = (100.0 - pctdone) * (elapsed.to_f / pctdone)
+ $stderr.printf "## read %dm (about %.0f%%) @ %.1fm/s. %s elapsed, about %s remaining\n", num_scanned, pctdone, num_scanned / elapsed, elapsed.to_time_s, remaining.to_time_s
+ end
+
## skip if we're operating only on changed messages, the message
## is in the index, and it's unchanged from what the source is
## reporting.
- next if target == :changed && entry && entry[:source_id].to_i == source.id && entry[:source_info].to_i == offset
+ next if target == :changed && m_old && m_old.source.id == source.id && m_old.source_info == offset
## get the state currently in the index
## (index_state stays nil when m_old is nil)
- index_state =
- if entry
- entry[:label].split(/\s+/).map { |x| x.intern }
- else
- nil
- end
+ index_state = m_old.labels.dup if m_old
## skip if we're operating on restored messages, and this one
## ain't.
## (the index_state guard keeps sort_by from being called on nil when
## there is no previous state)
- next if target == :restored && (!restored_state[m.id] || restored_state[m.id].sort_by { |s| s.to_s } == index_state.sort_by { |s| s.to_s })
+ next if target == :restored && (!restored_state[m.id] || (index_state && restored_state[m.id].sort_by { |s| s.to_s } == index_state.sort_by { |s| s.to_s }))
## m.labels is the default source labels. tweak these according
## to default source state modification flags.
m.labels -= [:inbox] if opts[:archive]
m.labels -= [:unread] if opts[:read]
## extra labels arrive as one comma-separated string; the strip removes
## surrounding whitespace before splitting.
- m.labels += opts[:extra_labels].split(/\s*,\s*/).map { |x| x.intern } if opts[:extra_labels]
+ m.labels += opts[:extra_labels].strip.split(/\s*,\s*/).map { |x| x.intern } if opts[:extra_labels]
## assign message labels based on the operation we're performing
case op
when :asis
## keep the source's default labels except :unread and :inbox, and merge
## in whatever the index already had (duplicates removed).
- m.labels = index_state if index_state
+ m.labels = ((m.labels - [:unread, :inbox]) + index_state).uniq if index_state
when :restore
## if the entry exists on disk
if restored_state[m.id]
## nothin! use default source labels
end
## Second progress report (see the review note on the first one above).
## Sources that respond to pct_done supply the percentage themselves;
## otherwise it is derived from the offsets.
- if Time.now - last_info_time > 60
+ if Time.now - last_info_time > PROGRESS_UPDATE_INTERVAL
last_info_time = Time.now
elapsed = last_info_time - start_time
pctdone = source.respond_to?(:pct_done) ? source.pct_done : 100.0 * (source.cur_offset.to_f - source.start_offset).to_f / (source.end_offset - source.start_offset).to_f
remaining = (100.0 - pctdone) * (elapsed.to_f / pctdone)
- $stderr.puts "## #{num_added + num_updated} (#{pctdone}% done) read; #{elapsed.to_time_s} elapsed; est. #{remaining.to_time_s} remaining (for this source)"
+ $stderr.printf "## read %dm (about %.0f%%) @ %.1fm/s. %s elapsed, about %s remaining\n", num_scanned, pctdone, num_scanned / elapsed, elapsed.to_time_s, remaining.to_time_s
end
## no previous index state means the message is new; otherwise it is an
## update of an existing entry.
if index_state.nil?
- puts "Adding message #{source}##{offset} with state {#{m.labels * ', '}}" if opts[:verbose]
+ puts "Adding message #{source}##{offset} from #{m.from} with state {#{m.labels * ', '}}" if opts[:verbose]
num_added += 1
else
- puts "Updating message #{source}##{offset}, source #{entry[:source_id]} => #{source.id}, offset #{entry[:source_info]} => #{offset}, state {#{index_state * ', '}} => {#{m.labels * ', '}}" if opts[:verbose]
+ puts "Updating message #{source}##{offset}, source #{m_old.source.id} => #{source.id}, offset #{m_old.source_info} => #{offset}, state {#{index_state * ', '}} => {#{m.labels * ', '}}" if opts[:verbose]
num_updated += 1
end
## per-source summary; the restore line is only printed when at least one
## message had its state restored.
$stderr.puts "Scanned #{num_scanned}, added #{num_added}, updated #{num_updated} messages from #{source}."
$stderr.puts "Restored state on #{num_restored} (#{100.0 * num_restored / num_scanned}%) messages." if num_restored > 0
end
-rescue Exception => e
- File.open("sup-exception-log.txt", "w") { |f| f.puts e.backtrace }
- raise
-ensure
- index.save
- Redwood::finish
-end
-## delete any messages in the index that claim they're from one of
-## these sources, but that we didn't see.
-##
-## kinda crappy code here, because we delve directly into the Ferret
-## API.
-##
-## TODO: move this to Index, i suppose.
-if target == :all || target == :changed
- $stderr.puts "Deleting missing messages from the index..."
- num_del, num_scanned = 0, 0
- sources.each do |source|
- raise "no source id for #{source}" unless source.id
- q = "+source_id:#{source.id}"
- q += " +source_info: >= #{opts[:start_at]}" if opts[:start_at]
- index.index.search_each(q, :limit => :all) do |docid, score|
- num_scanned += 1
- mid = index.index[docid][:message_id]
- unless seen[mid]
- puts "Deleting #{mid}" if opts[:verbose]
- index.index.delete docid unless opts[:dry_run]
- num_del += 1
+ ## delete any messages in the index that claim they're from one of
+ ## these sources, but that we didn't see.
## Only done when scanning all or changed messages.
+ if (target == :all || target == :changed)
+ $stderr.puts "Deleting missing messages from the index..."
+ num_del, num_scanned = 0, 0
+ sources.each do |source|
+ raise "no source id for #{source}" unless source.id
## walk every indexed message recorded for this source
+ index.each_message :source_id => source.id do |m|
+ num_scanned += 1
+ unless seen[m.id]
## with --start-at, messages located before the start offset are left
## alone.
+ next unless m.source_info >= opts[:start_at] if opts[:start_at]
+ puts "Deleting #{m.id}" if opts[:verbose]
## a dry run still reports and counts the message but does not delete it
+ index.delete m.id unless opts[:dry_run]
+ num_del += 1
+ end
end
end
+ $stderr.puts "Deleted #{num_del} / #{num_scanned} messages"
end
- $stderr.puts "Deleted #{num_del} / #{num_scanned} messages"
-end
-if opts[:optimize]
- $stderr.puts "Optimizing index..."
- optt = time { index.index.optimize unless opts[:dry_run] }
- $stderr.puts "Optimized index of size #{index.size} in #{optt}s."
## The index is saved only when everything above completed without
## raising; it is no longer part of the ensure clause.
+ index.save
+
## Optional optimization pass; a dry run skips the optimize call itself
## but still prints both messages.
## (`time` is a helper defined outside the visible lines; presumably it
## returns the seconds its block took -- confirm.)
+ if opts[:optimize]
+ $stderr.puts "Optimizing index..."
+ optt = time { index.optimize unless opts[:dry_run] }
+ $stderr.puts "Optimized index of size #{index.size} in #{optt}s."
+ end
## A source that cannot be reached is reported and not re-raised, so the
## script proceeds straight to the cleanup below.
+rescue Redwood::FatalSourceError => e
+ $stderr.puts "Sorry, I couldn't communicate with a source: #{e.message}"
## Anything else: write the backtrace to sup-exception-log.txt in the
## current directory, then re-raise.
## NOTE(review): Exception also matches SystemExit and Interrupt, so an
## exit raised inside the begin block (e.g. by Trollop::die, if it exits)
## gets logged here as well -- confirm that is intended.
+rescue Exception => e
+ File.open("sup-exception-log.txt", "w") { |f| f.puts e.backtrace }
+ raise
## Always shut Redwood down and release the index lock taken before the
## begin block.
+ensure
+ Redwood::finish
+ index.unlock
end