#!/usr/bin/env ruby

## sup-sync: scans one or more message sources and adds, updates, restores or
## deletes the corresponding entries in the Sup index, as selected by the
## command-line options parsed below.

require 'uri'
require 'rubygems'
require 'trollop'
require "sup"

PROGRESS_UPDATE_INTERVAL = 15 # seconds

class Float
  ## Render floats with two decimals (used when interpolating timings and
  ## percentages into status messages).
  def to_s; sprintf '%.2f', self; end

  ## Format a duration in seconds as H:MM:SS. Non-finite values (Infinity and
  ## NaN, both of which the remaining-time estimate can produce) cannot be
  ## converted to an integer, so they are reported as "unknown".
  def to_time_s; finite? ? super : "unknown" end
end

class Numeric
  ## Format a duration in seconds as H:MM:SS.
  def to_time_s
    i = to_i
    sprintf "%d:%02d:%02d", i / 3600, (i / 60) % 60, i % 60
  end
end

class Set
  ## Render a label set as a comma-separated list.
  def to_s; to_a * ',' end
end

## Run the given block and return the elapsed wall-clock time in seconds.
def time
  startt = Time.now
  yield
  Time.now - startt
end

## NOTE(review): the banner/text heredocs and the declarations of --asis,
## --verbose, --optimize and --all-sources were missing from this file (the
## code below still refers to all four). They have been reconstructed from
## how the options are used; confirm the wording against the upstream script.
opts = Trollop::options do
  version "sup-sync (sup #{Redwood::VERSION})"
  banner <<EOS
Usage:
  sup-sync [options] <source>*

where <source>* is zero or more source URIs. If no sources are given,
sync from all usual sources. Supported source URI schemes can be seen
by running "sup-add --help".

Options controlling WHICH messages sup-sync operates on:
EOS
  opt :new, "Operate on new messages only. Don't scan over the entire source. (Default.)", :short => :none
  opt :changed, "Scan over the entire source for messages that have been deleted, altered, or moved from another source."
  opt :restored, "Operate only on those messages included in a dump file as specified by --restore which have changed state."
  opt :all, "Operate on all messages in the source, regardless of newness or changedness."
  opt :start_at, "For --changed, --restored and --all, start at a particular offset.", :type => :int

  text <<EOS

Options controlling HOW sup-sync modifies message state:
EOS
  opt :asis, "If the message is already in the index, preserve its state. Otherwise, use default source state. (Default.)", :short => :none
  opt :restore, "Restore message state from a dump file created with sup-dump. If a message is not in this dumpfile, act as --asis.", :type => String, :short => :none
  opt :discard, "Discard any message state in the index and use the default source state. Dangerous!", :short => :none
  opt :archive, "When using the default source state, mark messages as archived.", :short => "-x"
  opt :read, "When using the default source state, mark messages as read."
  opt :extra_labels, "When using the default source state, also apply these user-defined labels (a comma-separated list)", :default => "", :short => :none

  text <<EOS

Other options:
EOS
  opt :verbose, "Print message ids as they're processed."
  opt :optimize, "As the final operation, optimize the index."
  opt :all_sources, "Scan over all sources.", :short => :none
  opt :dry_run, "Don't actually modify the index. Probably only useful with --verbose.", :short => "-n"
  opt :version, "Show version information", :short => :none

  conflicts :changed, :all, :new, :restored
  conflicts :asis, :restore, :discard
end

Trollop::die :restored, "requires --restore" if opts[:restored] && !opts[:restore]
if opts[:start_at]
  Trollop::die :start_at, "must be non-negative" if opts[:start_at] < 0
  Trollop::die :start_at, "requires either --changed, --restored or --all" unless opts[:changed] || opts[:restored] || opts[:all]
end

## which messages to look at, and what to do with their state; both fall
## back to the documented defaults when no flag is given.
target = [:new, :changed, :all, :restored].find { |x| opts[x] } || :new
op = [:asis, :restore, :discard].find { |x| opts[x] } || :asis

Redwood::start
index = Redwood::Index.init

## message id => label set, read from the dump file given with --restore
## (empty when --restore is not in use).
restored_state =
  if opts[:restore]
    dump = {}
    $stderr.puts "Loading state dump from #{opts[:restore]}..."
    IO.foreach opts[:restore] do |l|
      l =~ /^(\S+) \((.*?)\)$/ or raise "Can't read dump line: #{l.inspect}"
      mid, labels = $1, $2
      dump[mid] = labels.to_set_of_symbols
    end
    $stderr.puts "Read #{dump.size} entries from dump file."
    dump
  else
    {}
  end

## ids of every message reported by a source during this run; consulted by
## the deletion pass at the end.
seen = {}
index.lock_interactively or exit
begin
  index.load

  sources = if opts[:all_sources]
    Redwood::SourceManager.sources
  elsif ARGV.empty?
    Redwood::SourceManager.usual_sources
  else
    ARGV.map do |uri|
      Redwood::SourceManager.source_for uri or Trollop::die "Unknown source: #{uri}. Did you add it with sup-add first?"
    end
  end

  ## for all target specifications except for only-new messages, reset the
  ## source to the beginning (or to the user-specified starting point.)
  unless target == :new
    if opts[:start_at]
      Trollop::die :start_at, "can only be used on one source" unless sources.size == 1
      sources.first.seek_to! opts[:start_at]
      sources.first.correct_offset! if sources.first.respond_to?(:correct_offset!)
    else
      sources.each { |s| s.reset! }
    end
  end

  sources.each do |source|
    $stderr.puts "Scanning #{source}..."
    num_added = num_updated = num_scanned = num_restored = 0
    last_info_time = start_time = Time.now

    Redwood::PollManager.each_message_from source do |m|
      num_scanned += 1
      seen[m.id] = true
      old_m = index.build_message m.id

      case target
      when :changed
        ## skip this message if we're operating only on changed messages, the
        ## message is in the index, and it's unchanged from what the source is
        ## reporting.
        next if old_m && old_m.source.id == m.source.id && old_m.source_info == m.source_info
      when :restored
        ## skip if we're operating on restored messages, and this one
        ## ain't (or we wouldn't be making a change)
        next unless old_m && restored_state[m.id] && restored_state[m.id] != old_m.labels
      when :new
        ## nothing to do; we'll consider all messages starting at the start offset, which
        ## hasn't been changed.
      when :all
        ## nothing to do; we'll consider all messages starting at the start offset, which
        ## was reset to the beginning above.
      end

      ## tweak source labels according to commandline arguments if necessary
      m.labels.delete :inbox if opts[:archive]
      m.labels.delete :unread if opts[:read]
      m.labels += opts[:extra_labels].to_set_of_symbols(",")

      ## decide what to do based on message labels and the operation we're performing.
      ## branches that yield a bare symbol (or nothing) leave new_labels nil.
      dothis, new_labels = case
      when (op == :restore) && restored_state[m.id]
        if old_m && (old_m.labels != restored_state[m.id])
          num_restored += 1
          [:update_message_state, restored_state[m.id]]
        elsif old_m.nil?
          num_restored += 1
          m.labels = restored_state[m.id]
          :add_message
        else
          # labels are the same; don't do anything
        end
      when op == :discard
        if old_m && (old_m.labels != m.labels)
          [:update_message_state, m.labels]
        else
          # labels are the same; don't do anything
        end
      else
        ## duplicate behavior of poll mode: if index_state is non-nil, this is a newer
        ## version of an older message, so merge in any new labels except :unread and
        ## :inbox.
        ##
        ## TODO: refactor such that this isn't duplicated
        if old_m
          m.labels = old_m.labels + (m.labels - [:unread, :inbox])
          :update_message
        else
          :add_message
        end
      end

      ## now, actually do the operation
      case dothis
      when :add_message
        $stderr.puts "Adding new message #{source}###{m.source_info} with labels #{m.labels}" if opts[:verbose]
        index.add_message m unless opts[:dry_run]
        num_added += 1
      when :update_message
        $stderr.puts "Updating message #{source}###{m.source_info}; labels #{old_m.labels} => #{m.labels}; offset #{old_m.source_info} => #{m.source_info}" if opts[:verbose]
        index.update_message m unless opts[:dry_run]
        num_updated += 1
      when :update_message_state
        ## old_m is always present on this path; report the labels currently
        ## in the index (not the source's) as the "from" side.
        $stderr.puts "Changing flags for #{source}##{m.source_info} from #{old_m.labels} to #{new_labels}" if opts[:verbose]
        m.labels = new_labels
        index.update_message_state m unless opts[:dry_run]
        num_updated += 1
      end

      if Time.now - last_info_time > PROGRESS_UPDATE_INTERVAL
        last_info_time = Time.now
        elapsed = last_info_time - start_time
        pctdone = source.respond_to?(:pct_done) ? source.pct_done : 100.0 * (source.cur_offset.to_f - source.start_offset).to_f / (source.end_offset - source.start_offset).to_f
        ## may be Infinity or NaN early on or for empty sources; to_time_s
        ## prints "unknown" in that case.
        remaining = (100.0 - pctdone) * (elapsed.to_f / pctdone)
        $stderr.printf "## read %dm (about %.0f%%) @ %.1fm/s. %s elapsed, about %s remaining\n", num_scanned, pctdone, num_scanned / elapsed, elapsed.to_time_s, remaining.to_time_s
      end
    end

    $stderr.puts "Scanned #{num_scanned}, added #{num_added}, updated #{num_updated} messages from #{source}."
    $stderr.puts "Restored state on #{num_restored} (#{100.0 * num_restored / num_scanned}%) messages." if num_restored > 0
  end

  ## delete any messages in the index that claim they're from one of
  ## these sources, but that we didn't see.
  if (target == :all || target == :changed)
    $stderr.puts "Deleting missing messages from the index..."
    num_del, num_scanned = 0, 0
    sources.each do |source|
      raise "no source id for #{source}" unless source.id
      index.each_message :source_id => source.id, :load_spam => true, :load_deleted => true, :load_killed => true do |m|
        num_scanned += 1
        unless seen[m.id]
          ## with --start-at, messages before the starting offset were never
          ## scanned, so their absence from `seen` proves nothing.
          next unless m.source_info >= opts[:start_at] if opts[:start_at]
          puts "Deleting #{m.id}" if opts[:verbose]
          index.delete m.id unless opts[:dry_run]
          num_del += 1
        end
      end
    end
    $stderr.puts "Deleted #{num_del} / #{num_scanned} messages"
  end

  index.save

  if opts[:optimize]
    $stderr.puts "Optimizing index..."
    optt = time { index.optimize unless opts[:dry_run] }
    $stderr.puts "Optimized index of size #{index.size} in #{optt}s."
  end
rescue Redwood::FatalSourceError => e
  $stderr.puts "Sorry, I couldn't communicate with a source: #{e.message}"
rescue Exception => e
  ## record the backtrace for bug reports, then let the exception propagate.
  File.open("sup-exception-log.txt", "w") { |f| f.puts e.backtrace }
  raise
ensure
  Redwood::finish
  index.unlock
end