X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=bin%2Fsup-sync;h=2aa00c3720bcd0dbbce60499e5d14a70e01ae418;hb=ef1d8a9333f8d261deeb88a7c1cb0cc58d7b6563;hp=14d72f42d0b28278c2d8529a8a196908821e2af5;hpb=51e14a0a30b37f08b230a377d17bcf42f729b0b2;p=sup diff --git a/bin/sup-sync b/bin/sup-sync old mode 100644 new mode 100755 index 14d72f4..2aa00c3 --- a/bin/sup-sync +++ b/bin/sup-sync @@ -5,8 +5,11 @@ require 'rubygems' require 'trollop' require "sup" +PROGRESS_UPDATE_INTERVAL = 15 # seconds + class Float def to_s; sprintf '%.2f', self; end + def to_time_s; infinite? ? "unknown" : super end end class Numeric @@ -16,6 +19,10 @@ class Numeric end end +class Set + def to_s; to_a * ',' end +end + def time startt = Time.now yield @@ -43,16 +50,16 @@ Usage: sup-sync [options] * where * is zero or more source URIs. If no sources are given, -sync from all usual sources. All supported source URI schemes can -be seen by running "sup-add --help". +sync from all usual sources. Supported source URI schemes can be seen +by running "sup-add --help". Options controlling WHICH messages sup-sync operates on: EOS opt :new, "Operate on new messages only. Don't scan over the entire source. (Default.)", :short => :none - opt :changed, "Scan over the entire source for messages that have been deleted, altered, or moved from another source. (In the case of mbox sources, this includes all messages AFTER an altered message.)" + opt :changed, "Scan over the entire source for messages that have been deleted, altered, or moved from another source." opt :restored, "Operate only on those messages included in a dump file as specified by --restore which have changed state." opt :all, "Operate on all messages in the source, regardless of newness or changedness." - opt :start_at, "For --changed and --all, start at a particular offset.", :type => :int + opt :start_at, "For --changed, --restored and --all, start at a particular offset.", :type => :int text < :none opt :archive, "When using the default source state, mark messages as archived.", :short => "-x" opt :read, "When using the default source state, mark messages as read." - opt :extra_labels, "When using the default source state, also apply these user-defined labels. Should be a comma-separated list.", :type => String, :short => :none + opt :extra_labels, "When using the default source state, also apply these user-defined labels (a comma-separated list)", :default => "", :short => :none text < 60 + ## now, actually do the operation + case dothis + when :add_message + $stderr.puts "Adding new message #{source}###{m.source_info} with labels #{m.labels}" if opts[:verbose] + index.add_message m unless opts[:dry_run] + num_added += 1 + when :update_message + $stderr.puts "Updating message #{source}###{m.source_info}; labels #{old_m.labels} => #{m.labels}; offset #{old_m.source_info} => #{m.source_info}" if opts[:verbose] + index.update_message m unless opts[:dry_run] + num_updated += 1 + when :update_message_state + $stderr.puts "Changing flags for #{source}##{m.source_info} from #{m.labels} to #{new_labels}" + m.labels = new_labels + index.update_message_state m unless opts[:dry_run] + num_updated += 1 + end + + if Time.now - last_info_time > PROGRESS_UPDATE_INTERVAL last_info_time = Time.now elapsed = last_info_time - start_time pctdone = source.respond_to?(:pct_done) ? source.pct_done : 100.0 * (source.cur_offset.to_f - source.start_offset).to_f / (source.end_offset - source.start_offset).to_f remaining = (100.0 - pctdone) * (elapsed.to_f / pctdone) - $stderr.puts "## #{num_added + num_updated} (#{pctdone}% done) read; #{elapsed.to_time_s} elapsed; est. #{remaining.to_time_s} remaining (for this source)" + $stderr.printf "## read %dm (about %.0f%%) @ %.1fm/s. %s elapsed, about %s remaining\n", num_scanned, pctdone, num_scanned / elapsed, elapsed.to_time_s, remaining.to_time_s end - - if index_state.nil? - puts "Adding message #{source}##{offset} with state {#{m.labels * ', '}}" if opts[:verbose] - num_added += 1 - else - puts "Updating message #{source}##{offset}, source #{entry[:source_id]} => #{source.id}, offset #{entry[:source_info]} => #{offset}, state {#{index_state * ', '}} => {#{m.labels * ', '}}" if opts[:verbose] - num_updated += 1 - end - - opts[:dry_run] ? nil : m end + $stderr.puts "Scanned #{num_scanned}, added #{num_added}, updated #{num_updated} messages from #{source}." $stderr.puts "Restored state on #{num_restored} (#{100.0 * num_restored / num_scanned}%) messages." if num_restored > 0 end -rescue Exception => e - File.open("sup-exception-log.txt", "w") { |f| f.puts e.backtrace } - raise -ensure - index.save - Redwood::finish -end -## delete any messages in the index that claim they're from one of -## these sources, but that we didn't see. -## -## kinda crappy code here, because we delve directly into the Ferret -## API. -## -## TODO: move this to Index, i suppose. -if target == :all || target == :changed - $stderr.puts "Deleting missing messages from the index..." - num_del, num_scanned = 0, 0 - sources.each do |source| - raise "no source id for #{source}" unless source.id - q = "+source_id:#{source.id}" - q += " +source_info: >= #{opts[:start_at]}" if opts[:start_at] - index.index.search_each(q, :limit => :all) do |docid, score| - num_scanned += 1 - mid = index.index[docid][:message_id] - unless seen[mid] - puts "Deleting #{mid}" if opts[:verbose] - index.index.delete docid unless opts[:dry_run] - num_del += 1 + ## delete any messages in the index that claim they're from one of + ## these sources, but that we didn't see. + if (target == :all || target == :changed) + $stderr.puts "Deleting missing messages from the index..." + num_del, num_scanned = 0, 0 + sources.each do |source| + raise "no source id for #{source}" unless source.id + index.each_message :source_id => source.id, :load_spam => true, :load_deleted => true, :load_killed => true do |m| + num_scanned += 1 + unless seen[m.id] + next unless m.source_info >= opts[:start_at] if opts[:start_at] + puts "Deleting #{m.id}" if opts[:verbose] + index.delete m.id unless opts[:dry_run] + num_del += 1 + end end end + $stderr.puts "Deleted #{num_del} / #{num_scanned} messages" end - $stderr.puts "Deleted #{num_del} / #{num_scanned} messages" -end -if opts[:optimize] - $stderr.puts "Optimizing index..." - optt = time { index.index.optimize unless opts[:dry_run] } - $stderr.puts "Optimized index of size #{index.size} in #{optt}s." + index.save + + if opts[:optimize] + $stderr.puts "Optimizing index..." + optt = time { index.optimize unless opts[:dry_run] } + $stderr.puts "Optimized index of size #{index.size} in #{optt}s." + end +rescue Redwood::FatalSourceError => e + $stderr.puts "Sorry, I couldn't communicate with a source: #{e.message}" +rescue Exception => e + File.open("sup-exception-log.txt", "w") { |f| f.puts e.backtrace } + raise +ensure + Redwood::finish + index.unlock end