X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=bin%2Fsup-sync;h=44ff3b20a873f01dc33fde8c7b6345521cc84fb9;hb=dcd7a7e4c33778d38dd8b1c4059d4b6c99add3fe;hp=371c6957cbb4a0ae8577699a465855de0a386e38;hpb=2c0bc997ecf7faa8aad3aa740624aa50473dc2c4;p=sup diff --git a/bin/sup-sync b/bin/sup-sync old mode 100644 new mode 100755 index 371c695..44ff3b2 --- a/bin/sup-sync +++ b/bin/sup-sync @@ -5,8 +5,13 @@ require 'rubygems' require 'trollop' require "sup" +PROGRESS_UPDATE_INTERVAL = 15 # seconds + class Float def to_s; sprintf '%.2f', self; end + def to_time_s + infinite? ? "unknown" : super + end end class Numeric @@ -52,7 +57,7 @@ EOS opt :changed, "Scan over the entire source for messages that have been deleted, altered, or moved from another source. (In the case of mbox sources, this includes all messages AFTER an altered message.)" opt :restored, "Operate only on those messages included in a dump file as specified by --restore which have changed state." opt :all, "Operate on all messages in the source, regardless of newness or changedness." - opt :start_at, "For --changed and --all, start at a particular offset.", :type => :int + opt :start_at, "For --changed, --restored and --all, start at a particular offset.", :type => :int text < true do |m_old, m, offset| num_scanned += 1 seen[m.id] = true + if Time.now - last_info_time > PROGRESS_UPDATE_INTERVAL + last_info_time = Time.now + elapsed = last_info_time - start_time + start = opts[:start_at] || source.start_offset + pctdone = 100.0 * (source.cur_offset - start).to_f / (source.end_offset - start).to_f + remaining = (100.0 - pctdone) * (elapsed.to_f / pctdone) + $stderr.printf "## read %dm (about %.0f%%) @ %.1fm/s. %s elapsed, about %s remaining\n", num_scanned, pctdone, num_scanned / elapsed, elapsed.to_time_s, remaining.to_time_s + end + ## skip if we're operating only on changed messages, the message ## is in the index, and it's unchanged from what the source is ## reporting. - next if target == :changed && entry && entry[:source_id].to_i == source.id && entry[:source_info].to_i == offset + next if target == :changed && m_old && m_old.source.id == source.id && m_old.source_info == offset ## get the state currently in the index - index_state = - if entry - entry[:label].split(/\s+/).map { |x| x.intern } - else - nil - end + index_state = m_old.labels.dup if m_old ## skip if we're operating on restored messages, and this one ## ain't. - next if target == :restored && (!restored_state[m.id] || restored_state[m.id].sort_by { |s| s.to_s } == index_state.sort_by { |s| s.to_s }) + next if target == :restored && (!restored_state[m.id] || (index_state && restored_state[m.id].sort_by { |s| s.to_s } == index_state.sort_by { |s| s.to_s })) ## m.labels is the default source labels. tweak these according ## to default source state modification flags. m.labels -= [:inbox] if opts[:archive] m.labels -= [:unread] if opts[:read] - m.labels += opts[:extra_labels].split(/\s*,\s*/).map { |x| x.intern } if opts[:extra_labels] + m.labels += opts[:extra_labels].strip.split(/\s*,\s*/).map { |x| x.intern } if opts[:extra_labels] ## assign message labels based on the operation we're performing case op when :asis - m.labels = index_state if index_state + m.labels = ((m.labels - [:unread, :inbox]) + index_state).uniq if index_state when :restore ## if the entry exists on disk if restored_state[m.id] @@ -173,19 +184,19 @@ begin ## nothin! use default source labels end - if Time.now - last_info_time > 60 + if Time.now - last_info_time > PROGRESS_UPDATE_INTERVAL last_info_time = Time.now elapsed = last_info_time - start_time pctdone = source.respond_to?(:pct_done) ? source.pct_done : 100.0 * (source.cur_offset.to_f - source.start_offset).to_f / (source.end_offset - source.start_offset).to_f remaining = (100.0 - pctdone) * (elapsed.to_f / pctdone) - $stderr.puts "## #{num_added + num_updated} (#{pctdone}%) read; #{elapsed.to_time_s} elapsed; #{remaining.to_time_s} remaining" + $stderr.printf "## read %dm (about %.0f%%) @ %.1fm/s. %s elapsed, about %s remaining\n", num_scanned, pctdone, num_scanned / elapsed, elapsed.to_time_s, remaining.to_time_s end if index_state.nil? - puts "Adding message #{source}##{offset} with state {#{m.labels * ', '}}" if opts[:verbose] + puts "Adding message #{source}##{offset} from #{m.from} with state {#{m.labels * ', '}}" if opts[:verbose] num_added += 1 else - puts "Updating message #{source}##{offset}, source #{entry[:source_id]} => #{source.id}, offset #{entry[:source_info]} => #{offset}, state {#{index_state * ', '}} => {#{m.labels * ', '}}" if opts[:verbose] + puts "Updating message #{source}##{offset}, source #{m_old.source.id} => #{source.id}, offset #{m_old.source_info} => #{offset}, state {#{index_state * ', '}} => {#{m.labels * ', '}}" if opts[:verbose] num_updated += 1 end @@ -197,26 +208,17 @@ begin ## delete any messages in the index that claim they're from one of ## these sources, but that we didn't see. - ## - ## kinda crappy code here, because we delve directly into the Ferret - ## API. - ## - ## TODO: move this to Index, i suppose. - - - if target == :all || target == :changed + if (target == :all || target == :changed) $stderr.puts "Deleting missing messages from the index..." num_del, num_scanned = 0, 0 sources.each do |source| raise "no source id for #{source}" unless source.id - q = "+source_id:#{source.id}" - q += " +source_info: >= #{opts[:start_at]}" if opts[:start_at] - index.index.search_each(q, :limit => :all) do |docid, score| + index.each_message :source_id => source.id, :load_spam => true, :load_deleted => true, :load_killed => true do |m| num_scanned += 1 - mid = index.index[docid][:message_id] - unless seen[mid] - puts "Deleting #{mid}" if opts[:verbose] - index.index.delete docid unless opts[:dry_run] + unless seen[m.id] + next unless m.source_info >= opts[:start_at] if opts[:start_at] + puts "Deleting #{m.id}" if opts[:verbose] + index.delete m.id unless opts[:dry_run] num_del += 1 end end @@ -224,9 +226,11 @@ begin $stderr.puts "Deleted #{num_del} / #{num_scanned} messages" end + index.save + if opts[:optimize] $stderr.puts "Optimizing index..." - optt = time { index.index.optimize unless opts[:dry_run] } + optt = time { index.optimize unless opts[:dry_run] } $stderr.puts "Optimized index of size #{index.size} in #{optt}s." end rescue Redwood::FatalSourceError => e @@ -235,7 +239,6 @@ rescue Exception => e File.open("sup-exception-log.txt", "w") { |f| f.puts e.backtrace } raise ensure - index.save Redwood::finish index.unlock end