8 PROGRESS_UPDATE_INTERVAL = 15 # seconds between progress reports printed while a source is being scanned
11 def to_s; sprintf '%.2f', self; end # always render with exactly two decimal places
13 infinite? ? "unknown" : super # an infinite value has no h:mm:ss form, so report it as unknown; otherwise defer to the superclass implementation
20 sprintf "%d:%02d:%02d", i / 3600, (i / 60) % 60, i % 60 # hours:minutes:seconds; assumes i is a whole number of seconds (its assignment is not visible in this excerpt)
30 opts = Trollop::options do
31 version "sup-sync (sup #{Redwood::VERSION})"
33 Synchronizes the Sup index with one or more message sources by adding
34 messages, deleting messages, or changing message state in the index as
37 "Message state" means read/unread, archived/inbox, starred/unstarred,
38 and all user-defined labels on each message.
40 "Default source state" refers to any state that a source itself
41 keeps about a message. Sup-sync uses this information when adding a
42 new message to the index. The source state is typically limited to
43 read/unread, archived/inbox status and a single label based on the
44 source name. Messages using the default source state are placed in
45 the inbox (i.e. not archived) and unstarred.
48 sup-sync [options] <source>*
50 where <source>* is zero or more source URIs. If no sources are given,
51 sync from all usual sources. Supported source URI schemes can be seen
52 by running "sup-add --help".
54 Options controlling WHICH messages sup-sync operates on:
56 opt :new, "Operate on new messages only. Don't scan over the entire source. (Default.)", :short => :none
57 opt :changed, "Scan over the entire source for messages that have been deleted, altered, or moved from another source. (In the case of mbox sources, this includes all messages AFTER an altered message.)"
58 opt :restored, "Operate only on those messages included in a dump file as specified by --restore which have changed state."
59 opt :all, "Operate on all messages in the source, regardless of newness or changedness."
60 opt :start_at, "For --changed and --all, start at a particular offset.", :type => :int
64 Options controlling HOW message state is altered:
66 opt :asis, "If the message is already in the index, preserve its state. Otherwise, use default source state. (Default.)", :short => :none
67 opt :restore, "Restore message state from a dump file created with sup-dump. If a message is not in this dumpfile, act as --asis.", :type => String, :short => :none
68 opt :discard, "Discard any message state in the index and use the default source state. Dangerous!", :short => :none
69 opt :archive, "When using the default source state, mark messages as archived.", :short => "-x"
70 opt :read, "When using the default source state, mark messages as read."
71 opt :extra_labels, "When using the default source state, also apply these user-defined labels. Should be a comma-separated list.", :type => String, :short => :none
77 opt :verbose, "Print message ids as they're processed."
78 opt :optimize, "As the final operation, optimize the index."
79 opt :all_sources, "Scan over all sources.", :short => :none
80 opt :dry_run, "Don't actually modify the index. Probably only useful with --verbose.", :short => "-n"
81 opt :version, "Show version information", :short => :none
83 conflicts :changed, :all, :new, :restored # the target-selection flags are mutually exclusive
84 conflicts :asis, :restore, :discard # the state-handling flags are mutually exclusive
86 Trollop::die :restored, "requires --restore" if opts[:restored] unless opts[:restore] # stacked modifiers: dies only when --restored is given and --restore is not
88 Trollop::die :start_at, "must be non-negative" if opts[:start_at] < 0 # NOTE(review): needs opts[:start_at] to be non-nil; presumably guarded by an enclosing check that is not visible here
89 Trollop::die :start_at, "requires either --changed or --all" unless opts[:changed] || opts[:all]
92 target = [:new, :changed, :all, :restored].find { |x| opts[x] } || :new # first target flag that was given; :new when none was
93 op = [:asis, :restore, :discard].find { |x| opts[x] } || :asis # first state-handling flag that was given; :asis when none was
96 index = Redwood::Index.new
101 $stderr.puts "Loading state dump from #{opts[:restore]}..."
102 IO.foreach opts[:restore] do |l| # one dump entry per line, matched below as: <non-space token> (<text>)
103 l =~ /^(\S+) \((.*?)\)$/ or raise "Can't read dump line: #{l.inspect}"
105 dump[mid] = labels.split(" ").map { |x| x.intern } # store the labels as an array of symbols; mid and labels presumably come from the regex captures (assignment not visible here)
107 $stderr.puts "Read #{dump.size} entries from dump file."
118 sources = ARGV.map do |uri| # resolve each URI argument to a source known to the index, aborting on the first unknown one
119 index.source_for uri or Trollop::die "Unknown source: #{uri}. Did you add it with sup-add first?"
122 sources = index.usual_sources if sources.empty? # no URIs given: fall back to the usual sources
123 sources = index.sources if opts[:all_sources] # --all-sources replaces whatever was selected above
125 unless target == :new # for every target except :new, the sources are repositioned before scanning
127 sources.each { |s| s.seek_to! opts[:start_at] } # NOTE(review): the lines choosing between this call and the reset below are not visible; presumably this one runs only when --start-at was given
129 sources.each { |s| s.reset! }
133 sources.each do |source|
134 $stderr.puts "Scanning #{source}..."
135 num_added = num_updated = num_scanned = num_restored = 0 # counters start from zero for each source
136 last_info_time = start_time = Time.now
138 Redwood::PollManager.add_messages_from source, :force_overwrite => true do |m, offset, entry|
142 ## skip if we're operating only on changed messages, the message
143 ## is in the index, and it's unchanged from what the source is
145 next if target == :changed && entry && entry[:source_id].to_i == source.id && entry[:source_info].to_i == offset
147 ## get the state currently in the index
148 index_state = entry[:label].split(/\s+/).map { |x| x.intern } if entry # stays nil when the message has no index entry
150 ## skip if we're operating on restored messages, and this one
152 next if target == :restored && (!restored_state[m.id] || (index_state && restored_state[m.id].sort_by { |s| s.to_s } == index_state.sort_by { |s| s.to_s })) # NOTE(review): restored_state is not assigned in the visible lines; presumably the dump loaded from --restore
154 ## m.labels is the default source labels. tweak these according
155 ## to default source state modification flags.
156 m.labels -= [:inbox] if opts[:archive]
157 m.labels -= [:unread] if opts[:read]
158 m.labels += opts[:extra_labels].split(/\s*,\s*/).map { |x| x.intern } if opts[:extra_labels]
160 ## assign message labels based on the operation we're performing
163 m.labels = ((m.labels - [:unread, :inbox]) + index_state).uniq if index_state # drop the default :unread and :inbox labels, then merge in the labels already in the index
165 ## if the entry exists on disk
166 if restored_state[m.id]
167 m.labels = restored_state[m.id]
170 m.labels = index_state
173 ## nothing to do! use default source labels
176 if Time.now - last_info_time > PROGRESS_UPDATE_INTERVAL
177 last_info_time = Time.now
178 elapsed = last_info_time - start_time
179 pctdone = source.respond_to?(:pct_done) ? source.pct_done : 100.0 * (source.cur_offset.to_f - source.start_offset).to_f / (source.end_offset - source.start_offset).to_f # prefer the figure reported by the source; otherwise derive it from the offsets
180 remaining = (100.0 - pctdone) * (elapsed.to_f / pctdone) # linear extrapolation: percent left times seconds spent per percent so far
181 $stderr.printf "## read %dm (about %.0f%%) @ %.1fm/s. %s elapsed, about %s remaining\n", num_scanned, pctdone, num_scanned / elapsed, elapsed.to_time_s, remaining.to_time_s
185 puts "Adding message #{source}##{offset} with state {#{m.labels * ', '}}" if opts[:verbose]
188 puts "Updating message #{source}##{offset}, source #{entry[:source_id]} => #{source.id}, offset #{entry[:source_info]} => #{offset}, state {#{index_state * ', '}} => {#{m.labels * ', '}}" if opts[:verbose]
192 opts[:dry_run] ? nil : m # block result: nil on a dry run, otherwise the message; presumably nil tells add_messages_from not to store it
194 $stderr.puts "Scanned #{num_scanned}, added #{num_added}, updated #{num_updated} messages from #{source}."
195 $stderr.puts "Restored state on #{num_restored} (#{100.0 * num_restored / num_scanned}%) messages." if num_restored > 0
198 ## delete any messages in the index that claim they're from one of
199 ## these sources, but that we didn't see.
201 ## kinda crappy code here, because we delve directly into the Ferret
204 ## TODO: move this to Index, i suppose.
205 if target == :all || target == :changed # only the full-scan targets run this cleanup
206 $stderr.puts "Deleting missing messages from the index..."
207 num_del, num_scanned = 0, 0
208 sources.each do |source|
209 raise "no source id for #{source}" unless source.id
210 q = "+source_id:#{source.id}"
211 q += " +source_info: >= #{opts[:start_at]}" if opts[:start_at]
212 index.index.search_each(q, :limit => :all) do |docid, score| # visit every document matching the query built above: this source, narrowed by source_info when --start-at was given
214 mid = index.index[docid][:message_id]
216 puts "Deleting #{mid}" if opts[:verbose] # NOTE(review): the condition deciding which documents count as missing is not visible in this excerpt
217 index.index.delete docid unless opts[:dry_run] # a dry run skips the actual deletion
222 $stderr.puts "Deleted #{num_del} / #{num_scanned} messages"
228 $stderr.puts "Optimizing index..."
229 optt = time { index.index.optimize unless opts[:dry_run] } # time is defined outside this excerpt and presumably returns elapsed seconds; on a dry run the block does nothing
230 $stderr.puts "Optimized index of size #{index.size} in #{optt}s."
232 rescue Redwood::FatalSourceError => e
233 $stderr.puts "Sorry, I couldn't communicate with a source: #{e.message}"
234 rescue Exception => e # NOTE(review): this also catches interrupts and exit requests; whether the exception is re-raised afterwards is not visible here
235 File.open("sup-exception-log.txt", "w") { |f| f.puts e.backtrace } # mode w overwrites any earlier log in the current directory