require 'highline'
require "sup"
+Thread.abort_on_exception = true # make debugging possible
+
## render every Float with exactly two digits after the decimal point.
class Float
  def to_s
    format '%.2f', self
  end
end
else
found[m.id] = true
end
+
m.remove_label :unread if m.status == "RO" unless force_read
- puts "# message at #{offset}, labels: #{labels * ', '}" unless rebuild || force_rebuild
+ puts "# message at #{offset}, labels: #{labels * ', '}"
if (rebuild || force_rebuild) &&
(docid, entry = index.load_entry_for_id(m.id)) && entry
if force_rebuild || entry[:source_info].to_i != offset
puts "loaded #{num} messages" unless num == 0
end
ensure
+ $stderr.puts "saving index and sources..."
index.save
Redwood::finish
end
require 'net/imap'
require 'stringio'
+## a note on why this source does not rely on imap uids.
+
+## imap advertises 'unique ids' for messages, which are meant to be
+## used for cross-session identification. that is exactly what sup
+## needs. unfortunately, the uids are invalidated whenever the
+## 'uidvalidity' value changes on the server, and 'uidvalidity' has no
+## restrictions: it can change any time you log in, or even EVERY time
+## you log in. the imap spec "strongly recommends" that it never
+## change, but there is nothing to stop a server from just setting it
+## to the current time, and in fact that is exactly what the one imap
+## server i have at my disposal does. thus the uids cannot be relied
+## upon, and imap provides no dependable cross-session way of uniquely
+## identifying a message.
+
+## right now i'm using the 'internal date' and the size of each
+## message to uniquely identify it. this means scanning over the
+## entire mailbox each time it is opened in order to map those things
+## to message ids, and it assumes, without any guarantee, that there
+## are no collisions (two messages with the same internal date and
+## the same size).
+
+## in short, imap ends up with the same kind of identification
+## problems as mbox, despite having been designed some thirty years
+## later.
+
module Redwood
class IMAP < Source
attr_reader_cloned :labels
- def initialize uri, username, password, uid_validity=nil, last_uid=nil, usual=true, archived=false, id=nil
+ def initialize uri, username, password, last_idate=nil, usual=true, archived=false, id=nil
raise ArgumentError, "username and password must be specified" unless username && password
raise ArgumentError, "not an imap uri" unless uri =~ %r!imaps?://!
- super uri, last_uid, usual, archived, id
+ super uri, last_idate, usual, archived, id
@parsed_uri = URI(uri)
@username = username
@password = password
- @uid_validity = uid_validity
@imap = nil
+ @imap_ids = {}
+ @ids = []
@labels = [:unread]
@labels << :inbox unless archived?
@labels << mailbox.intern unless mailbox =~ /inbox/i || mailbox.nil?
## open the imap connection (unless one is already open), log in,
## examine the mailbox and fill @ids and @imap_ids from the
## RFC822.SIZE and INTERNALDATE of every message in it.
##
## returns true if a connection is available. returns false if the
## source is already marked broken, or if connecting failed (in which
## case broken_msg is set and the failure is logged).
def connect
  return false if broken?
  return true if @imap

  ## ok, this is FUCKING ANNOYING.
  ##
  ##
  ## FUCK!!!!!!!!!
  Redwood::log "connecting to #{@parsed_uri.host} port #{ssl? ? 993 : 143}, ssl=#{ssl?} ..."
  sid = BufferManager.say "Connecting to IMAP server #{host}..." if BufferManager.instantiated?

  ::Thread.new do
    begin
      #raise Net::IMAP::ByeResponseError, "simulated imap failure"
      @imap = Net::IMAP.new host, ssl? ? 993 : 143, ssl?
      BufferManager.say "Logging in...", sid if BufferManager.instantiated?
      @imap.authenticate 'LOGIN', @username, @password
      BufferManager.say "Sizing mailbox...", sid if BufferManager.instantiated?
      @imap.examine mailbox

      ## treat a missing EXISTS response like an empty mailbox rather
      ## than calling [-1] on nil.
      last_id = (@imap.responses["EXISTS"] || []).last.to_i

      BufferManager.say "Reading headers (because IMAP sucks)...", sid if BufferManager.instantiated?

      ## an empty mailbox has nothing to fetch: the range 1 .. 0 is not
      ## a valid request, so don't send it.
      values = last_id > 0 ? @imap.fetch(1 .. last_id, ['RFC822.SIZE', 'INTERNALDATE']) : nil

      Redwood::log "successfully connected to #{@parsed_uri}"

      ## fetch hands back nil when there were no FETCH responses, so
      ## iterate over an empty list in that case.
      (values || []).each do |v|
        msize, mdate = v.attr['RFC822.SIZE'], Time.parse(v.attr["INTERNALDATE"])
        id = sprintf("%d.%08d", mdate.to_i, msize)
        @ids << id
        @imap_ids[id] = v.seqno
      end
    rescue SocketError, Net::IMAP::Error, SourceError => e
      self.broken_msg = e.message.chomp # fucking chomp! fuck!!!
      @imap = nil
      Redwood::log "error connecting to IMAP server: #{self.broken_msg}"
    ensure
      BufferManager.clear sid if BufferManager.instantiated?
    end
  end.join

  !!@imap
end
## the mailbox named by the path of the uri, without its leading
## slash. a uri with no path (or with just "/") names no mailbox at
## all, so fall back to INBOX instead of returning nil or "".
def mailbox
  name = @parsed_uri.path.to_s[1..-1]
  name.nil? || name.empty? ? 'INBOX' : name
end
## true when the uri uses the imaps scheme.
def ssl?
  'imaps' == @parsed_uri.scheme
end
- def load_header uid=nil
- MBox::read_header StringIO.new(raw_header(uid))
+ def load_header id
+ MBox::read_header StringIO.new(raw_header(id))
end
- def load_message uid
- RMail::Parser.read raw_full_message(uid)
+ def load_message id
+ RMail::Parser.read raw_full_message(id)
end
## load the full header text
- def raw_header uid
+ def raw_header id
connect or raise SourceError, broken_msg
- get_imap_field(uid, 'RFC822.HEADER').gsub(/\r\n/, "\n")
+ get_imap_field(id, 'RFC822.HEADER').gsub(/\r\n/, "\n")
end
- def raw_full_message uid
+ def raw_full_message id
connect or raise SourceError, broken_msg
- get_imap_field(uid, 'RFC822').gsub(/\r\n/, "\n")
+ get_imap_field(id, 'RFC822').gsub(/\r\n/, "\n")
end
- def get_imap_field uid, field
+ def get_imap_field id, field
+ imap_id = @imap_ids[id] or raise SourceError, "Unknown message id #{id}. It is likely that messages have been deleted from this IMAP mailbox. Please run sup-import --rebuild #{to_s} in order to correct this problem."
+
f =
begin
- @imap.uid_fetch uid, field
+ @imap.fetch imap_id, field
rescue Net::IMAP::Error => e
raise SourceError, e.message
end
- raise SourceError, "null IMAP field '#{field}' for message with uid #{uid}" if f.nil?
+ raise SourceError, "null IMAP field '#{field}' for message with id #{id} imap id #{imap_id}" if f.nil?
f[0].attr[field]
end
private :get_imap_field
## yield id, labels for every message from the current offset (or
## from the first message, when there is no current offset yet) up to
## the last message in the mailbox, setting cur_offset to each id
## just before it is yielded.
##
## raises SourceError if no connection can be made, or if the current
## offset does not name any message in the mailbox.
def each
  connect or raise SourceError, broken_msg
  return if @ids.empty? # an empty mailbox has nothing to yield

  offset = cur_offset || start_offset
  ## index returns nil for an id that is not (or is no longer) in the
  ## mailbox; report that as a source error rather than crashing.
  start = @ids.index(offset) or raise SourceError, "Unknown message id #{offset}. It is likely that messages have been deleted from this IMAP mailbox. Please run sup-import --rebuild #{to_s} in order to correct this problem."

  start.upto(@ids.length - 1) do |i|
    id = @ids[i]
    self.cur_offset = id
    yield id, labels
  end
end
- def start_offset; 1; end
+ def start_offset
+ connect or raise SourceError, broken_msg
+ @ids.first
+ end
def end_offset
- connect or return start_offset
- begin
- @imap.uid_search(['ALL']).last
- rescue Net::IMAP::Error => e
- raise SourceError, e.message
- end
+ connect or raise SourceError, broken_msg
+ @ids.last
end
end
-Redwood::register_yaml(IMAP, %w(uri username password uid_validity cur_offset usual archived id))
+Redwood::register_yaml(IMAP, %w(uri username password cur_offset usual archived id))
end
if m.source.is_a? Integer
m.source
else
- m.source.id or raise "unregistered source #{m.source}"
+ m.source.id or raise "unregistered source #{m.source} (id #{m.source.id.inspect})"
end
to = (m.to + m.cc + m.bcc).map { |x| x.email }.join(" ")
def parse_user_query_string str; @qparser.parse str; end
def build_query opts
-
query = Ferret::Search::BooleanQuery.new
query.add_query opts[:qobj], :must if opts[:qobj]
labels = ([opts[:label]] + (opts[:labels] || [])).compact
end
end
- attr_writer :f
- protected :f=
-
def start_offset; 0; end
def end_offset; File.size @f; end
def total; end_offset; end
## this is a file-like interface to a file that actually lives on the
## other end of an ssh connection. it works by using wc, head and tail
-## to simulate (buffered) random access. ## on a fast connection,
-## this can have a good bandwidth, but the latency is pretty terrible:
+## to simulate (buffered) random access. on a fast connection, this
+## can have a good bandwidth, but the latency is pretty terrible:
## about 1 second (!) per request. luckily, we're either just reading
## straight through the mbox (an import) or we're reading a few
-## messages at a time (viewing messages)
+## messages at a time (viewing messages) so the latency is not a problem.
-# debugging
+## all of the methods here catch SSHFileErrors, SocketErrors, and
+## Net::SSH::Exceptions and reraise them as SourceErrors. due to this
+## and to the logging, this class is somewhat tied to Sup, but it
+## wouldn't be too difficult to remove those bits and make it more
+## general-purpose.
+
+## debugging TODO: remove me
def debug s
Redwood::log s
end
module_function :debug
+## a simple buffer of contiguous data
class Buffer
def initialize
clear!
def endd; @start + @buf.length; end
def add data, offset=endd
- MBox::debug "+ adding #{data.length} bytes; size will be #{size + data.length}; limit #{SSHFile::MAX_BUF_SIZE}"
+ #MBox::debug "+ adding #{data.length} bytes; size will be #{size + data.length}; limit #{SSHFile::MAX_BUF_SIZE}"
if start.nil?
@buf = data
def to_s; empty? ? "<empty>" : "[#{start}, #{endd})"; end # for debugging
end
+## the file-like interface to a remote file
class SSHFile
MAX_BUF_SIZE = 1024 * 1024 # bytes
MAX_TRANSFER_SIZE = 1024 * 64
- REASONABLE_TRANSFER_SIZE = 1024 * 16
+ REASONABLE_TRANSFER_SIZE = 1024 * 32
SIZE_CHECK_INTERVAL = 60 * 1 # seconds
def initialize host, fn, ssh_opts={}
@fn = fn
@ssh_opts = ssh_opts
@file_size = nil
+ @offset = 0
end
def connect
return if @session
- MBox::debug "starting SSH session to #@host for #@fn..."
- @session = Net::SSH.start @host, @ssh_opts
- MBox::debug "starting SSH shell..."
- @shell = @session.shell.sync
- MBox::debug "SSH is ready"
- raise Errno::ENOENT, @fn unless @shell.test("-e #@fn").status == 0
+
+ Redwood::log "starting SSH session to #@host for #@fn..."
+ sid = BufferManager.say "Connecting to SSH host #{@host}..." if BufferManager.instantiated?
+
+ begin
+ @session = Net::SSH.start @host, @ssh_opts
+ MBox::debug "starting SSH shell..."
+ BufferManager.say "Starting SSH shell...", sid if BufferManager.instantiated?
+ @shell = @session.shell.sync
+ MBox::debug "checking for file existence..."
+ raise Errno::ENOENT, @fn unless @shell.test("-e #@fn").status == 0
+ MBox::debug "SSH is ready"
+ ensure
+ BufferManager.clear sid if BufferManager.instantiated?
+ end
end
- def eof?; @offset >= size; end
+ def eof?; raise "offset #@offset size #{size}" unless @offset && size; @offset >= size; end
def eof; eof?; end # lame but IO does this and rmail depends on it
- def seek loc; @offset = loc; end
+ def seek loc; raise "nil" unless loc; @offset = loc; end
def tell; @offset; end
def total; size; end
private
def do_remote cmd, expected_size=0
- retries = 0
- connect
- MBox::debug "sending command: #{cmd.inspect}"
begin
- result = @shell.send_command cmd
- raise SSHFileError, "Failure during remote command #{cmd.inspect}: #{result.stderr[0 .. 100]}" unless result.status == 0
- rescue Net::SSH::Exception
- retry if (retries += 1) < 3
- raise
+ retries = 0
+ connect
+ MBox::debug "sending command: #{cmd.inspect}"
+ begin
+ result = @shell.send_command cmd
+ raise SSHFileError, "Failure during remote command #{cmd.inspect}: #{result.stderr[0 .. 100]}" unless result.status == 0
+
+ rescue Net::SSH::Exception # these happen occasionally for no apparent reason. gotta love that nondeterminism!
+ retry if (retries += 1) < 3
+ raise
+ end
+ result.stdout
+ rescue Net::SSH::Exception, SocketError, Errno::ENOENT => e
+ @session = nil
+ Redwood::log "error connecting to SSH server: #{e.message}"
+ raise SourceError, "error connecting to SSH server: #{e.message}"
end
- result.stdout
end
def get_bytes offset, size
- MBox::debug "! request for [#{offset}, #{offset + size}); buf is #@buf"
+ #MBox::debug "! request for [#{offset}, #{offset + size}); buf is #@buf"
raise "wtf: offset #{offset} size #{size}" if size == 0 || offset < 0
do_remote "tail -c +#{offset + 1} #@fn | head -c #{size}", size
end
elsif @buf.start - offset < MAX_TRANSFER_SIZE
[offset, @buf.start - offset]
else
- MBox::debug "clearing buffer because buf.start #{@buf.start} - offset #{offset} >= #{MAX_TRANSFER_SIZE}"
+ MBox::debug "clearing SSH buffer because buf.start #{@buf.start} - offset #{offset} >= #{MAX_TRANSFER_SIZE}"
@buf.clear!
[offset, good_size]
end
elsif offset - @buf.endd < MAX_TRANSFER_SIZE
[@buf.endd, offset - @buf.endd]
else
- MBox::debug "clearing buffer because offset #{offset} - buf.end #{@buf.endd} >= #{MAX_TRANSFER_SIZE}"
+ MBox::debug "clearing SSH buffer because offset #{offset} - buf.end #{@buf.endd} >= #{MAX_TRANSFER_SIZE}"
@buf.clear!
[offset, good_size]
end
module Redwood
module MBox
-class SSHLoader < Loader
+class SSHLoader < Source
+ attr_reader_cloned :labels
+
def initialize uri, username=nil, password=nil, start_offset=nil, usual=true, archived=false, id=nil
- raise ArgumentError, "not an mbox+ssh uri" unless uri =~ %r!^mbox\+ssh://!
+ raise ArgumentError, "not an mbox+ssh uri: #{uri.inspect}" unless uri =~ %r!^mbox\+ssh://!
- super nil, start_offset, usual, archived, id
+ super uri, start_offset, usual, archived, id
@parsed_uri = URI(uri)
@username = username
@password = password
- @f = nil
@uri = uri
opts = {}
opts[:username] = @username if @username
opts[:password] = @password if @password
- begin
- @f = SSHFile.new host, filename, opts
- self.f = @f
- rescue SSHFileError => e
- self.broken_msg = e.message
- end
-
+ @f = SSHFile.new host, filename, opts
+ @loader = Loader.new @f, start_offset, usual, archived, id
+
## heuristic: use the filename as a label, unless the file
## has a path that probably represents an inbox.
+ @labels = [:unread]
+ @labels << :inbox unless archived?
@labels << File.basename(filename).intern unless File.dirname(filename) =~ /\b(var|usr|spool)\b/
end
def host; @parsed_uri.host; end
def filename; @parsed_uri.path[1..-1] end ##XXXX TODO handle nil
+ def next; with(@loader.next) { @cur_offset = @loader.cur_offset }; end # only necessary because YAML is a PITA
def end_offset; @f.size; end
+ def cur_offset= o; @cur_offset = @loader.cur_offset = o; @dirty = true; end
+ def id; @loader.id; end
+ def id= o; @id = @loader.id = o; end
+ def cur_offset; @loader.cur_offset; end
def to_s; @parsed_uri.to_s; end
+
+ defer_all_other_method_calls_to :loader
end
Redwood::register_yaml(SSHLoader, %w(uri username password cur_offset usual archived id))
begin
read_header @source.load_header(@source_info)
message_to_chunks @source.load_message(@source_info)
- rescue SourceError => e
+ rescue SourceError, SocketError => e
[Text.new(error_message(e.message))]
end
end
##
## broken? means no message can be loaded, e.g. IMAP server is
## down, mbox file is corrupt and needs to be rescanned.
- bool_reader :usual, :archived, :dirty
- attr_reader :cur_offset, :broken_msg
- attr_accessor :id
- ## You should implement:
+ ## When writing a new source, you should implement:
##
## start_offset
## end_offset
## load_message(offset)
## raw_header(offset)
## raw_full_message(offset)
- ## next
+ ## next (or each, if you prefer)
+
+ ## you can throw SourceErrors from any of those, but we don't catch
+ ## anything else, so make sure you catch all non-fatal errors and
+ ## reraise them as source errors.
+
+ bool_reader :usual, :archived, :dirty
+ attr_reader :cur_offset, :broken_msg
+ attr_accessor :id
def initialize uri, initial_offset=nil, usual=true, archived=false, id=nil
@uri = uri
- @cur_offset = initial_offset || start_offset
+ @cur_offset = initial_offset
@usual = usual
@archived = archived
@id = id
def broken?; !@broken_msg.nil?; end
def to_s; @uri; end
def seek_to! o; self.cur_offset = o; end
- def reset!; seek_to! start_offset; end
+ def reset!
+ return if broken?
+ begin
+ seek_to! start_offset
+ rescue SourceError
+ end
+ end
def == o; o.to_s == to_s; end
## true when there is nothing (more) to read from this source: it is
## broken, its offsets cannot be determined (a SourceError was raised
## while asking for them), or the current offset has reached the end
## offset. a source with no current offset yet is first positioned at
## its start offset.
def done?
  return true if broken?
  begin
    ## the explicit self matters: a bare "cur_offset ||= ..." would
    ## create a local variable (nil at this point) instead of calling
    ## the accessor, and so would always compare start_offset.
    (self.cur_offset ||= start_offset) >= end_offset
  rescue SourceError
    true
  end
end
def is_source_for? s; to_s == s; end
def each
- until done?
- n, labels = self.next
- raise "no message" unless n
- yield n, labels
+ begin
+ self.cur_offset ||= start_offset
+ until done? || broken? # just like life!
+ n, labels = self.next
+ raise "no message" unless n
+ yield n, labels
+ end
+ rescue SourceError
+ # just die
end
end
-protected
-
def cur_offset= o
@cur_offset = o
@dirty = true
end
-
- attr_writer :broken_msg
+
+ def broken_msg= m
+ @broken_msg = m
+ Redwood::log "#{to_s}: #{m}"
+ end
end
Redwood::register_yaml(Source, %w(uri cur_offset usual archived id))
## define, for each given name, a reader method that returns a clone
## of the instance variable of the same name.
def attr_reader_cloned *args
  args.each do |sym|
    reader_src = %{ def #{sym}; @#{sym}.clone; end }
    class_eval reader_src
  end
end
+
## forward every method that instances of this class do not define
## themselves to the object held in the instance variable named by
## obj (e.g. :loader forwards to @loader). respond_to? is kept in
## agreement with the forwarding, so that it answers true for the
## methods that will actually be handled.
def defer_all_other_method_calls_to obj
  class_eval %{
    def method_missing meth, *a, &b; @#{obj}.send meth, *a, &b; end
    def respond_to_missing? meth, include_private = false
      @#{obj}.respond_to?(meth, include_private) || super
    end
  }
end
end
class Object