puts "loaded index of #{index.size} messages"
sources = ARGV.map do |fn|
+ fn = "mbox://#{fn}" unless fn =~ %r!://!
source = index.source_for fn
unless source
source =
found[m.id] = true
end
m.remove_label :unread if m.status == "RO" unless force_read
- puts "# message at #{offset} labels #{labels.inspect}" unless rebuild || force_rebuild
+ puts "# message at #{offset}, labels: #{labels * ', '}" unless rebuild || force_rebuild
if (rebuild || force_rebuild) &&
(docid, entry = index.load_entry_for_id(m.id)) && entry
if force_rebuild || entry[:source_info].to_i != offset
else
num += 1 if index.add_message m
end
- rescue Redwood::MessageFormatError, Redwood::MBox::Error => e
+ rescue Redwood::MessageFormatError, Redwood::SourceError => e
$stderr.puts "ignoring erroneous message at #{source}##{offset}: #{e.message}"
end
if num % 1000 == 0 && num > 0
require 'zlib'
require 'thread'
require 'fileutils'
+
Thread.abort_on_exception = true # make debugging possible
class Object
- ## this is for debugging purposes because i keep calling nil.id and
- ## i want it to throw an exception
- def id
+ ## this is for debugging purposes because i keep calling #id on the
+ ## wrong object and i want it to throw an exception
+ def id
raise "wrong id called"
end
end
require "sup/util"
require "sup/update"
require "sup/message"
+require "sup/source"
require "sup/mbox"
require "sup/imap"
require "sup/person"
self.class.i_am_the_instance self
end
- def self.source_name; "drafts"; end
+ def self.source_name; "drafts://"; end
def self.source_id; 9999; end
- def new_source; @source = DraftLoader.new @dir; end
+ def new_source; @source = DraftLoader.new; end
def write_draft
offset = @source.gen_offset
end
end
-class DraftLoader
- attr_accessor :dir, :end_offset
- bool_reader :dirty
+class DraftLoader < Source
+ attr_accessor :dir
- def initialize dir, end_offset=0
+ def initialize cur_offset=0
+ dir = Redwood::DRAFT_DIR
Dir.mkdir dir unless File.exists? dir
+ super "draft://#{dir}", cur_offset, true, false
@dir = dir
- @end_offset = end_offset
- @dirty = false
end
- def done?; !File.exists? fn_for_offset(@end_offset); end
- def usual?; true; end
def id; DraftManager.source_id; end
def to_s; DraftManager.source_name; end
- def is_source_for? x; x == DraftManager.source_name; end
+
+ def next # returns [offset, [:draft]] for the first draft file found at or after cur_offset
+ ret = nil
+ begin
+ ret = cur_offset
+ self.cur_offset = cur_offset + 1 # always step forward, even over offsets that have no file
+ end until File.exists? fn_for_offset(ret) # NOTE: only terminates once some offset >= the starting cur_offset has a file
+ [ret, [:draft]]
+ end
def gen_offset
- i = @end_offset
+ i = cur_offset
while File.exists? fn_for_offset(i)
i += 1
end
ret
end
- def each
- while File.exists?(fn = File.join(@dir, @end_offset.to_s))
- yield @end_offset, [:draft, :inbox]
- @end_offset += 1
- @dirty = true
- end
- end
-
- def total; Dir[File.join(@dir, "*")].sort.last.to_i; end
- def reset!; @end_offset = 0; @dirty = true; end
+ def start_offset; 0; end
+ ## one past the highest-numbered entry in the draft directory. entry
+ ## names are compared as integers rather than strings, so that "10"
+ ## ranks above "9"; non-numeric entries ("." and "..") count as 0.
+ def end_offset; Dir.new(@dir).entries.map { |e| e.to_i }.max + 1; end
end
-Redwood::register_yaml(DraftLoader, %w(dir end_offset))
+Redwood::register_yaml(DraftLoader, %w(cur_offset))
end
module Redwood
-class IMAP
- attr_reader :uri
- bool_reader :usual, :archived, :read, :dirty
- attr_accessor :id, :labels
-
- class Error < StandardError; end
-
+class IMAP < Source
+ attr_reader :labels, :broken_msg
+
def initialize uri, username, password, last_uid=nil, usual=true, archived=false, id=nil
- raise "username and password must be specified" unless username && password
+ raise ArgumentError, "username and password must be specified" unless username && password
+
+ super uri, last_uid, usual, archived, id
- @uri_s = uri
- @uri = URI(uri)
+ @parsed_uri = URI(uri)
@username = username
@password = password
- @last_uid = last_uid || 1
- @dirty = false
- @usual = usual
- @archived = archived
- @id = id
@imap = nil
- @labels = [:unread,
- archived ? nil : :inbox,
- mailbox !~ /inbox/i && !mailbox.empty? ? mailbox.intern : nil,
- ].compact
+ @labels = []
+ @labels << mailbox.intern unless mailbox =~ /inbox/i || mailbox.empty?
+ @labels << :inbox unless archived?
+
+ connect
end
def connect
- return if @imap
- Redwood::log "connecting to #{@uri.host} port #{ssl? ? 993 : 143}, ssl=#{ssl?}"
- #raise "simulated imap failure"
- @imap = Net::IMAP.new @uri.host, ssl? ? 993 : 143, ssl?
- @imap.authenticate('LOGIN', @username, @password)
- Redwood::log "success. selecting #{mailbox.inspect}."
- @imap.examine(mailbox)
+ return false if broken?
+ return true if @imap
+ Redwood::log "connecting to #{@parsed_uri.host} port #{ssl? ? 993 : 143}, ssl=#{ssl?} ..."
+
+ ## ok, this is FUCKING ANNOYING.
+ ##
+ ## what imap.rb likes to do is, if an exception occurs, catch it
+ ## and re-raise it on the calling thread. seems reasonable. but
+ ## what that REALLY means is that the only way to reasonably
+ ## initialize imap is in its own thread, because otherwise, you
+ ## will never be able to catch the exception it raises on the
+ ## calling thread, and the backtrace will not make any sense at
+ ## all, and you will waste HOURS of your life on this fucking
+ ## problem.
+ ##
+ ## FUCK!!!!!!!!!
+ ::Thread.new do
+ begin
+ #raise Net::IMAP::ByeResponseError, "simulated imap failure"
+ @imap = Net::IMAP.new @parsed_uri.host, ssl? ? 993 : 143, ssl?
+ @imap.authenticate 'LOGIN', @username, @password
+ @imap.examine mailbox
+ Redwood::log "successfully connected to #{@parsed_uri}, mailbox #{mailbox}"
+ rescue Exception => e
+ self.broken = true
+ @imap = nil
+ @broken_msg = e.message.chomp # fucking chomp! fuck!!!
+ Redwood::log "error connecting to IMAP server: #{@broken_msg}"
+ end
+ end.join
+
+ !!@imap
end
private :connect
- def mailbox; @uri.path[1..-1] end ##XXXX TODO handle nil
- def ssl?; @uri.scheme == 'imaps' end
- def reset!; @last_uid = 1; @dirty = true; end
- def == o; o.is_a?(IMAP) && o.uri == uri; end
- def uri; @uri.to_s; end
- def to_s; uri; end
- def is_source_for? s; to_s == s; end
+ def mailbox; @parsed_uri.path[1..-1] end ##XXXX TODO handle nil
+ def ssl?; @parsed_uri.scheme == 'imaps' end
def load_header uid=nil
MBox::read_header StringIO.new(raw_header(uid))
## load the full header text
def raw_header uid
- connect
+ begin
+ connect or return broken_msg
+ rescue Exception => e
+ raise "wtf: #{e.inspect}"
+ end
@imap.uid_fetch(uid, 'RFC822.HEADER')[0].attr['RFC822.HEADER'].gsub(/\r\n/, "\n")
end
def raw_full_message uid
- connect
+ connect or return broken_msg
@imap.uid_fetch(uid, 'RFC822')[0].attr['RFC822'].gsub(/\r\n/, "\n")
end
+ ## yields [uid, labels] for each message uid the server reports in
+ ## the range cur_offset..end_offset. returns broken_msg without
+ ## yielding anything when no connection can be made.
def each
- connect
- uids = @imap.uid_search ['UID', "#{@last_uid}:#{total}"]
+ connect or return broken_msg
+ uids = @imap.uid_search ['UID', "#{cur_offset}:#{end_offset}"]
uids.each do |uid|
- yield uid, labels
- @last_uid = uid
- @dirty = true
+ ## record progress through the inherited cur_offset= writer, which
+ ## also marks the source dirty. the search range above reads
+ ## cur_offset, so updating @last_uid here would never advance it.
+ self.cur_offset = uid
+ yield uid, labels
end
end
- def done?; @last_uid >= total; end
-
- def total
- connect
+ def start_offset; 1; end
+ def end_offset
+ connect or return start_offset
@imap.uid_search(['ALL']).last
end
end
-Redwood::register_yaml(IMAP, %w(uri_s username password last_uid usual archived id))
+## the fourth field is cur_offset (the attribute Source exposes and the
+## name the other sources register), lining up with the last_uid
+## argument of IMAP#initialize.
+Redwood::register_yaml(IMAP, %w(uri username password cur_offset usual archived id))
end
source = @sources[doc[:source_id].to_i]
#puts "building message #{doc[:message_id]} (#{source}##{doc[:source_info]})"
raise "invalid source #{doc[:source_id]}" unless source
- raise "no snippet" unless doc[:snippet]
-
- begin
- Message.new :source => source, :source_info => doc[:source_info].to_i,
- :labels => doc[:label].split(" ").map { |s| s.intern },
- :snippet => doc[:snippet]
- rescue MessageFormatError => e
- raise IndexError.new(source, "error building message #{doc[:message_id]} at #{source}/#{doc[:source_info]}: #{e.message}")
-# rescue StandardError => e
-# Message.new_from_index doc, <<EOS
-# An error occurred while loading this message. It is possible that the source
-# has changed, or (in the case of remote sources) is down. The error was:
-# #{e.message}
-# EOS
+
+ m =
+ if source.broken?
+ nil
+ else
+ begin
+ Message.new :source => source, :source_info => doc[:source_info].to_i,
+ :labels => doc[:label].split(" ").map { |s| s.intern },
+ :snippet => doc[:snippet]
+ rescue MessageFormatError => e
+ raise IndexError.new(source, "error building message #{doc[:message_id]} at #{source}/#{doc[:source_info]}: #{e.message}")
+ rescue SourceError => e
+ nil
+ end
+ end
+
+ unless m
+ fake_header = {
+ "date" => Time.at(doc[:date].to_i),
+ "subject" => unwrap_subj(doc[:subject]),
+ "from" => doc[:from],
+ "to" => doc[:to],
+ "message-id" => doc[:message_id],
+ "references" => doc[:refs],
+ }
+
+ m = Message.new :labels => doc[:label].split(" ").map { |s| s.intern },
+ :snippet => doc[:snippet], :header => fake_header,
+ :body => <<EOS
+#{doc[:snippet]}...
+
+An error occurred while loading this message. It is possible that the source
+has changed, or (in the case of remote sources) is down.
+
+The error message was:
+ #{source.broken_msg}
+EOS
end
+ m
end
def fresh_thread_id; @next_thread_id += 1; end
def wrap_subj subj; "__START_SUBJECT__ #{subj} __END_SUBJECT__"; end
+ def unwrap_subj subj; subj =~ /__START_SUBJECT__ (.*?) __END_SUBJECT__/ && $1; end
def add_message m
return false if contains? m
module Redwood
module MBox
-class Error < StandardError; end
-
-class Loader
- attr_reader :filename
- bool_reader :usual, :archived, :read, :dirty
- attr_accessor :id, :labels
-
- ## end_offset is the last offsets within the file which we've read.
- ## everything after that is considered new messages that haven't
- ## been indexed.
- def initialize filename, end_offset=nil, usual=true, archived=false, id=nil
- @filename = filename.gsub(%r(^mbox://), "")
- @end_offset = end_offset || 0
- @dirty = false
- @usual = usual
- @archived = archived
- @id = id
+class Loader < Source
+ attr_reader :labels
+
+ def initialize uri, start_offset=nil, usual=true, archived=false, id=nil
+ raise ArgumentError, "not an mbox uri" unless uri =~ %r!mbox://!
+ super
+
@mutex = Mutex.new
+ @filename = uri.sub(%r!^mbox://!, "")
@f = File.open @filename
- @labels = ([
- :unread,
- archived ? nil : :inbox,
- ] +
- if File.dirname(filename) =~ /\b(var|usr|spool)\b/
- []
- else
- [File.basename(filename).intern]
- end).compact
- end
-
- def seek_to! offset
- @end_offset = [offset, File.size(@f) - 1].min;
- @dirty = true
+ ## heuristic: use the filename as a label, unless the file
+ ## has a path that probably represents an inbox.
+ @labels = []
+ @labels << File.basename(@filename).intern unless File.dirname(@filename) =~ /\b(var|usr|spool)\b/
end
- def reset!; seek_to! 0; end
- def == o; o.is_a?(Loader) && o.filename == filename; end
- def to_s; "mbox://#{@filename}"; end
- def is_source_for? s
- @filename == s || self.to_s == s
- end
+ def start_offset; 0; end
+ def end_offset; File.size @f; end
def load_header offset
- raise ArgumentError, "nil offset" unless offset
header = nil
@mutex.synchronize do
- @f.seek offset if offset
+ @f.seek offset
l = @f.gets
- raise Error, "offset mismatch in mbox file offset #{offset.inspect}: #{l.inspect}. Run 'sup-import --rebuild #{to_s}' to correct this." unless l =~ BREAK_RE
+ raise SourceError, "offset mismatch in mbox file offset #{offset.inspect}: #{l.inspect}. Run 'sup-import --rebuild #{to_s}' to correct this." unless l =~ BREAK_RE
header = MBox::read_header @f
end
header
end
def load_message offset
- ret = nil
@mutex.synchronize do
@f.seek offset
- RMail::Mailbox::MBoxReader.new(@f).each_message do |input|
- return RMail::Parser.read(input)
+ begin
+ RMail::Mailbox::MBoxReader.new(@f).each_message do |input|
+ return RMail::Parser.read(input)
+ end
+ rescue RMail::Parser::Error => e
+ raise SourceError, "error parsing message with rmail: #{e.message}"
end
end
end
end
def next
- return nil if done?
- @dirty = true
- start_offset = nil
- next_end_offset = @end_offset
-
- ## @end_offset could be at one of two places here: before a \n and
- ## a mbox separator, if it was previously at EOF and a new message
- ## was added; or, at the beginning of an mbox separator (in all
- ## other cases).
+ returned_offset = nil
+ next_offset = cur_offset
+
@mutex.synchronize do
- @f.seek @end_offset
- l = @f.gets or return nil
- if l =~ /^\s*$/
- start_offset = @f.tell
- @f.gets
- else
- start_offset = @end_offset
+ @f.seek cur_offset
+
+ ## cur_offset could be at one of two places here:
+
+ ## 1. before a \n and a mbox separator, if it was previously at
+ ## EOF and a new message was added; or,
+ ## 2. at the beginning of an mbox separator (in all other
+ ## cases).
+
+ l = @f.gets or raise "next while at EOF"
+ if l =~ /^\s*$/ # case 1
+ returned_offset = @f.tell
+ @f.gets # now we're at a BREAK_RE, so skip past it
+ else # case 2
+ returned_offset = cur_offset
+ ## we've already skipped past the BREAK_RE, so just go
end
while(line = @f.gets)
break if line =~ BREAK_RE
- next_end_offset = @f.tell
+ next_offset = @f.tell
end
end
- @end_offset = next_end_offset
- start_offset
+ self.cur_offset = next_offset
+ [returned_offset, labels]
end
-
- def each
- until @end_offset >= File.size(@f)
- n = self.next
- yield(n, labels) if n
- end
- end
-
- def done?; @end_offset >= File.size(@f); end
- def total; File.size @f; end
end
-Redwood::register_yaml(Loader, %w(filename end_offset usual archived id))
+Redwood::register_yaml(Loader, %w(uri cur_offset usual archived id))
end
end
## if index_entry is specified, will fill in values from that,
def initialize opts
- @source = opts[:source]
- @source_info = opts[:source_info]
+ if opts[:source]
+ @source = opts[:source]
+ @source_info = opts[:source_info] or raise ArgumentError, ":source but no :source_info"
+ @body = nil
+ else
+ @source = @source_info = nil
+ @body = opts[:body] or raise ArgumentError, "one of :body or :source must be specified"
+ end
@snippet = opts[:snippet] || ""
@labels = opts[:labels] || []
@dirty = false
end
begin
- @date = Time.parse header["date"]
+ date = header["date"]
+ @date = (Time === date ? date : Time.parse(header["date"]))
rescue ArgumentError => e
raise MessageFormatError, "unparsable date #{header['date']}: #{e.message}"
end
end
def to_chunks
- m = @source.load_message @source_info
- message_to_chunks m
+ if @body
+ [Text.new(@body.split("\n"))]
+ else
+ message_to_chunks @source.load_message(@source_info)
+ end
end
def raw_header
self.class.i_am_the_instance self
end
- def self.source_name; "sent"; end
+ def self.source_name; "sent://"; end
def self.source_id; 9998; end
- def new_source; @source = SentLoader.new @fn; end
+ def new_source; @source = SentLoader.new; end
def write_sent_message date, from_email
need_blank = File.exists?(@fn) && !File.zero?(@fn)
end
class SentLoader < MBox::Loader
- def initialize filename, end_offset=0
+ def initialize cur_offset=0
+ filename = Redwood::SENT_FN
File.open(filename, "w") { } unless File.exists? filename
- super filename, end_offset, true, true
+ super "mbox://" + filename, cur_offset, true, true
end
- def id; SentManager.source_id; end
def to_s; SentManager.source_name; end
-
+ def id; SentManager.source_id; end
def labels; [:sent, :inbox]; end
end
-Redwood::register_yaml(SentLoader, %w(filename end_offset))
+Redwood::register_yaml(SentLoader, %w(cur_offset))
end
--- /dev/null
+module Redwood
+
+class SourceError < StandardError; end
+
+class Source
+ ## Common base class for message sources; tracks the offset read so far.
+ ##
+ ## dirty? describes whether cur_offset has changed, which means the source
+ ## needs to be re-saved to disk. broken? means no message can be loaded
+ ## (e.g. IMAP server is down), so don't even bother.
+ bool_reader :usual, :archived, :dirty, :broken
+ attr_reader :cur_offset
+ attr_accessor :id
+
+ ## You should implement:
+ ##
+ ## start_offset -- where a new or reset! source begins
+ ## end_offset -- done? is true once cur_offset reaches this
+ ## load_header(offset)
+ ## load_message(offset)
+ ## raw_header(offset)
+ ## raw_full_message(offset)
+ ## next -- returns [offset, labels]; each raises if offset is nil or false
+
+ def initialize uri, initial_offset=nil, usual=true, archived=false, id=nil
+ @uri = uri
+ @cur_offset = initial_offset || start_offset # nil means "begin at the subclass's start_offset"
+ @usual = usual
+ @archived = archived
+ @id = id
+ @dirty = false
+ @broken = false
+ end
+
+ def to_s; @uri; end
+ def seek_to! o; self.cur_offset = o; end # goes through the writer, so this marks the source dirty
+ def reset!; seek_to! start_offset; end
+ def == o; o.to_s == to_s; end # equality is decided purely by string form
+ def done?; cur_offset >= end_offset; end
+ def is_source_for? s; to_s == s; end
+
+ def each # yields [offset, labels] from repeated calls to self.next until done?
+ until done?
+ n, labels = self.next
+ raise "no message" unless n
+ labels += [:inbox] unless archived? # += builds a new array, leaving the one returned by self.next untouched
+ yield n, labels
+ end
+ end
+
+protected
+
+ def cur_offset= o # every assignment also sets the dirty flag
+ @cur_offset = o
+ @dirty = true
+ end
+
+ attr_writer :broken
+end
+
+Redwood::register_yaml(Source, %w(uri cur_offset usual archived id))
+
+end