require 'rmail'
require 'uri'
+require 'set'
module Redwood
module MBox
class Loader < Source
+ include SerializeLabelsNicely
yaml_properties :uri, :cur_offset, :usual, :archived, :id, :labels
+ attr_reader :labels
+
## uri_or_fp is horrific. need to refactor.
- def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil, labels=[]
+ def initialize uri_or_fp, start_offset=0, usual=true, archived=false, id=nil, labels=nil
@mutex = Mutex.new
- @labels = ((labels || []) - LabelManager::RESERVED_LABELS).uniq.freeze
+ @labels = Set.new((labels || []) - LabelManager::RESERVED_LABELS)
case uri_or_fp
when String
if File.dirname(path) =~ /\b(var|usr|spool)\b/
[]
else
- [File.basename(path).intern]
+ [File.basename(path).downcase.intern]
end
end
raise OutOfSyncSourceError, "mbox file is smaller than last recorded message offset. Messages have probably been deleted by another client."
end
end
-
+
## First offset of this source; always 0.
def start_offset
  0
end
## End offset of this source: the size File.size reports for @f.
def end_offset
  File.size(@f)
end
@mutex.synchronize do
@f.seek offset
l = @f.gets
- unless l =~ BREAK_RE
+ unless MBox::is_break_line? l
raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}."
end
- header = MBox::read_header @f
+ header = parse_raw_email_header @f
end
header
end
@mutex.synchronize do
@f.seek offset
begin
- RMail::Mailbox::MBoxReader.new(@f).each_message do |input|
- return RMail::Parser.read(input)
- end
+ ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore
+ ## "From" at the start of a message body line.
+ string = ""
+ l = @f.gets
+ string << l until @f.eof? || MBox::is_break_line?(l = @f.gets)
+ RMail::Parser.read string
rescue RMail::Parser::Error => e
raise FatalSourceError, "error parsing mbox file: #{e.message}"
end
end
end
+ ## scan forward until we're at the valid start of a message
+ def correct_offset!
+ @mutex.synchronize do
+ @f.seek cur_offset
+ string = ""
+ until @f.eof? || MBox::is_break_line?(l = @f.gets)
+ string << l
+ end
+ self.cur_offset += string.length
+ end
+ end
+
def raw_header offset
ret = ""
@mutex.synchronize do
@f.seek offset
- until @f.eof? || (l = @f.gets) =~ /^$/
- ret += l
+ until @f.eof? || (l = @f.gets) =~ /^\r*$/
+ ret << l
end
end
ret
## Returns the raw text of the message at +offset+ as a single String, built
## by appending every line yielded by each_raw_message_line.
def raw_message offset
ret = ""
- each_raw_message_line(offset) { |l| ret += l }
+ each_raw_message_line(offset) { |l| ret << l }
ret
end
+ def store_message date, from_email, &block
+ need_blank = File.exists?(@filename) && !File.zero?(@filename)
+ File.open(@filename, "a") do |f|
+ f.puts if need_blank
+ f.puts "From #{from_email} #{date.utc}"
+ yield f
+ end
+ end
+
## apparently it's a million times faster to call this directly if
## we're just moving messages around on disk, than reading things
## into memory with raw_message.
@mutex.synchronize do
@f.seek offset
yield @f.gets
- until @f.eof? || (l = @f.gets) =~ BREAK_RE
+ until @f.eof? || MBox::is_break_line?(l = @f.gets)
yield l
end
end
## 2. at the beginning of an mbox separator (in all other
## cases).
- l = @f.gets or raise "next while at EOF"
+ l = @f.gets or return nil
if l =~ /^\s*$/ # case 1
returned_offset = @f.tell
@f.gets # now we're at a BREAK_RE, so skip past it
end
while(line = @f.gets)
- break if line =~ BREAK_RE
+ break if MBox::is_break_line? line
next_offset = @f.tell
end
end
end
self.cur_offset = next_offset
- [returned_offset, (@labels + [:unread]).uniq]
+ [returned_offset, (labels + [:unread])]
end
end