module Redwood
module MBox
- BREAK_RE = /^From \S+/ ######### TODO REMOVE ME
+ BREAK_RE = /^From \S+ (.+)$/
+
+ def is_break_line? l
+ l =~ BREAK_RE or return false
+ time = $1
+ begin
+ ## hack -- make Time.parse fail when trying to substitute values from Time.now
+ Time.parse time, 0
+ true
+ rescue NoMethodError
+ Redwood::log "found invalid date in potential mbox split line, not splitting: #{l.inspect}"
+ false
+ end
+ end
+ module_function :is_break_line?
end
end
attr_accessor :labels
## uri_or_fp is horrific. need to refactor.
- def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil, labels=[]
+ def initialize uri_or_fp, start_offset=0, usual=true, archived=false, id=nil, labels=[]
@mutex = Mutex.new
@labels = ((labels || []) - LabelManager::RESERVED_LABELS).uniq.freeze
@mutex.synchronize do
@f.seek offset
l = @f.gets
- unless l =~ BREAK_RE
+ unless MBox::is_break_line? l
raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}."
end
header = parse_raw_email_header @f
@mutex.synchronize do
@f.seek offset
begin
- RMail::Mailbox::MBoxReader.new(@f).each_message do |input|
- m = RMail::Parser.read(input)
- if m.body && m.body.is_a?(String)
- m.body.gsub!(/^>From /, "From ")
- end
- return m
- end
+ ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore
+ ## "From" at the start of a message body line.
+ string = ""
+ l = @f.gets
+ string << l until @f.eof? || MBox::is_break_line?(l = @f.gets)
+ RMail::Parser.read string
rescue RMail::Parser::Error => e
raise FatalSourceError, "error parsing mbox file: #{e.message}"
end
@mutex.synchronize do
@f.seek offset
yield @f.gets
- until @f.eof? || (l = @f.gets) =~ BREAK_RE
+ until @f.eof? || MBox::is_break_line?(l = @f.gets)
yield l
end
end
## 2. at the beginning of an mbox separator (in all other
## cases).
- l = @f.gets or raise "next while at EOF"
+ l = @f.gets or return nil
if l =~ /^\s*$/ # case 1
returned_offset = @f.tell
@f.gets # now we're at a BREAK_RE, so skip past it
end
while(line = @f.gets)
- break if line =~ BREAK_RE
+ break if MBox::is_break_line? line
next_offset = @f.tell
end
end
## this is called when the message body needs to actually be loaded.
def load_from_source!
@chunks ||=
- if @source.has_errors?
+ if @source.respond_to?(:has_errors?) && @source.has_errors?
[Chunk::Text.new(error_message(@source.error.message).split("\n"))]
else
begin
[notice, sig, children].flatten.compact
end
+ ## takes a RMail::Message, breaks it into Chunk:: classes.
def message_to_chunks m, encrypted=false, sibling_types=[]
if m.multipart?
chunks =
Index.usual_sources.each do |source|
# yield "source #{source} is done? #{source.done?} (cur_offset #{source.cur_offset} >= #{source.end_offset})"
begin
- yield "Loading from #{source}... " unless source.done? || source.has_errors?
+ yield "Loading from #{source}... " unless source.done? || (source.respond_to?(:has_errors?) && source.has_errors?)
rescue SourceError => e
Redwood::log "problem getting messages from #{source}: #{e.message}"
Redwood::report_broken_sources :force_to_top => true
yield f.gets
end
end
-
- # FIXME: this one was not mentioned in the source documentation, but
- # it's still required
- def has_errors?
-
- end
-
end
end
assert_equal "Bob <bob@bob.com>", h["from"]
assert_nil h["to"]
end
+
+ def test_from_line_splitting
+ l = MBox::Loader.new StringIO.new(<<EOS)
+From sup-talk-bounces@rubyforge.org Mon Apr 27 12:56:18 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello there friend. How are you?
+
+From sea to shining sea
+
+From bob@bob.com I get only spam.
+
+From bob@bob.com
+
+From bob@bob.com
+
+(that second one has spaces at the endj
+
+This is the end of the email.
+EOS
+ offset, labels = l.next
+ assert_equal 0, offset
+ offset, labels = l.next
+ assert_nil offset
+ end
+
+ def test_more_from_line_splitting
+ l = MBox::Loader.new StringIO.new(<<EOS)
+From sup-talk-bounces@rubyforge.org Mon Apr 27 12:56:18 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello there friend. How are you?
+
+From bob@bob.com Mon Apr 27 12:56:19 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello again! Would you like to buy my products?
+EOS
+ offset, labels = l.next
+ assert_not_nil offset
+
+ offset, labels = l.next
+ assert_not_nil offset
+
+ offset, labels = l.next
+ assert_nil offset
+ end
end