##
## TODO: move functionality to somewhere better, like message.rb
module MBox
- BREAK_RE = /^From \S+@\S+ /
+ BREAK_RE = /^From \S+ (.+)$/
HEADER_RE = /\s*(.*?)\s*/
+ def is_break_line? l
+ l =~ BREAK_RE or return false
+ time = $1
+ begin
+ ## hack -- make Time.parse fail when trying to substitute values from Time.now
+ Time.parse time, 0
+ true
+ rescue NoMethodError
+ Redwood::log "found invalid date in potential mbox split line, not splitting: #{l.inspect}"
+ false
+ end
+ end
+ module_function :is_break_line?
+
def read_header f
header = {}
last = nil
def read_body f
body = []
f.each_line do |l|
- break if l =~ BREAK_RE
+ break if is_break_line?(l)
body << l.chomp
end
body
@mutex.synchronize do
@f.seek offset
l = @f.gets
- unless l =~ BREAK_RE
+ unless MBox::is_break_line? l
raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}."
end
header = MBox::read_header @f
## "From" at the start of a message body line.
string = ""
l = @f.gets
- string << l until @f.eof? || (l = @f.gets) =~ BREAK_RE
+ string << l until @f.eof? || MBox::is_break_line?(l = @f.gets)
RMail::Parser.read string
rescue RMail::Parser::Error => e
raise FatalSourceError, "error parsing mbox file: #{e.message}"
@mutex.synchronize do
@f.seek offset
yield @f.gets
- until @f.eof? || (l = @f.gets) =~ BREAK_RE
+ until @f.eof? || MBox::is_break_line?(l = @f.gets)
yield l
end
end
end
while(line = @f.gets)
- break if line =~ BREAK_RE
+ break if MBox::is_break_line? line
next_offset = @f.tell
end
end
assert_equal "Bob <bob@bob.com>", h["From"]
assert_nil h["To"]
end
+
+ def test_from_line_splitting
+ l = MBox::Loader.new StringIO.new(<<EOS)
+From sup-talk-bounces@rubyforge.org Mon Apr 27 12:56:18 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello there friend. How are you?
+
+From sea to shining sea
+
+From bob@bob.com I get only spam.
+
+From bob@bob.com
+
+From bob@bob.com
+
+(that second one has spaces at the endj
+
+This is the end of the email.
+EOS
+ offset, labels = l.next
+ assert_equal 0, offset
+ offset, labels = l.next
+ assert_nil offset
+ end
+
+ def test_more_from_line_splitting
+ l = MBox::Loader.new StringIO.new(<<EOS)
+From sup-talk-bounces@rubyforge.org Mon Apr 27 12:56:18 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello there friend. How are you?
+
+From bob@bob.com Mon Apr 27 12:56:19 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello again! Would you like to buy my products?
+EOS
+ offset, labels = l.next
+ assert_not_nil offset
+
+ offset, labels = l.next
+ assert_not_nil offset
+
+ offset, labels = l.next
+ assert_nil offset
+ end
end