From: William Morgan Date: Tue, 28 Apr 2009 13:34:27 +0000 (-0400) Subject: fix mbox splitting regexp X-Git-Url: https://git.cworth.org/git?a=commitdiff_plain;h=d06b80220e17580c61750b6b29f02c0ca2a7bd57;p=sup fix mbox splitting regexp I dunno. This helps with the "From problem", but at the expense of being more specific than the mbox spec really demands. I don't think there's a truly right solution in general (due to the mbox format being a fundamentally broken one), but I'm hoping this will work with all modern mbox files. --- diff --git a/lib/sup/mbox.rb b/lib/sup/mbox.rb index 8497a37..33a8adb 100644 --- a/lib/sup/mbox.rb +++ b/lib/sup/mbox.rb @@ -10,7 +10,7 @@ module Redwood ## ## TODO: move functionality to somewhere better, like message.rb module MBox - BREAK_RE = /^From \S+/ + BREAK_RE = /^From \S+@\S+ / HEADER_RE = /\s*(.*?)\s*/ def read_header f diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb index 7fe9129..086510d 100644 --- a/lib/sup/mbox/loader.rb +++ b/lib/sup/mbox/loader.rb @@ -68,13 +68,12 @@ class Loader < Source @mutex.synchronize do @f.seek offset begin - RMail::Mailbox::MBoxReader.new(@f).each_message do |input| - m = RMail::Parser.read(input) - if m.body && m.body.is_a?(String) - m.body.gsub!(/^>From /, "From ") - end - return m - end + ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore + ## "From" at the start of a message body line. + string = "" + l = @f.gets + string << l until @f.eof? || (l = @f.gets) =~ BREAK_RE + RMail::Parser.read string rescue RMail::Parser::Error => e raise FatalSourceError, "error parsing mbox file: #{e.message}" end