X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=lib%2Fsup%2Fmbox%2Floader.rb;h=a11bf9541e6efed049754cf85442c6cdbd714c8b;hb=4efc8adaf8c63ff054c71149de6c12fe8194ff03;hp=ad52dfb1087fae4d3d794fbe8a9d1e46bdbbad52;hpb=3439894f38d1d4140bb912de0232fdb576991de0;p=sup diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb index ad52dfb..a11bf95 100644 --- a/lib/sup/mbox/loader.rb +++ b/lib/sup/mbox/loader.rb @@ -1,17 +1,20 @@ require 'rmail' require 'uri' +require 'set' module Redwood module MBox class Loader < Source + include SerializeLabelsNicely yaml_properties :uri, :cur_offset, :usual, :archived, :id, :labels - attr_accessor :labels + + attr_reader :labels ## uri_or_fp is horrific. need to refactor. - def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil, labels=[] + def initialize uri_or_fp, start_offset=0, usual=true, archived=false, id=nil, labels=nil @mutex = Mutex.new - @labels = ((labels || []) - LabelManager::RESERVED_LABELS).uniq.freeze + @labels = Set.new((labels || []) - LabelManager::RESERVED_LABELS) case uri_or_fp when String @@ -47,7 +50,7 @@ class Loader < Source raise OutOfSyncSourceError, "mbox file is smaller than last recorded message offset. Messages have probably been deleted by another client." end end - + def start_offset; 0; end def end_offset; File.size @f; end @@ -56,7 +59,7 @@ class Loader < Source @mutex.synchronize do @f.seek offset l = @f.gets - unless l =~ BREAK_RE + unless MBox::is_break_line? l raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}." end header = parse_raw_email_header @f @@ -68,13 +71,12 @@ class Loader < Source @mutex.synchronize do @f.seek offset begin - RMail::Mailbox::MBoxReader.new(@f).each_message do |input| - m = RMail::Parser.read(input) - if m.body && m.body.is_a?(String) - m.body.gsub!(/^>From /, "From ") - end - return m - end + ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore + ## "From" at the start of a message body line. + string = "" + l = @f.gets + string << l until @f.eof? || MBox::is_break_line?(l = @f.gets) + RMail::Parser.read string rescue RMail::Parser::Error => e raise FatalSourceError, "error parsing mbox file: #{e.message}" end @@ -86,7 +88,7 @@ class Loader < Source @mutex.synchronize do @f.seek cur_offset string = "" - until @f.eof? || (l = @f.gets) =~ BREAK_RE + until @f.eof? || MBox::is_break_line?(l = @f.gets) string << l end self.cur_offset += string.length @@ -110,6 +112,15 @@ class Loader < Source ret end + def store_message date, from_email, &block + need_blank = File.exists?(@filename) && !File.zero?(@filename) + File.open(@filename, "a") do |f| + f.puts if need_blank + f.puts "From #{from_email} #{date.utc}" + yield f + end + end + ## apparently it's a million times faster to call this directly if ## we're just moving messages around on disk, than reading things ## into memory with raw_message. @@ -120,7 +131,7 @@ class Loader < Source @mutex.synchronize do @f.seek offset yield @f.gets - until @f.eof? || (l = @f.gets) =~ BREAK_RE + until @f.eof? || MBox::is_break_line?(l = @f.gets) yield l end end @@ -141,7 +152,7 @@ class Loader < Source ## 2. at the beginning of an mbox separator (in all other ## cases). - l = @f.gets or raise "next while at EOF" + l = @f.gets or return nil if l =~ /^\s*$/ # case 1 returned_offset = @f.tell @f.gets # now we're at a BREAK_RE, so skip past it @@ -151,7 +162,7 @@ class Loader < Source end while(line = @f.gets) - break if line =~ BREAK_RE + break if MBox::is_break_line? line next_offset = @f.tell end end @@ -160,7 +171,7 @@ class Loader < Source end self.cur_offset = next_offset - [returned_offset, (self.labels + [:unread]).uniq] + [returned_offset, (labels + [:unread])] end end