X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=lib%2Fsup%2Fmbox%2Floader.rb;h=ea277cf0f80ffa41397484a92a6940cb468dd0a2;hb=e459fc846862a1499f3acb2f8a2eb098344ee7ac;hp=c1392654e17aa7e00fe471efa2c978e41458d46b;hpb=fa6be1e7004de61bc8ed67d2b43b29f97d76e14c;p=sup diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb index c139265..ea277cf 100644 --- a/lib/sup/mbox/loader.rb +++ b/lib/sup/mbox/loader.rb @@ -6,9 +6,10 @@ module MBox class Loader < Source yaml_properties :uri, :cur_offset, :usual, :archived, :id, :labels + attr_accessor :labels ## uri_or_fp is horrific. need to refactor. - def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil, labels=[] + def initialize uri_or_fp, start_offset=0, usual=true, archived=false, id=nil, labels=[] @mutex = Mutex.new @labels = ((labels || []) - LabelManager::RESERVED_LABELS).uniq.freeze @@ -16,7 +17,8 @@ class Loader < Source when String uri = URI(Source.expand_filesystem_uri(uri_or_fp)) raise ArgumentError, "not an mbox uri" unless uri.scheme == "mbox" - raise ArgumentError, "mbox uri ('#{uri}') cannot have a host: #{uri.host}" if uri.host + raise ArgumentError, "mbox URI ('#{uri}') cannot have a host: #{uri.host}" if uri.host + raise ArgumentError, "mbox URI must have a path component" unless uri.path @f = File.open uri.path @path = uri.path else @@ -36,7 +38,7 @@ class Loader < Source if File.dirname(path) =~ /\b(var|usr|spool)\b/ [] else - [File.basename(path).intern] + [File.basename(path).downcase.intern] end end @@ -54,10 +56,10 @@ class Loader < Source @mutex.synchronize do @f.seek offset l = @f.gets - unless l =~ BREAK_RE + unless MBox::is_break_line? l raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}." end - header = MBox::read_header @f + header = parse_raw_email_header @f end header end @@ -66,21 +68,36 @@ class Loader < Source @mutex.synchronize do @f.seek offset begin - RMail::Mailbox::MBoxReader.new(@f).each_message do |input| - return RMail::Parser.read(input) - end + ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore + ## "From" at the start of a message body line. + string = "" + l = @f.gets + string << l until @f.eof? || MBox::is_break_line?(l = @f.gets) + RMail::Parser.read string rescue RMail::Parser::Error => e raise FatalSourceError, "error parsing mbox file: #{e.message}" end end end + ## scan forward until we're at the valid start of a message + def correct_offset! + @mutex.synchronize do + @f.seek cur_offset + string = "" + until @f.eof? || MBox::is_break_line?(l = @f.gets) + string << l + end + self.cur_offset += string.length + end + end + def raw_header offset ret = "" @mutex.synchronize do @f.seek offset - until @f.eof? || (l = @f.gets) =~ /^$/ - ret += l + until @f.eof? || (l = @f.gets) =~ /^\r*$/ + ret << l end end ret @@ -88,10 +105,19 @@ class Loader < Source def raw_message offset ret = "" - each_raw_message_line(offset) { |l| ret += l } + each_raw_message_line(offset) { |l| ret << l } ret end + def store_message date, from_email, &block + need_blank = File.exists?(@filename) && !File.zero?(@filename) + File.open(@filename, "a") do |f| + f.puts if need_blank + f.puts "From #{from_email} #{date.utc}" + yield f + end + end + ## apparently it's a million times faster to call this directly if ## we're just moving messages around on disk, than reading things ## into memory with raw_message. @@ -102,7 +128,7 @@ class Loader < Source @mutex.synchronize do @f.seek offset yield @f.gets - until @f.eof? || (l = @f.gets) =~ BREAK_RE + until @f.eof? || MBox::is_break_line?(l = @f.gets) yield l end end @@ -123,7 +149,7 @@ class Loader < Source ## 2. at the beginning of an mbox separator (in all other ## cases). - l = @f.gets or raise "next while at EOF" + l = @f.gets or return nil if l =~ /^\s*$/ # case 1 returned_offset = @f.tell @f.gets # now we're at a BREAK_RE, so skip past it @@ -133,7 +159,7 @@ class Loader < Source end while(line = @f.gets) - break if line =~ BREAK_RE + break if MBox::is_break_line? line next_offset = @f.tell end end @@ -142,7 +168,7 @@ class Loader < Source end self.cur_offset = next_offset - [returned_offset, (@labels + [:unread]).uniq] + [returned_offset, (self.labels + [:unread]).uniq] end end