-require 'thread'
require 'rmail'
+require 'uri'
+require 'set'
module Redwood
module MBox
-class Error < StandardError; end
-
-class Loader
- attr_reader :filename
- bool_reader :usual, :archived, :read, :dirty
- attr_accessor :id, :labels
-
- ## end_offset is the last offsets within the file which we've read.
- ## everything after that is considered new messages that haven't
- ## been indexed.
- def initialize filename, end_offset=0, usual=true, archived=false, id=nil
- @filename = filename.gsub(%r(^mbox://), "")
- @end_offset = end_offset
- @dirty = false
- @usual = usual
- @archived = archived
- @id = id
+## Loader reads messages out of a single mbox file. It subclasses Source
+## and mixes in SerializeLabelsNicely.
+class Loader < Source
+ include SerializeLabelsNicely
+ ## NOTE(review): presumably the attributes that get persisted when the
+ ## source is serialized to YAML -- confirm against yaml_properties.
+ yaml_properties :uri, :cur_offset, :usual, :archived, :id, :labels
+
+ ## the labels attached to this source (built as a Set in #initialize)
+ attr_reader :labels
+
+ ## Opens the mbox file designated by uri_or_fp, then forwards uri_or_fp,
+ ## start_offset, usual, archived and id to the superclass initializer.
+ ##
+ ## uri_or_fp:: either a String holding an mbox URI (scheme "mbox", no
+ ## host, non-empty path) or an already-open file-like object that
+ ## responds to #path. Accepting both is awkward and needs refactoring.
+ ## labels:: extra labels for this source; LabelManager::RESERVED_LABELS
+ ## are dropped from it.
+ ##
+ ## Raises ArgumentError when a String argument is not a usable mbox URI.
+ def initialize uri_or_fp, start_offset=0, usual=true, archived=false, id=nil, labels=nil
@mutex = Mutex.new
- @f = File.open @filename
- @labels = ([
- :unread,
- archived ? nil : :inbox,
- ] +
- if File.dirname(filename) =~ /\b(var|usr|spool)\b/
- []
- else
- [File.basename(filename).intern]
- end).compact
+ @labels = Set.new((labels || []) - LabelManager::RESERVED_LABELS)
+
+ case uri_or_fp
+ when String
+ uri = URI(Source.expand_filesystem_uri(uri_or_fp))
+ raise ArgumentError, "not an mbox uri" unless uri.scheme == "mbox"
+ ## NOTE(review): an absent authority may be reported as "" rather than
+ ## nil depending on the URI parser, so treat both as "no host".
+ raise ArgumentError, "mbox URI ('#{uri}') cannot have a host: #{uri.host}" unless uri.host.nil? || uri.host.empty?
+ ## URI#path is "" (not nil) for a hierarchical URI without a path, so
+ ## a plain truthiness test would never fire for it.
+ raise ArgumentError, "mbox URI must have a path component" if uri.path.nil? || uri.path.empty?
+ @f = File.open uri.path
+ @path = uri.path
+ else
+ @f = uri_or_fp
+ @path = uri_or_fp.path
+ end
+
+ super uri_or_fp, start_offset, usual, archived, id
end
- def seek_to! offset
- @end_offset = [offset, File.size(@f) - 1].min;
- @dirty = true
+ ## Filesystem path of the underlying mbox file, as recorded by #initialize.
+ def file_path
+ @path
+ end
+ ## Truthy when this loader serves +uri+: either the superclass says so, or
+ ## this source's own uri is a String and the two URIs compare equal after
+ ## both have gone through Source.expand_filesystem_uri.
+ def is_source_for? uri
+ inherited = super
+ return inherited if inherited
+ return false unless self.uri.is_a?(String)
+ URI(Source.expand_filesystem_uri(uri)) == URI(Source.expand_filesystem_uri(self.uri))
+ end
+
+ ## Suggests labels for the mbox file at +path+.
+ ##
+ ## Heuristic: the lower-cased file name becomes the single label, unless
+ ## the directory part contains "var", "usr" or "spool" as a whole word,
+ ## which probably means the file is an inbox; then no label is suggested.
+ def self.suggest_labels_for path
+ return [] if File.dirname(path) =~ /\b(var|usr|spool)\b/
+ [File.basename(path).downcase.intern]
end
- def reset!; seek_to! 0; end
- def == o; o.is_a?(Loader) && o.filename == filename; end
- def to_s; "mbox://#{@filename}"; end
- def is_source_for? s
- @filename == s || self.to_s == s
+ ## Verifies that the recorded read position still lies inside the mbox
+ ## file, defaulting the position to start_offset when it is unset.
+ ##
+ ## Raises OutOfSyncSourceError when the position is beyond end_offset.
+ def check
+ ## go through the accessors explicitly: a bare `cur_offset ||= ...`
+ ## creates a method-local variable, so the comparison would always use
+ ## start_offset and the error could never be raised.
+ if (self.cur_offset ||= start_offset) > end_offset
+ raise OutOfSyncSourceError, "mbox file is smaller than last recorded message offset. Messages have probably been deleted by another client."
+ end
end
- def load_header offset=nil
+ ## Offset at which reading starts: always the beginning of the file.
+ def start_offset
+ 0
+ end
+ ## Current size of the open mbox file, as reported by File.size.
+ def end_offset
+ File.size(@f)
+ end
+
+ ## Reads the header of the message that starts at +offset+ and returns
+ ## whatever parse_raw_email_header produces for the lines following the
+ ## separator.
+ ##
+ ## Raises OutOfSyncSourceError when the line found at +offset+ is not an
+ ## mbox separator line.
+ def load_header offset
header = nil
@mutex.synchronize do
- @f.seek offset if offset
- header = MBox::read_header @f
+ @f.seek offset
+ separator = @f.gets
+ raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{separator.inspect}." unless MBox::is_break_line?(separator)
+ header = parse_raw_email_header @f
end
header
end
+ ## Reads the message stored at +offset+ and returns the result of
+ ## RMail::Parser.read on its text. The first line at +offset+ (expected to
+ ## be the separator) is skipped; reading stops at the next separator line
+ ## or at end of file.
+ ##
+ ## Raises FatalSourceError when RMail cannot parse the text.
def load_message offset
- ret = nil
@mutex.synchronize do
@f.seek offset
- RMail::Mailbox::MBoxReader.new(@f).each_message do |input|
- return RMail::Parser.read(input)
+ begin
+ ## RMail::Mailbox::MBoxReader is avoided on purpose: it doesn't
+ ## properly ignore "From" at the start of a message body line.
+ text = ""
+ @f.gets # discard the first line; it is not part of the message
+ until @f.eof?
+ line = @f.gets
+ break if MBox::is_break_line?(line)
+ text << line
+ end
+ RMail::Parser.read text
+ rescue RMail::Parser::Error => e
+ raise FatalSourceError, "error parsing mbox file: #{e.message}"
end
end
end
- def raw_header offset
- ret = ""
+ ## Scans forward from cur_offset until the file is positioned at the
+ ## start of a separator line (or at end of file) and stores that position
+ ## back into cur_offset.
+ def correct_offset!
@mutex.synchronize do
- @f.seek offset
- until @f.eof? || (l = @f.gets) =~ /^$/
- ret += l
+ @f.seek cur_offset
+ ## remember the file position in front of every line instead of summing
+ ## String#length: offsets are bytes, while length counts characters and
+ ## would drift on multi-byte content.
+ pos = @f.tell
+ until @f.eof? || MBox::is_break_line?(@f.gets)
+ pos = @f.tell
end
+ self.cur_offset = pos
end
- ret
end
- def raw_full_message offset
+ ## Returns the raw text found from +offset+ up to, but not including, the
+ ## first line that is empty apart from carriage returns (or up to end of
+ ## file when there is no such line).
+ def raw_header offset
ret = ""
@mutex.synchronize do
@f.seek offset
- @f.gets # skip mbox header
- until @f.eof? || (l = @f.gets) =~ BREAK_RE
- ret += l
+ until @f.eof?
+ line = @f.gets
+ break if line =~ /^\r*$/
+ ret << line
end
end
ret
end
- def next
- return nil if done?
- @dirty = true
- next_end_offset = @end_offset
+ ## Returns, as one string, every line that each_raw_message_line yields
+ ## for the message stored at +offset+.
+ def raw_message offset
+ text = ""
+ each_raw_message_line(offset) do |line|
+ text << line
+ end
+ text
+ end
- @mutex.synchronize do
- @f.seek @end_offset
+ ## Appends a message to the mbox file. A blank line is written first when
+ ## the file already has content, then the "From " separator line, and then
+ ## the file handle is yielded so the caller can write the message itself.
+ ##
+ ## date:: responds to #utc; used in the separator line.
+ ## from_email:: sender address placed in the separator line.
+ def store_message date, from_email, &block
+ ## use @path, which #initialize records; this class never assigns
+ ## @filename, so relying on it would hand nil to File.
+ need_blank = File.exist?(@path) && !File.zero?(@path)
+ File.open(@path, "a") do |f|
+ f.puts if need_blank
+ f.puts "From #{from_email} #{date.utc}"
+ yield f
+ end
+ end
- @f.gets # skip the From separator
- next_end_offset = @f.tell
- while(line = @f.gets)
- break if line =~ BREAK_RE
- next_end_offset = @f.tell
+ ## Yields the raw lines of the message stored at +offset+: first the line
+ ## found at +offset+ itself, then every following line up to the next
+ ## separator line or end of file.
+ ##
+ ## Calling this directly is reportedly far faster than reading the whole
+ ## message into memory with raw_message when messages are merely being
+ ## moved around on disk, which is what sup-sync-back has to do.
+ def each_raw_message_line offset
+ @mutex.synchronize do
+ @f.seek offset
+ yield @f.gets
+ until @f.eof?
+ line = @f.gets
+ break if MBox::is_break_line?(line)
+ yield line
end
end
-
- start_offset = @end_offset
- @end_offset = next_end_offset
-
- start_offset
end
- def each
- until @end_offset >= File.size(@f)
- n = self.next
- yield(n, labels) if n
+ ## Moves on to the following message in the file.
+ ##
+ ## Returns nil when no line can be read at cur_offset. Otherwise returns a
+ ## two-element array: the offset of the message (see the two cases below)
+ ## and the source's labels plus :unread. cur_offset is then set to the
+ ## position just after the last non-separator line that was read; it is
+ ## left unchanged when no such line was read.
+ ##
+ ## Raises FatalSourceError when reading fails with a SystemCallError or an
+ ## IOError.
+ def next
+ returned_offset = nil
+ next_offset = cur_offset
+
+ begin
+ @mutex.synchronize do
+ @f.seek cur_offset
+
+ ## cur_offset could be at one of two places here:
+
+ ## 1. before a \n and a mbox separator, if it was previously at
+ ## EOF and a new message was added; or,
+ ## 2. at the beginning of an mbox separator (in all other
+ ## cases).
+
+ l = @f.gets or return nil
+ if l =~ /^\s*$/ # case 1
+ returned_offset = @f.tell
+ @f.gets # now we're at a BREAK_RE, so skip past it
+ else # case 2
+ returned_offset = cur_offset
+ ## we've already skipped past the BREAK_RE, so just go
+ end
+
+ ## consume lines up to the next separator, remembering the position
+ ## after each line that still belongs to this message
+ while(line = @f.gets)
+ break if MBox::is_break_line? line
+ next_offset = @f.tell
+ end
+ end
+ rescue SystemCallError, IOError => e
+ raise FatalSourceError, "Error reading #{@f.path}: #{e.message}"
end
- end
- def each_header
- each { |offset, labels| yield offset, labels, load_header(offset) }
+ self.cur_offset = next_offset
+ [returned_offset, (labels + [:unread])]
end
-
- def done?; @end_offset >= File.size(@f); end
- def total; File.size @f; end
end
-Redwood::register_yaml(Loader, %w(filename end_offset usual archived id))
-
end
end