8 def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil
17 raise ArgumentError, "not an mbox uri" unless uri.scheme == "mbox"
18 raise ArgumentError, "mbox uri cannot have a host: #{uri.host}" if uri.host
19 ## heuristic: use the filename as a label, unless the file
20 ## has a path that probably represents an inbox.
21 @labels << File.basename(uri.path).intern unless File.dirname(uri.path) =~ /\b(var|usr|spool)\b/
22 @f = File.open uri.path
29 if (cur_offset ||= start_offset) > end_offset
30 raise OutOfSyncSourceError, "mbox file is smaller than last recorded message offset. Messages have probably been deleted by another client."
## the offset at which this source starts; always 0.
34 def start_offset; 0; end
## the end offset is the current size, as reported by File.size, of
## the file held in @f. it is computed on every call, not cached.
35 def end_offset; File.size @f; end
37 def load_header offset
43 raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}."
45 header = MBox::read_header @f
50 def load_message offset
54 RMail::Mailbox::MBoxReader.new(@f).each_message do |input|
55 return RMail::Parser.read(input)
57 rescue RMail::Parser::Error => e
58 raise FatalSourceError, "error parsing mbox file: #{e.message}"
67 until @f.eof? || (l = @f.gets) =~ /^$/
74 def raw_full_message offset
78 @f.gets # skip mbox header
79 until @f.eof? || (l = @f.gets) =~ BREAK_RE
88 next_offset = cur_offset
94 ## cur_offset could be at one of two places here:
96 ## 1. before a \n and an mbox separator, if it was previously at
97 ## EOF and a new message was added; or,
98 ## 2. at the beginning of an mbox separator (in all other
101 l = @f.gets or raise "next while at EOF"
102 if l =~ /^\s*$/ # case 1
103 returned_offset = @f.tell
104 @f.gets # now we're at a BREAK_RE, so skip past it
106 returned_offset = cur_offset
107 ## we've already skipped past the BREAK_RE, so just go
110 while(line = @f.gets)
111 break if line =~ BREAK_RE
112 next_offset = @f.tell
115 rescue SystemCallError => e
116 raise FatalSourceError, "Error reading #{@f.path}: #{e.message}"
119 self.cur_offset = next_offset
120 [returned_offset, @labels.clone]
## hand Loader to Redwood::register_yaml together with the field
## names uri, cur_offset, usual, archived and id.
## NOTE(review): presumably these are the fields written to and
## restored from YAML, in this order -- confirm against
## Redwood::register_yaml, which is defined elsewhere.
124 Redwood::register_yaml(Loader, %w(uri cur_offset usual archived id))