X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=lib%2Fsup%2Fmbox.rb;h=3f3abadefeb6f3833970abf027e6e8f77ce7dd45;hb=aef0216d7f988ab87a3430c9a65210f0d55dfc64;hp=8497a37f11a6f6a00bce35953e4a43d5c96103e7;hpb=998b0a30b1d5a52655553c39aea26913b9ab48b8;p=sup diff --git a/lib/sup/mbox.rb b/lib/sup/mbox.rb index 8497a37..3f3abad 100644 --- a/lib/sup/mbox.rb +++ b/lib/sup/mbox.rb @@ -1,81 +1,24 @@ require "sup/mbox/loader" require "sup/mbox/ssh-file" require "sup/mbox/ssh-loader" -require "sup/rfc2047" module Redwood -## some utility functions. actually these are not mbox-specific at all -## and should be moved somewhere else. -## -## TODO: move functionality to somewhere better, like message.rb module MBox - BREAK_RE = /^From \S+/ - HEADER_RE = /\s*(.*?)\s*/ - - def read_header f - header = {} - last = nil - - ## i do it in this weird way because i am trying to speed things up - ## when scanning over large mbox files. - while(line = f.gets) - case line - ## these three can occur multiple times, and we want the first one - when /^(Delivered-To):#{HEADER_RE}$/i, - /^(X-Original-To):#{HEADER_RE}$/i, - /^(Envelope-To):#{HEADER_RE}$/i: header[last = $1] ||= $2 - - when /^(From):#{HEADER_RE}$/i, - /^(To):#{HEADER_RE}$/i, - /^(Cc):#{HEADER_RE}$/i, - /^(Bcc):#{HEADER_RE}$/i, - /^(Subject):#{HEADER_RE}$/i, - /^(Date):#{HEADER_RE}$/i, - /^(References):#{HEADER_RE}$/i, - /^(In-Reply-To):#{HEADER_RE}$/i, - /^(Reply-To):#{HEADER_RE}$/i, - /^(List-Post):#{HEADER_RE}$/i, - /^(List-Subscribe):#{HEADER_RE}$/i, - /^(List-Unsubscribe):#{HEADER_RE}$/i, - /^(Status):#{HEADER_RE}$/i, - /^(X-\S+):#{HEADER_RE}$/: header[last = $1] = $2 - when /^(Message-Id):#{HEADER_RE}$/i: header[mid_field = last = $1] = $2 - - when /^\r*$/: break - when /^\S+:/: last = nil # some other header we don't care about - else - header[last] += " " + line.chomp.gsub(/^\s+/, "") if last - end - end - - if mid_field && header[mid_field] && header[mid_field] =~ /<(.*?)>/ - header[mid_field] = $1 + BREAK_RE = /^From \S+ (.+)$/ + + def is_break_line? l + l =~ BREAK_RE or return false + time = $1 + begin + ## hack -- make Time.parse fail when trying to substitute values from Time.now + Time.parse time, 0 + true + rescue NoMethodError + warn "found invalid date in potential mbox split line, not splitting: #{l.inspect}" + false end - - header.each do |k, v| - next unless Rfc2047.is_encoded? v - header[k] = - begin - Rfc2047.decode_to $encoding, v - rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e - Redwood::log "warning: error decoding RFC 2047 header (#{e.class.name}): #{e.message}" - v - end - end - header end - - ## never actually called - def read_body f - body = [] - f.each_line do |l| - break if l =~ BREAK_RE - body << l.chomp - end - body - end - - module_function :read_header, :read_body + module_function :is_break_line? end end