From 47801067362e6bb2ba503da7c6bdea4a16153ac8 Mon Sep 17 00:00:00 2001 From: William Morgan Date: Sun, 26 Apr 2009 12:10:56 -0400 Subject: [PATCH] move MBox.parse_header -> Source.parse_raw_email_header --- lib/sup/draft.rb | 4 +- lib/sup/imap.rb | 2 +- lib/sup/maildir.rb | 2 +- lib/sup/mbox.rb | 42 ------------------- lib/sup/mbox/loader.rb | 2 +- lib/sup/modes/edit-message-mode.rb | 2 +- lib/sup/source.rb | 42 +++++++++++++++++++ test/dummy_source.rb | 2 +- ...mbox_parsing.rb => test_header_parsing.rb} | 18 ++++---- 9 files changed, 57 insertions(+), 59 deletions(-) rename test/{test_mbox_parsing.rb => test_header_parsing.rb} (78%) diff --git a/lib/sup/draft.rb b/lib/sup/draft.rb index 35fac30..32266b5 100644 --- a/lib/sup/draft.rb +++ b/lib/sup/draft.rb @@ -79,9 +79,7 @@ class DraftLoader < Source def fn_for_offset o; File.join(@dir, o.to_s); end def load_header offset - File.open fn_for_offset(offset) do |f| - return MBox::read_header(f) - end + File.open(fn_for_offset(offset)) { |f| parse_raw_email_header f } end def load_message offset diff --git a/lib/sup/imap.rb b/lib/sup/imap.rb index 4eb13f4..7508c2c 100644 --- a/lib/sup/imap.rb +++ b/lib/sup/imap.rb @@ -93,7 +93,7 @@ class IMAP < Source def == o; o.is_a?(IMAP) && o.uri == self.uri && o.username == self.username; end def load_header id - MBox::read_header StringIO.new(raw_header(id)) + parse_raw_email_header StringIO.new(raw_header(id)) end def load_message id diff --git a/lib/sup/maildir.rb b/lib/sup/maildir.rb index 3d584f7..a9ae05c 100644 --- a/lib/sup/maildir.rb +++ b/lib/sup/maildir.rb @@ -56,7 +56,7 @@ class Maildir < Source def load_header id scan_mailbox - with_file_for(id) { |f| MBox::read_header f } + with_file_for(id) { |f| parse_raw_email_header f } end def load_message id diff --git a/lib/sup/mbox.rb b/lib/sup/mbox.rb index 223bb7c..5dd89b7 100644 --- a/lib/sup/mbox.rb +++ b/lib/sup/mbox.rb @@ -5,49 +5,7 @@ require "sup/rfc2047" module Redwood -## some utility functions. actually these are not mbox-specific at all -## and should be moved somewhere else. -## -## TODO: move functionality to somewhere better, like message.rb module MBox BREAK_RE = /^From \S+/ ######### TODO REMOVE ME - - ## WARNING! THIS IS A SPEED-CRITICAL SECTION. Everything you do here will have - ## a significant effect on Sup's processing speed of email from ALL sources. - ## Little things like string interpolation, regexp interpolation, += vs <<, - ## all have DRAMATIC effects. BE CAREFUL WHAT YOU DO! - def read_header f - header = {} - last = nil - - while(line = f.gets) - case line - ## these three can occur multiple times, and we want the first one - when /^(Delivered-To|X-Original-To|Envelope-To):\s*(.*?)\s*$/i; header[last = $1.downcase] ||= $2 - ## mark this guy specially. not sure why i care. - when /^([^:\s]+):\s*(.*?)\s*$/i; header[last = $1.downcase] = $2 - when /^\r*$/; break - else - if last - header[last] << " " unless header[last].empty? - header[last] << line.strip - end - end - end - - %w(subject from to cc bcc).each do |k| - v = header[k] or next - next unless Rfc2047.is_encoded? v - header[k] = begin - Rfc2047.decode_to $encoding, v - rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e - Redwood::log "warning: error decoding RFC 2047 header (#{e.class.name}): #{e.message}" - v - end - end - header - end - - module_function :read_header end end diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb index fbf31ae..c623239 100644 --- a/lib/sup/mbox/loader.rb +++ b/lib/sup/mbox/loader.rb @@ -59,7 +59,7 @@ class Loader < Source unless l =~ BREAK_RE raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}." end - header = MBox::read_header @f + header = parse_raw_email_header @f end header end diff --git a/lib/sup/modes/edit-message-mode.rb b/lib/sup/modes/edit-message-mode.rb index 31aa897..51f0824 100644 --- a/lib/sup/modes/edit-message-mode.rb +++ b/lib/sup/modes/edit-message-mode.rb @@ -212,7 +212,7 @@ protected def parse_file fn File.open(fn) do |f| - header = MBox::read_header f + header = Source.parse_raw_email_header f body = f.readlines.map { |l| l.chomp } header.delete_if { |k, v| NON_EDITABLE_HEADERS.member? k } diff --git a/lib/sup/source.rb b/lib/sup/source.rb index 6510aae..91cd71f 100644 --- a/lib/sup/source.rb +++ b/lib/sup/source.rb @@ -99,7 +99,49 @@ class Source end end + ## read a raw email header from a filehandle (or anything that responds to + ## #gets), and turn it into a hash of key-value pairs. + ## + ## WARNING! THIS IS A SPEED-CRITICAL SECTION. Everything you do here will have + ## a significant effect on Sup's processing speed of email from ALL sources. + ## Little things like string interpolation, regexp interpolation, += vs <<, + ## all have DRAMATIC effects. BE CAREFUL WHAT YOU DO! + def self.parse_raw_email_header f + header = {} + last = nil + + while(line = f.gets) + case line + ## these three can occur multiple times, and we want the first one + when /^(Delivered-To|X-Original-To|Envelope-To):\s*(.*?)\s*$/i; header[last = $1.downcase] ||= $2 + ## mark this guy specially. not sure why i care. + when /^([^:\s]+):\s*(.*?)\s*$/i; header[last = $1.downcase] = $2 + when /^\r*$/; break + else + if last + header[last] << " " unless header[last].empty? + header[last] << line.strip + end + end + end + + %w(subject from to cc bcc).each do |k| + v = header[k] or next + next unless Rfc2047.is_encoded? v + header[k] = begin + Rfc2047.decode_to $encoding, v + rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e + #Redwood::log "warning: error decoding RFC 2047 header (#{e.class.name}): #{e.message}" + v + end + end + header + end + protected + + ## convenience function + def parse_raw_email_header f; self.class.parse_raw_email_header f end def Source.expand_filesystem_uri uri uri.gsub "~", File.expand_path("~") diff --git a/test/dummy_source.rb b/test/dummy_source.rb index f3afa31..b84e64e 100644 --- a/test/dummy_source.rb +++ b/test/dummy_source.rb @@ -26,7 +26,7 @@ class DummySource < Source end def load_header offset - MBox::read_header StringIO.new(raw_header(offset)) + Source.parse_raw_email_header StringIO.new(raw_header(offset)) end def load_message offset diff --git a/test/test_mbox_parsing.rb b/test/test_header_parsing.rb similarity index 78% rename from test/test_mbox_parsing.rb rename to test/test_header_parsing.rb index 3486f1b..7368d81 100644 --- a/test/test_mbox_parsing.rb +++ b/test/test_header_parsing.rb @@ -14,7 +14,7 @@ class TestMBoxParsing < Test::Unit::TestCase end def test_normal_headers - h = MBox.read_header StringIO.new(< To: Sally EOS @@ -25,7 +25,7 @@ EOS end def test_multiline - h = MBox.read_header StringIO.new(< Subject: one two three four five six @@ -47,7 +47,7 @@ EOS "Subject: one two three end \n", ] variants.each do |s| - h = MBox.read_header StringIO.new(s) + h = Source.parse_raw_email_header StringIO.new(s) assert_equal "one two three end", h["subject"] end end @@ -58,13 +58,13 @@ EOS "Message-Id: \n", ] variants.each do |s| - h = MBox.read_header StringIO.new(s) + h = Source.parse_raw_email_header StringIO.new(s) assert_equal "", h["message-id"] end end def test_blank_lines - h = MBox.read_header StringIO.new("") + h = Source.parse_raw_email_header StringIO.new("") assert_equal nil, h["message-id"] end @@ -74,13 +74,13 @@ EOS "Message-Id:\n", ] variants.each do |s| - h = MBox.read_header StringIO.new(s) + h = Source.parse_raw_email_header StringIO.new(s) assert_equal "", h["message-id"] end end def test_detect_end_of_headers - h = MBox.read_header StringIO.new(< To: a dear friend @@ -88,7 +88,7 @@ EOS assert_equal "Bob ", h["from"] assert_nil h["to"] - h = MBox.read_header StringIO.new(< \r To: a dear friend @@ -96,7 +96,7 @@ EOS assert_equal "Bob ", h["from"] assert_nil h["to"] - h = MBox.read_header StringIO.new(< \r\n\r To: a dear friend -- 2.45.2