##
## TODO: move functionality to somewhere better, like message.rb
module MBox
- BREAK_RE = /^From \S+/
- HEADER_RE = /\s*(.*?)\s*/
+ BREAK_RE = /^From \S+/ ######### TODO REMOVE ME
+ ## WARNING! THIS IS A SPEED-CRITICAL SECTION. Everything you do here will have
+ ## a significant effect on Sup's processing speed of email from ALL sources.
+ ## Little things like string interpolation, regexp interpolation, += vs <<,
+ ## all have DRAMATIC effects. BE CAREFUL WHAT YOU DO!
def read_header f
header = {}
last = nil
- ## i do it in this weird way because i am trying to speed things up
- ## when scanning over large mbox files.
while(line = f.gets)
case line
## these three can occur multiple times, and we want the first one
- when /^(Delivered-To):#{HEADER_RE}$/i,
- /^(X-Original-To):#{HEADER_RE}$/i,
- /^(Envelope-To):#{HEADER_RE}$/i: header[last = $1] ||= $2
-
- when /^(From):#{HEADER_RE}$/i,
- /^(To):#{HEADER_RE}$/i,
- /^(Cc):#{HEADER_RE}$/i,
- /^(Bcc):#{HEADER_RE}$/i,
- /^(Subject):#{HEADER_RE}$/i,
- /^(Date):#{HEADER_RE}$/i,
- /^(References):#{HEADER_RE}$/i,
- /^(In-Reply-To):#{HEADER_RE}$/i,
- /^(Reply-To):#{HEADER_RE}$/i,
- /^(List-Post):#{HEADER_RE}$/i,
- /^(List-Subscribe):#{HEADER_RE}$/i,
- /^(List-Unsubscribe):#{HEADER_RE}$/i,
- /^(Status):#{HEADER_RE}$/i,
- /^(X-\S+):#{HEADER_RE}$/: header[last = $1] = $2
- when /^(Message-Id):#{HEADER_RE}$/i: header[mid_field = last = $1] = $2
-
- when /^\r*$/: break
- when /^\S+:/: last = nil # some other header we don't care about
+ when /^(Delivered-To|X-Original-To|Envelope-To):\s*(.*?)\s*$/i; header[last = $1.downcase] ||= $2
+ ## mark this guy specially. not sure why i care.
+ when /^([^:\s]+):\s*(.*?)\s*$/i; header[last = $1.downcase] = $2
+ when /^\r*$/; break
else
- header[last] += " " + line.chomp.gsub(/^\s+/, "") if last
+ if last
+ header[last] << " " unless header[last].empty?
+ header[last] << line.strip
+ end
end
end
- if mid_field && header[mid_field] && header[mid_field] =~ /<(.*?)>/
- header[mid_field] = $1
- end
-
- header.each do |k, v|
+ %w(subject from to cc bcc).each do |k|
+ v = header[k] or next
next unless Rfc2047.is_encoded? v
- header[k] =
- begin
- Rfc2047.decode_to $encoding, v
- rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e
- Redwood::log "warning: error decoding RFC 2047 header (#{e.class.name}): #{e.message}"
- v
- end
+ header[k] = begin
+ Rfc2047.decode_to $encoding, v
+ rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e
+ Redwood::log "warning: error decoding RFC 2047 header (#{e.class.name}): #{e.message}"
+ v
+ end
end
header
end
end
def parse_header header
- header.keys.each { |k| header[k.downcase] = header[k] } # canonicalize
-
- fakeid = nil
- fakename = nil
-
- @id =
- if header["message-id"]
- sanitize_message_id header["message-id"]
- else
- fakeid = "sup-faked-" + Digest::MD5.hexdigest(raw_header)
- end
+ @id = if header["message-id"]
+ mid = header["message-id"] =~ /<(.+?)>/ ? $1 : header["message-id"]
+ sanitize_message_id mid
+ else
+ id = "sup-faked-" + Digest::MD5.hexdigest(raw_header)
+ from = header["from"]
+ #Redwood::log "faking non-existent message-id for message from #{from}: #{id}"
+ id
+ end
- @from =
- if header["from"]
- Person.from_address header["from"]
- else
- fakename = "Sup Auto-generated Fake Sender <sup@fake.sender.example.com>"
- Person.from_address fakename
- end
-
- Redwood::log "faking message-id for message from #@from: #{id}" if fakeid
- Redwood::log "faking from for message #@id: #{fakename}" if fakename
-
- date = header["date"]
- @date =
- case date
- when Time
- date
- when String
- begin
- Time.parse date
- rescue ArgumentError => e
- Redwood::log "faking date header for #{@id} due to error parsing date #{header['date'].inspect}: #{e.message}"
- Time.now
- end
- else
- Redwood::log "faking date header for #{@id}"
+ @from = Person.from_address(if header["from"]
+ header["from"]
+ else
+ name = "Sup Auto-generated Fake Sender <sup@fake.sender.example.com>"
+ Redwood::log "faking non-existent sender for message #@id: #{name}"
+ name
+ end)
+
+ @date = case(date = header["date"])
+ when Time
+ date
+ when String
+ begin
+ Time.parse date
+ rescue ArgumentError => e
+ Redwood::log "faking mangled date header for #{@id} (orig #{header['date'].inspect} gave error: #{e.message})"
Time.now
end
+ else
+ Redwood::log "faking non-existent date header for #{@id}"
+ Time.now
+ end
@subj = header.member?("subject") ? header["subject"].gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT
@to = Person.from_address_list header["to"]
To: Sally <sally@sally.com>
EOS
- assert_equal "Bob <bob@bob.com>", h["From"]
- assert_equal "Sally <sally@sally.com>", h["To"]
- assert_nil h["Message-Id"]
- end
-
- ## this is shitty behavior in retrospect, but it's built in now.
- def test_message_id_stripping
- h = MBox.read_header StringIO.new("Message-Id: <one@bob.com>\n")
- assert_equal "one@bob.com", h["Message-Id"]
-
- h = MBox.read_header StringIO.new("Message-Id: one@bob.com\n")
- assert_equal "one@bob.com", h["Message-Id"]
+ assert_equal "Bob <bob@bob.com>", h["from"]
+ assert_equal "Sally <sally@sally.com>", h["to"]
+ assert_nil h["message-id"]
end
def test_multiline
Subject: one two three
four five six
To: Sally <sally@sally.com>
-References: seven
- eight
+References: <seven>
+ <eight>
Seven: Eight
EOS
- assert_equal "one two three four five six", h["Subject"]
- assert_equal "Sally <sally@sally.com>", h["To"]
- assert_equal "seven eight", h["References"]
+ assert_equal "one two three four five six", h["subject"]
+ assert_equal "Sally <sally@sally.com>", h["to"]
+ assert_equal "<seven> <eight>", h["references"]
end
def test_ignore_spacing
]
variants.each do |s|
h = MBox.read_header StringIO.new(s)
- assert_equal "one two three end", h["Subject"]
+ assert_equal "one two three end", h["subject"]
end
end
def test_message_id_ignore_spacing
variants = [
"Message-Id: <one@bob.com> \n",
- "Message-Id: one@bob.com \n",
"Message-Id:<one@bob.com> \n",
- "Message-Id:one@bob.com \n",
]
variants.each do |s|
h = MBox.read_header StringIO.new(s)
- assert_equal "one@bob.com", h["Message-Id"]
+ assert_equal "<one@bob.com>", h["message-id"]
end
end
def test_blank_lines
h = MBox.read_header StringIO.new("")
- assert_equal nil, h["Message-Id"]
+ assert_equal nil, h["message-id"]
end
def test_empty_headers
]
variants.each do |s|
h = MBox.read_header StringIO.new(s)
- assert_equal "", h["Message-Id"]
+ assert_equal "", h["message-id"]
end
end
To: a dear friend
EOS
- assert_equal "Bob <bob@bob.com>", h["From"]
- assert_nil h["To"]
+ assert_equal "Bob <bob@bob.com>", h["from"]
+ assert_nil h["to"]
h = MBox.read_header StringIO.new(<<EOS)
From: Bob <bob@bob.com>
\r
To: a dear friend
EOS
- assert_equal "Bob <bob@bob.com>", h["From"]
- assert_nil h["To"]
+ assert_equal "Bob <bob@bob.com>", h["from"]
+ assert_nil h["to"]
h = MBox.read_header StringIO.new(<<EOS)
From: Bob <bob@bob.com>
\r\n\r
To: a dear friend
EOS
- assert_equal "Bob <bob@bob.com>", h["From"]
- assert_nil h["To"]
+ assert_equal "Bob <bob@bob.com>", h["from"]
+ assert_nil h["to"]
end
end
# Look at another header field whose first line was blank.
list_unsubscribe = sup_message.list_unsubscribe
- assert_equal(" <http://mailman2.widget.com/mailman/listinfo/monitor-list>, " +
+ assert_equal("<http://mailman2.widget.com/mailman/listinfo/monitor-list>, " +
"<mailto:monitor-list-request@widget.com?subject=unsubscribe>",
list_unsubscribe)