require 'tempfile'
require 'time'
+require 'iconv'
+require 'shellwords'
module Redwood
## i would like, for example, to be able to add in a ruby-talk
## specific module that would detect and link to /ruby-talk:\d+/
## sequences in the text of an email. (how sweet would that be?)
-##
-## TODO: integrate with user's addressbook to render names
-## appropriately.
class Message
SNIPPET_LEN = 80
+ WRAP_LEN = 80 # wrap at this width
RE_PATTERN = /^((re|re[\[\(]\d[\]\)]):\s*)+/i
-
+
+ HookManager.register "mime-decode", <<EOS
+Executes when decoding a MIME attachment.
+Variables:
+ content_type: the content-type of the message
+ filename: the filename of the attachment as saved to disk (generated
+ on the fly, so don't call more than once)
+ sibling_types: if this attachment is part of a multipart MIME attachment,
+ an array of content-types for all attachments. Otherwise,
+ the empty array.
+Return value:
+ The decoded text of the attachment, or nil if not decoded.
+EOS
+#' stupid ruby-mode
+
## some utility methods
class << self
def normalize_subj s; s.gsub(RE_PATTERN, ""); end
end
class Attachment
- attr_reader :content_type, :desc, :filename
- def initialize content_type, desc, part
+ ## encoded_content is still possibly MIME-encoded.
+ ##
+ ## raw_content is after decoding but before being turned into
+ ## inlineable text.
+ ##
+ ## lines is an array of inlineable text.
+
+ attr_reader :content_type, :filename, :lines, :raw_content
+
+ def initialize content_type, filename, encoded_content, sibling_types
@content_type = content_type
- @desc = desc
- @part = part
- @file = nil
- desc =~ /filename="(.*?)"/ && @filename = $1
+ @filename = filename
+ @raw_content = encoded_content.decode
+
+ @lines =
+ case @content_type
+ when /^text\/plain\b/
+ Message.convert_from(@raw_content, encoded_content.charset).split("\n")
+ else
+ text = HookManager.run "mime-decode", :content_type => content_type,
+ :filename => lambda { write_to_disk },
+ :sibling_types => sibling_types
+ text.split("\n") if text
+
+ end
end
- def view!
- unless @file
- @file = Tempfile.new "redwood.attachment"
- @file.print self
- @file.close
- end
+ def inlineable?; !@lines.nil? end
- ## TODO: handle unknown mime-types
- system "/usr/bin/run-mailcap --action=view #{@content_type}:#{@file.path}"
+ def view!
+ path = write_to_disk
+ system "/usr/bin/run-mailcap --action=view #{@content_type}:#{path} >& /dev/null"
+ $? == 0
+ end
+
+ ## used when viewing the attachment as text
+ def to_s
+ @lines || @raw_content
end
- def to_s; @part.decode; end
+ private
+
+ def write_to_disk
+ file = Tempfile.new "redwood.attachment"
+ file.print @raw_content
+ file.close
+ file.path
+ end
end
class Text
attr_reader :lines
def initialize lines
## do some wrapping
- @lines = lines.map { |l| l.wrap 80 }.flatten
+ @lines = lines.map { |l| l.chomp.wrap WRAP_LEN }.flatten
end
end
QUOTE_PATTERN = /^\s{0,4}[>|\}]/
BLOCK_QUOTE_PATTERN = /^-----\s*Original Message\s*----+$/
QUOTE_START_PATTERN = /(^\s*Excerpts from)|(^\s*In message )|(^\s*In article )|(^\s*Quoting )|((wrote|writes|said|says)\s*:\s*$)/
- SIG_PATTERN = /(^-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)/
+ SIG_PATTERN = /(^-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)|(^\s*--~--~-)/
+
MAX_SIG_DISTANCE = 15 # lines from the end
- DEFAULT_SUBJECT = "(missing subject)"
+ DEFAULT_SUBJECT = ""
DEFAULT_SENDER = "(missing sender)"
attr_reader :id, :date, :from, :subj, :refs, :replytos, :to, :source,
:cc, :bcc, :labels, :list_address, :recipient_email, :replyto,
- :source_info, :status
+ :source_info, :chunks
- bool_reader :dirty
+ bool_reader :dirty, :source_marked_read
- ## if index_entry is specified, will fill in values from that,
+ ## if you specify a :header, will use values from that. otherwise,
+ ## will try and load the header from the source.
def initialize opts
- @source = opts[:source]
- @source_info = opts[:source_info]
+ @source = opts[:source] or raise ArgumentError, "source can't be nil"
+ @source_info = opts[:source_info] or raise ArgumentError, "source_info can't be nil"
@snippet = opts[:snippet] || ""
- @labels = opts[:labels] || []
+ @have_snippet = !opts[:snippet].nil?
+ @labels = [] + (opts[:labels] || [])
@dirty = false
+ @chunks = nil
- header =
- if opts[:header]
- opts[:header]
- else
- header = @source.load_header @source_info
- header.each { |k, v| header[k.downcase] = v }
- header
- end
+ parse_header(opts[:header] || @source.load_header(@source_info))
+ end
- %w(message-id date).each do |f|
- raise MessageFormatError, "no #{f} field in header #{header.inspect} (source #@source offset #@source_info)" unless header.include? f
- raise MessageFormatError, "nil #{f} field in header #{header.inspect} (source #@source offset #@source_info)" unless header[f]
- end
+ def parse_header header
+ header.each { |k, v| header[k.downcase] = v }
- begin
- @date = Time.parse header["date"]
- rescue ArgumentError => e
- raise MessageFormatError, "unparsable date #{header['date']}: #{e.message}"
- end
+ @from = PersonManager.person_for header["from"]
- if(@subj = header["subject"])
- @subj = @subj.gsub(/\s+/, " ").gsub(/\s+$/, "")
- else
- @subj = DEFAULT_SUBJECT
- end
- @from = Person.for header["from"]
- @to = Person.for_several header["to"]
- @cc = Person.for_several header["cc"]
- @bcc = Person.for_several header["bcc"]
@id = header["message-id"]
- @refs = (header["references"] || "").scan(/<(.*?)>/).flatten
+ unless @id
+ @id = "sup-faked-" + Digest::MD5.hexdigest(raw_header)
+ Redwood::log "faking message-id for message from #@from: #@id"
+ end
+
+ date = header["date"]
+ @date =
+ case date
+ when Time
+ date
+ when String
+ begin
+ Time.parse date
+ rescue ArgumentError => e
+ raise MessageFormatError, "unparsable date #{header['date']}: #{e.message}"
+ end
+ else
+ Redwood::log "faking date header for #{@id}"
+ Time.now
+ end
+
+ @subj = header.member?("subject") ? header["subject"].gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT
+ @to = PersonManager.people_for header["to"]
+ @cc = PersonManager.people_for header["cc"]
+ @bcc = PersonManager.people_for header["bcc"]
+ @refs = (header["references"] || "").gsub(/[<>]/, "").split(/\s+/).flatten
@replytos = (header["in-reply-to"] || "").scan(/<(.*?)>/).flatten
- @replyto = Person.for header["reply-to"]
+ @replyto = PersonManager.person_for header["reply-to"]
@list_address =
if header["list-post"]
- @list_address = Person.for header["list-post"].gsub(/^<mailto:|>$/, "")
+ @list_address = PersonManager.person_for header["list-post"].gsub(/^<mailto:|>$/, "")
else
nil
end
- @recipient_email = header["delivered-to"]
- @status = header["status"]
+ @recipient_email = header["envelope-to"] || header["x-original-to"] || header["delivered-to"]
+ @source_marked_read = header["status"] == "RO"
end
+ private :parse_header
- def snippet; @snippet || to_chunks && @snippet; end
+ def snippet; @snippet || chunks && @snippet; end
def is_list_message?; !@list_address.nil?; end
- def is_draft?; DraftLoader === @source; end
+ def is_draft?; @source.is_a? DraftLoader; end
def draft_filename
raise "not a draft" unless is_draft?
@source.fn_for_offset @source_info
end
def save index
- index.update_message self if @dirty
+ index.sync_message self if @dirty
@dirty = false
end
@dirty = true
end
- def to_chunks
- m = @source.load_message @source_info
- message_to_chunks m
+ ## this is called when the message body needs to actually be loaded.
+ def load_from_source!
+ @chunks ||=
+ if @source.has_errors?
+ [Text.new(error_message(@source.error.message.split("\n")))]
+ else
+ begin
+ ## we need to re-read the header because it contains information
+ ## that we don't store in the index. actually i think it's just
+ ## the mailing list address (if any), so this is kinda overkill.
+ ## i could just store that in the index, but i think there might
+ ## be other things like that in the future, and i'd rather not
+ ## bloat the index.
+ ## actually, it's also the differentiation between to/cc/bcc,
+ ## so i will keep this.
+ parse_header @source.load_header(@source_info)
+ message_to_chunks @source.load_message(@source_info)
+ rescue SourceError, SocketError, MessageFormatError => e
+ Redwood::log "problem getting messages from #{@source}: #{e.message}"
+ ## we need force_to_top here otherwise this window will cover
+ ## up the error message one
+ Redwood::report_broken_sources :force_to_top => true
+ [Text.new(error_message(e.message))]
+ end
+ end
+ end
+
+ ## builds the text shown in place of a message body that could not
+ ## be loaded: the current snippet, a banner explaining that loading
+ ## failed, the message location (source and source_info), and the
+ ## given error text. msg is interpolated into the text as-is.
+ ## returns the whole thing as one multi-line string.
+ def error_message msg
+ <<EOS
+#@snippet...
+
+***********************************************************************
+ An error occurred while loading this message. It is possible that
+ the source has changed, or (in the case of remote sources) is down.
+ You can check the log for errors, though hopefully an error window
+ should have popped up at some point.
+
+ The message location was:
+ #@source##@source_info
+***********************************************************************
+
+The error message was:
+ #{msg}
+EOS
+ end
+
+ def with_source_errors_handled
+ begin
+ yield
+ rescue SourceError => e
+ Redwood::log "problem getting messages from #{@source}: #{e.message}"
+ error_message e.message
+ end
end
def raw_header
- @source.raw_header @source_info
+ with_source_errors_handled { @source.raw_header @source_info }
end
def raw_full_message
- @source.raw_full_message @source_info
+ with_source_errors_handled { @source.raw_full_message @source_info }
+ end
+
+ ## much faster than raw_full_message
+ def each_raw_full_message_line &b
+ with_source_errors_handled { @source.each_raw_full_message_line(@source_info, &b) }
end
def content
+ load_from_source!
[
- from && from.longname,
- to.map { |p| p.longname },
- cc.map { |p| p.longname },
- bcc.map { |p| p.longname },
- to_chunks.select { |c| c.is_a? Text }.map { |c| c.lines },
- subj,
+ from && "#{from.name} #{from.email}",
+ to.map { |p| "#{p.name} #{p.email}" },
+ cc.map { |p| "#{p.name} #{p.email}" },
+ bcc.map { |p| "#{p.name} #{p.email}" },
+ chunks.select { |c| c.is_a? Text }.map { |c| c.lines },
+ Message.normalize_subj(subj),
].flatten.compact.join " "
end
def basic_body_lines
- to_chunks.find_all { |c| c.is_a?(Text) || c.is_a?(Quote) }.map { |c| c.lines }.flatten
+ chunks.find_all { |c| c.is_a?(Text) || c.is_a?(Quote) }.map { |c| c.lines }.flatten
end
def basic_header_lines
private
- ## everything RubyMail-specific goes here.
- def message_to_chunks m
- ret = [] <<
- case m.header.content_type
- when "text/plain", nil
- raise MessageFormatError, "no message body before decode" unless
- m.body
- body = m.decode or raise MessageFormatError, "no message body"
- text_to_chunks body.gsub(/\t/, " ").gsub(/\r/, "").split("\n")
- when /^multipart\//
- nil
+ ## here's where we handle decoding mime attachments. unfortunately
+ ## but unsurprisingly, the world of mime attachments is a bit of a
+ ## mess. as an empiricist, i'm basing the following behavior on
+ ## observed mail rather than on interpretations of rfcs, so probably
+ ## this will have to be tweaked.
+ ##
+ ## the general behavior i want is: ignore content-disposition, at
+ ## least in so far as it suggests something being inline vs being an
+ ## attachment. (because really, that should be the recipient's
+ ## decision to make.) if a mime part is text/plain, OR if the user
+ ## decoding hook converts it, then decode it and display it
+ ## inline. for these decoded attachments, if it has an associated
+ ## filename, then make it collapsible and individually saveable;
+ ## otherwise, treat it as regular body text.
+ ##
+ ## everything else is just an attachment and is not displayed
+ ## inline.
+ ##
+ ## so, in contrast to mutt, the user is not exposed to the workings
+ ## of the gruesome slaughterhouse and sausage factory that is a
+ ## mime-encoded message, but need only see the delicious end
+ ## product.
+ def message_to_chunks m, sibling_types=[]
+ if m.multipart?
+ sibling_types = m.body.map { |p| p.header.content_type }
+ m.body.map { |p| message_to_chunks p, sibling_types }.flatten.compact # recurse
+ else
+ filename =
+ ## first, paw through the headers looking for a filename
+ if m.header["Content-Disposition"] &&
+ m.header["Content-Disposition"] =~ /filename="?(.*?[^\\])("|;|$)/
+ $1
+ elsif m.header["Content-Type"] &&
+ m.header["Content-Type"] =~ /name=(.*?)(;|$)/
+ $1
+
+ ## haven't found one, but it's a non-text message. fake
+ ## it.
+ elsif m.header["Content-Type"] && m.header["Content-Type"] !~ /^text\/plain/
+ "sup-attachment-#{Time.now.to_i}-#{rand 10000}"
+ end
+
+ ## if there's a filename, we'll treat it as an attachment.
+ if filename
+ [Attachment.new(m.header.content_type, filename, m, sibling_types)]
+
+ ## otherwise, it's body text
else
- disp = m.header["Content-Disposition"] || ""
- Attachment.new m.header.content_type, disp.gsub(/[\s\n]+/, " "), m
+ body = Message.convert_from m.decode, m.charset
+ text_to_chunks body.normalize_whitespace.split("\n")
end
-
- m.each_part { |p| ret << message_to_chunks(p) } if m.multipart?
- ret.compact.flatten
+ end
+ end
+
+ ## converts body from charset into the encoding named by the global
+ ## $encoding.
+ ##
+ ## returns body unchanged when charset is nil. when the conversion
+ ## fails, a warning is logged, the raw body is dumped to
+ ## sup-unable-to-decode.txt (relative to the current directory), and
+ ## the unconverted body is returned.
+ def self.convert_from body, charset
+   return body unless charset
+
+   begin
+     Iconv.iconv($encoding, charset, body).join
+   ## InvalidCharacter is what Iconv raises for an incomplete multibyte
+   ## sequence at the end of the input; fall back to the raw body for
+   ## it just like for the other conversion failures.
+   rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence => e
+     Redwood::log "warning: error (#{e.class.name}) decoding message body from #{charset}: #{e.message}"
+     File.open("sup-unable-to-decode.txt", "w") { |f| f.write body }
+     body
+   end
+ end
## parse the lines of text into chunk objects. the heuristics here
## need tweaking in some nice manner. TODO: move these heuristics
## into the classes themselves.
-
def text_to_chunks lines
state = :text # one of :text, :quote, or :sig
chunks = []
lines.each_with_index do |line, i|
nextline = lines[(i + 1) ... lines.length].find { |l| l !~ /^\s*$/ } # skip blank lines
+
case state
when :text
newstate = nil
+
if line =~ QUOTE_PATTERN || (line =~ QUOTE_START_PATTERN && (nextline =~ QUOTE_PATTERN || nextline =~ QUOTE_START_PATTERN))
newstate = :quote
elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE
elsif line =~ BLOCK_QUOTE_PATTERN
newstate = :block_quote
end
+
if newstate
chunks << Text.new(chunk_lines) unless chunk_lines.empty?
chunk_lines = [line]
else
chunk_lines << line
end
+
when :quote
newstate = nil
- if line =~ QUOTE_PATTERN || line =~ QUOTE_START_PATTERN || line =~ /^\s*$/
+
+ if line =~ QUOTE_PATTERN || line =~ QUOTE_START_PATTERN #|| line =~ /^\s*$/
chunk_lines << line
elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE
newstate = :sig
else
newstate = :text
end
+
if newstate
if chunk_lines.empty?
# nothing
- elsif chunk_lines.size == 1
- chunks << Text.new(chunk_lines) # forget about one-line quotes
else
chunks << Quote.new(chunk_lines)
end
chunk_lines = [line]
state = newstate
end
- when :block_quote
- chunk_lines << line
- when :sig
+
+ when :block_quote, :sig
chunk_lines << line
end
- if state == :text && (@snippet.nil? || @snippet.length < SNIPPET_LEN) &&
- line !~ /[=\*#_-]{3,}/ && line !~ /^\s*$/
- @snippet = (@snippet ? @snippet + " " : "") + line.gsub(/^\s+/, "").gsub(/[\r\n]/, "").gsub(/\s+/, " ")
- @snippet = @snippet[0 ... SNIPPET_LEN]
+ if !@have_snippet && state == :text && (@snippet.nil? || @snippet.length < SNIPPET_LEN) && line !~ /[=\*#_-]{3,}/ && line !~ /^\s*$/
+ @snippet += " " unless @snippet.empty?
+ @snippet += line.gsub(/^\s+/, "").gsub(/[\r\n]/, "").gsub(/\s+/, " ")
+ @snippet = @snippet[0 ... SNIPPET_LEN].chomp
end
-# if @snippet.nil? && state == :text && (line.length > 40 ||
-# line =~ /\S+.*[^,!:]\s*$/)
-# @snippet = line.gsub(/^\s+/, "").gsub(/[\r\n]/, "")[0 .. 80]
-# end
end
## final object