git.cworth.org Git - sup/blob - lib/sup/message.rb

   1 require 'time'
   2
   3 module Redwood
   4
   5 ## a Message is what's threaded.
   6 ##
   7 ## it is also where the parsing for quotes and signatures is done, but
   8 ## that should be moved out to a separate class at some point (because
   9 ## i would like, for example, to be able to add in a ruby-talk
  10 ## specific module that would detect and link to /ruby-talk:\d+/
  11 ## sequences in the text of an email. (how sweet would that be?)
  12 ##
  13 ## this class catches all source exceptions. if the underlying source
  14 ## throws an error, it is caught and handled.
  15
  16 class Message
  ## maximum number of characters kept in a message snippet.
  SNIPPET_LEN = 80

  ## matches one or more leading reply markers on a subject line:
  ## "Re:", "RE:", or counted forms such as "Re[2]:" / "Re(12):".
  ## the counter may have any number of digits (it used to accept a
  ## single digit only, so "Re[10]:" was not treated as a reply).
  RE_PATTERN = /^((re|re[\[\(]\d+[\]\)]):\s*)+/i

  ## some utility methods
  class << self
    ## returns +s+ with every leading reply marker removed.
    def normalize_subj(s)
      s.gsub(RE_PATTERN, "")
    end

    ## returns a truthy value (the match offset) when +s+ starts with a
    ## reply marker, and nil otherwise.
    def subj_is_reply?(s)
      s =~ RE_PATTERN
    end

    ## returns +s+ untouched when it already is a reply subject;
    ## otherwise returns it prefixed with "Re: ".
    def reify_subj(s)
      subj_is_reply?(s) ? s : "Re: " + s
    end
  end
  26
  27   QUOTE_PATTERN = /^\s{0,4}[>|\}]/ # a quoted line: ">", "|" or "}" after at most four whitespace characters
  28   BLOCK_QUOTE_PATTERN = /^-----\s*Original Message\s*----+$/ # "-----Original Message-----" separator; text_to_chunks treats all following lines as one quote
  29   SIG_PATTERN = /(^-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)|(^\s*--~--~-)|(^\s*--\+\+\*\*==)/ # signature delimiters: "--" / "-- ", long dash or underscore rules, "--~--~-" and "--++**==" markers
  30
  31   MAX_SIG_DISTANCE = 15 # lines from the end; a SIG_PATTERN match only starts a signature when fewer than this many lines remain
  32   DEFAULT_SUBJECT = "" # subject used by parse_header when the header carries no "subject" entry
  33   DEFAULT_SENDER = "(missing sender)" # not referenced in the visible part of this file
  34
  35   attr_reader :id, :date, :from, :subj, :refs, :replytos, :to, :source,
  36               :cc, :bcc, :labels, :attachments, :list_address, :recipient_email, :replyto,
  37               :source_info, :list_subscribe, :list_unsubscribe # plain readers for state assigned in initialize and parse_header
  38
  39   bool_reader :dirty, :source_marked_read, :snippet_contains_encrypted_content # bool_reader is defined outside this file; presumably it generates predicate readers (dirty? etc.) -- TODO confirm
  40
  41   ## if you specify a :header, will use values from that. otherwise,
  42   ## will try and load the header from the source.
  ## +opts+ keys read here:
  ##   :source       (required) the source the message lives in
  ##   :source_info  (required) the source's locator for the message
  ##   :snippet      (optional) a previously computed snippet
  ##   :labels       (optional) an enumerable of labels
  ##
  ## raises ArgumentError when :source or :source_info is missing.
  ##
  ## note that the header is not parsed here; parse_header is run by
  ## load_from_source!.
  def initialize(opts)
    @source = opts[:source]
    raise ArgumentError, "source can't be nil" unless @source

    @source_info = opts[:source_info]
    raise ArgumentError, "source_info can't be nil" unless @source_info

    given_snippet = opts[:snippet]
    @snippet = given_snippet
    ## an empty snippet counts as "no snippet"; text_to_chunks will
    ## then build one from the body.
    @have_snippet = !given_snippet.nil? && !given_snippet.empty?
    @snippet_contains_encrypted_content = false

    @labels = Set.new(opts[:labels] || [])
    @attachments = []
    @chunks = nil
    @dirty = false
    @encrypted = false

    ## we need to initialize this. see comments in parse_header as to
    ## why.
    @refs = []

    #parse_header(opts[:header] || @source.load_header(@source_info))
  end
  61
  ## fills in the header-derived fields of this message from +header+.
  ##
  ## +header+ is indexed with lower-case field names ("from",
  ## "message-id", ...). its from/to/subject/cc/bcc entries are
  ## re-encoded in place, so the object passed in is modified.
  ##
  ## missing or unusable fields are faked rather than rejected:
  ## * no message-id: "sup-faked-" plus the MD5 of the raw header
  ## * no from: a fixed auto-generated sender address
  ## * no date, or a date string Time.parse rejects: the current time
  ## * no subject (key absent or value nil): DEFAULT_SUBJECT
  def parse_header(header)
    ## forcibly decode these headers from and to the current encoding,
    ## which serves to strip out characters that aren't displayable
    ## (and which would otherwise be screwing up the display)
    %w(from to subject cc bcc).each do |f|
      header[f] = Iconv.easy_decode($encoding, $encoding, header[f]) if header[f]
    end

    @id =
      if header["message-id"]
        ## prefer the part between angle brackets when there is one
        mid = header["message-id"] =~ /<(.+?)>/ ? $1 : header["message-id"]
        sanitize_message_id mid
      else
        "sup-faked-" + Digest::MD5.hexdigest(raw_header)
      end

    @from = Person.from_address(header["from"] ||
      "Sup Auto-generated Fake Sender <sup@fake.sender.example.com>")

    @date =
      case(date = header["date"])
      when Time
        date
      when String
        begin
          Time.parse date
        rescue ArgumentError
          ## mangled date header
          Time.now
        end
      else
        ## non-existent date header
        Time.now
      end

    ## collapse whitespace runs and drop trailing whitespace. a nil
    ## subject value is treated like a missing subject instead of
    ## blowing up on nil.gsub.
    subject = header["subject"]
    @subj = subject ? subject.gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT
    @to = Person.from_address_list header["to"]
    @cc = Person.from_address_list header["cc"]
    @bcc = Person.from_address_list header["bcc"]

    ## before loading our full header from the source, we can actually
    ## have some extra refs set by the UI. (this happens when the user
    ## joins threads manually). so we will merge the current refs values
    ## in here.
    refs = (header["references"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first }
    @refs = (@refs + refs).uniq
    @replytos = (header["in-reply-to"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first }

    @replyto = Person.from_address header["reply-to"]
    @list_address =
      if header["list-post"]
        ## strip the "<mailto:" ... ">" wrapper around the address
        Person.from_address header["list-post"].gsub(/^<mailto:|>$/, "")
      else
        nil
      end

    @recipient_email = header["envelope-to"] || header["x-original-to"] || header["delivered-to"]
    @source_marked_read = header["status"] == "RO"
    @list_subscribe = header["list-subscribe"]
    @list_unsubscribe = header["list-unsubscribe"]
  end
 129
 130   def add_ref ref
 131     @refs << ref
 132     @dirty = true
 133   end
 134
 135   def remove_ref ref
 136     @dirty = true if @refs.delete ref
 137   end
 138
 139   def snippet; @snippet || (chunks && @snippet); end
 140   def is_list_message?; !@list_address.nil?; end
 141   def is_draft?; @source.is_a? DraftLoader; end
 142   def draft_filename
 143     raise "not a draft" unless is_draft?
 144     @source.fn_for_offset @source_info
 145   end
 146
 147   ## sanitize message ids by removing spaces and non-ascii characters.
 148   ## also, truncate to 255 characters. all these steps are necessary
 149   ## to make ferret happy. of course, we probably fuck up a couple
 150   ## valid message ids as well. as long as we're consistent, this
 151   ## should be fine, though.
 152   ##
 153   ## also, mostly the message ids that are changed by this belong to
 154   ## spam email.
 155   ##
 156   ## an alternative would be to SHA1 or MD5 all message ids on a regular basis.
 157   ## don't tempt me.
 158   def sanitize_message_id mid; mid.gsub(/(\s|[^\000-\177])+/, "")[0..254] end
 159
 160   def save_state index
 161     return unless @dirty
 162     index.update_message_state self
 163     @dirty = false
 164     true
 165   end
 166
 167   def has_label? t; @labels.member? t; end
 168   def add_label l
 169     return if @labels.member? l
 170     @labels << l
 171     @dirty = true
 172   end
 173   def remove_label l
 174     return unless @labels.member? l
 175     @labels.delete l
 176     @dirty = true
 177   end
 178
 179   def recipients
 180     @to + @cc + @bcc
 181   end
 182
 183   def labels= l
 184     raise ArgumentError, "not a set" unless l.is_a?(Set)
 185     return if @labels == l
 186     @labels = l
 187     @dirty = true
 188   end
 189
 190   def chunks
 191     load_from_source!
 192     @chunks
 193   end
 194
 195   ## this is called when the message body needs to actually be loaded.
 196   def load_from_source!
 197     @chunks ||=
 198       if @source.respond_to?(:has_errors?) && @source.has_errors?
 199         [Chunk::Text.new(error_message(@source.error.message).split("\n"))]
 200       else
 201         begin
 202           ## we need to re-read the header because it contains information
 203           ## that we don't store in the index. actually i think it's just
 204           ## the mailing list address (if any), so this is kinda overkill.
 205           ## i could just store that in the index, but i think there might
 206           ## be other things like that in the future, and i'd rather not
 207           ## bloat the index.
 208           ## actually, it's also the differentiation between to/cc/bcc,
 209           ## so i will keep this.
 210           parse_header @source.load_header(@source_info)
 211           message_to_chunks @source.load_message(@source_info)
 212         rescue SourceError, SocketError => e
 213           warn "problem getting messages from #{@source}: #{e.message}"
 214           ## we need force_to_top here otherwise this window will cover
 215           ## up the error message one
 216           @source.error ||= e
 217           Redwood::report_broken_sources :force_to_top => true
 218           [Chunk::Text.new(error_message(e.message).split("\n"))]
 219         end
 220       end
 221   end
 222
 223   def error_message msg
 224     <<EOS
 225 #@snippet...
 226
 227 ***********************************************************************
 228  An error occurred while loading this message. It is possible that
 229  the source has changed, or (in the case of remote sources) is down.
 230  You can check the log for errors, though hopefully an error window
 231  should have popped up at some point.
 232
 233  The message location was:
 234  #@source##@source_info
 235 ***********************************************************************
 236
 237 The error message was:
 238   #{msg}
 239 EOS
 240   end
 241
 242   ## wrap any source methods that might throw sourceerrors
 243   def with_source_errors_handled
 244     begin
 245       yield
 246     rescue SourceError => e
 247       warn "problem getting messages from #{@source}: #{e.message}"
 248       @source.error ||= e
 249       Redwood::report_broken_sources :force_to_top => true
 250       error_message e.message
 251     end
 252   end
 253
 254   def raw_header
 255     with_source_errors_handled { @source.raw_header @source_info }
 256   end
 257
 258   def raw_message
 259     with_source_errors_handled { @source.raw_message @source_info }
 260   end
 261
 262   ## much faster than raw_message
 263   def each_raw_message_line &b
 264     with_source_errors_handled { @source.each_raw_message_line(@source_info, &b) }
 265   end
 266
 267   ## returns all the content from a message that will be indexed
 268   def indexable_content
 269     load_from_source!
 270     [
 271       from && from.indexable_content,
 272       to.map { |p| p.indexable_content },
 273       cc.map { |p| p.indexable_content },
 274       bcc.map { |p| p.indexable_content },
 275       indexable_chunks.map { |c| c.lines },
 276       indexable_subject,
 277     ].flatten.compact.join " "
 278   end
 279
 280   def indexable_body
 281     indexable_chunks.map { |c| c.lines }.flatten.compact.join " "
 282   end
 283
 284   def indexable_chunks
 285     chunks.select { |c| c.is_a? Chunk::Text }
 286   end
 287
 288   def indexable_subject
 289     Message.normalize_subj(subj)
 290   end
 291
 292   def quotable_body_lines
 293     chunks.find_all { |c| c.quotable? }.map { |c| c.lines }.flatten
 294   end
 295
 296   def quotable_header_lines
 297     ["From: #{@from.full_address}"] +
 298       (@to.empty? ? [] : ["To: " + @to.map { |p| p.full_address }.join(", ")]) +
 299       (@cc.empty? ? [] : ["Cc: " + @cc.map { |p| p.full_address }.join(", ")]) +
 300       (@bcc.empty? ? [] : ["Bcc: " + @bcc.map { |p| p.full_address }.join(", ")]) +
 301       ["Date: #{@date.rfc822}",
 302        "Subject: #{@subj}"]
 303   end
 304
 305   def self.build_from_source source, source_info
 306     m = Message.new :source => source, :source_info => source_info
 307     m.load_from_source!
 308     m
 309   end
 310
 311 private
 312
 313   ## here's where we handle decoding mime attachments. unfortunately
 314   ## but unsurprisingly, the world of mime attachments is a bit of a
 315   ## mess. as an empiricist, i'm basing the following behavior on
 316   ## observed mail rather than on interpretations of rfcs, so probably
 317   ## this will have to be tweaked.
 318   ##
 319   ## the general behavior i want is: ignore content-disposition, at
 320   ## least in so far as it suggests something being inline vs being an
 321   ## attachment. (because really, that should be the recipient's
 322   ## decision to make.) if a mime part is text/plain, OR if the user
 323   ## decoding hook converts it, then decode it and display it
 324   ## inline. for these decoded attachments, if it has associated
 325   ## filename, then make it collapsable and individually saveable;
 326   ## otherwise, treat it as regular body text.
 327   ##
 328   ## everything else is just an attachment and is not displayed
 329   ## inline.
 330   ##
 331   ## so, in contrast to mutt, the user is not exposed to the workings
 332   ## of the gruesome slaughterhouse and sausage factory that is a
 333   ## mime-encoded message, but need only see the delicious end
 334   ## product.
 335
 336   def multipart_signed_to_chunks m
 337     if m.body.size != 2
 338       warn "multipart/signed with #{m.body.size} parts (expecting 2)"
 339       return
 340     end
 341
 342     payload, signature = m.body
 343     if signature.multipart?
 344       warn "multipart/signed with payload multipart #{payload.multipart?} and signature multipart #{signature.multipart?}"
 345       return
 346     end
 347
 348     ## this probably will never happen
 349     if payload.header.content_type == "application/pgp-signature"
 350       warn "multipart/signed with payload content type #{payload.header.content_type}"
 351       return
 352     end
 353
 354     if signature.header.content_type != "application/pgp-signature"
 355       ## unknown signature type; just ignore.
 356       #warn "multipart/signed with signature content type #{signature.header.content_type}"
 357       return
 358     end
 359
 360     [CryptoManager.verify(payload, signature), message_to_chunks(payload)].flatten.compact
 361   end
 362
 363   def multipart_encrypted_to_chunks m
 364     if m.body.size != 2
 365       warn "multipart/encrypted with #{m.body.size} parts (expecting 2)"
 366       return
 367     end
 368
 369     control, payload = m.body
 370     if control.multipart?
 371       warn "multipart/encrypted with control multipart #{control.multipart?} and payload multipart #{payload.multipart?}"
 372       return
 373     end
 374
 375     if payload.header.content_type != "application/octet-stream"
 376       warn "multipart/encrypted with payload content type #{payload.header.content_type}"
 377       return
 378     end
 379
 380     if control.header.content_type != "application/pgp-encrypted"
 381       warn "multipart/encrypted with control content type #{signature.header.content_type}"
 382       return
 383     end
 384
 385     notice, sig, decryptedm = CryptoManager.decrypt payload
 386     if decryptedm # managed to decrypt
 387       children = message_to_chunks(decryptedm, true)
 388       [notice, sig, children]
 389     else
 390       [notice]
 391     end
 392   end
 393
 394   ## takes a RMail::Message, breaks it into Chunk:: classes.
 395   def message_to_chunks m, encrypted=false, sibling_types=[]
 396     if m.multipart?
 397       chunks =
 398         case m.header.content_type
 399         when "multipart/signed"
 400           multipart_signed_to_chunks m
 401         when "multipart/encrypted"
 402           multipart_encrypted_to_chunks m
 403         end
 404
 405       unless chunks
 406         sibling_types = m.body.map { |p| p.header.content_type }
 407         chunks = m.body.map { |p| message_to_chunks p, encrypted, sibling_types }.flatten.compact
 408       end
 409
 410       chunks
 411     elsif m.header.content_type == "message/rfc822"
 412       payload = RMail::Parser.read(m.body)
 413       from = payload.header.from.first
 414       from_person = from ? Person.from_address(from.format) : nil
 415       [Chunk::EnclosedMessage.new(from_person, payload.to_s)] +
 416         message_to_chunks(payload, encrypted)
 417     else
 418       filename =
 419         ## first, paw through the headers looking for a filename
 420         if m.header["Content-Disposition"] && m.header["Content-Disposition"] =~ /filename="?(.*?[^\\])("|;|$)/
 421           $1
 422         elsif m.header["Content-Type"] && m.header["Content-Type"] =~ /name="?(.*?[^\\])("|;|$)/
 423           $1
 424
 425         ## haven't found one, but it's a non-text message. fake
 426         ## it.
 427         ##
 428         ## TODO: make this less lame.
 429         elsif m.header["Content-Type"] && m.header["Content-Type"] !~ /^text\/plain/
 430           extension =
 431             case m.header["Content-Type"]
 432             when /text\/html/ then "html"
 433             when /image\/(.*)/ then $1
 434             end
 435
 436           ["sup-attachment-#{Time.now.to_i}-#{rand 10000}", extension].join(".")
 437         end
 438
 439       ## if there's a filename, we'll treat it as an attachment.
 440       if filename
 441         # add this to the attachments list if its not a generated html
 442         # attachment (should we allow images with generated names?).
 443         # Lowercase the filename because searches are easier that way
 444         @attachments.push filename.downcase unless filename =~ /^sup-attachment-/
 445         add_label :attachment unless filename =~ /^sup-attachment-/
 446         content_type = m.header.content_type || "application/unknown" # sometimes RubyMail gives us nil
 447         [Chunk::Attachment.new(content_type, filename, m, sibling_types)]
 448
 449       ## otherwise, it's body text
 450       else
 451         ## if there's no charset, use the current encoding as the charset.
 452         ## this ensures that the body is normalized to avoid non-displayable
 453         ## characters
 454         body = Iconv.easy_decode($encoding, m.charset || $encoding, m.decode) if m.body
 455         text_to_chunks((body || "").normalize_whitespace.split("\n"), encrypted)
 456       end
 457     end
 458   end
 459
 460   ## parse the lines of text into chunk objects.  the heuristics here
 461   ## need tweaking in some nice manner. TODO: move these heuristics
 462   ## into the classes themselves.
 463   def text_to_chunks lines, encrypted
 464     state = :text # one of :text, :quote, or :sig
 465     chunks = []
 466     chunk_lines = []
 467
 468     lines.each_with_index do |line, i|
 469       nextline = lines[(i + 1) ... lines.length].find { |l| l !~ /^\s*$/ } # skip blank lines
 470
 471       case state
 472       when :text
 473         newstate = nil
 474
 475         ## the following /:$/ followed by /\w/ is an attempt to detect the
 476         ## start of a quote. this is split into two regexen because the
 477         ## original regex /\w.*:$/ had very poor behavior on long lines
 478         ## like ":a:a:a:a:a" that occurred in certain emails.
 479         if line =~ QUOTE_PATTERN || (line =~ /:$/ && line =~ /\w/ && nextline =~ QUOTE_PATTERN)
 480           newstate = :quote
 481         elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE
 482           newstate = :sig
 483         elsif line =~ BLOCK_QUOTE_PATTERN
 484           newstate = :block_quote
 485         end
 486
 487         if newstate
 488           chunks << Chunk::Text.new(chunk_lines) unless chunk_lines.empty?
 489           chunk_lines = [line]
 490           state = newstate
 491         else
 492           chunk_lines << line
 493         end
 494
 495       when :quote
 496         newstate = nil
 497
 498         if line =~ QUOTE_PATTERN || (line =~ /^\s*$/ && nextline =~ QUOTE_PATTERN)
 499           chunk_lines << line
 500         elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE
 501           newstate = :sig
 502         else
 503           newstate = :text
 504         end
 505
 506         if newstate
 507           if chunk_lines.empty?
 508             # nothing
 509           else
 510             chunks << Chunk::Quote.new(chunk_lines)
 511           end
 512           chunk_lines = [line]
 513           state = newstate
 514         end
 515
 516       when :block_quote, :sig
 517         chunk_lines << line
 518       end
 519
 520       if !@have_snippet && state == :text && (@snippet.nil? || @snippet.length < SNIPPET_LEN) && line !~ /[=\*#_-]{3,}/ && line !~ /^\s*$/
 521         @snippet ||= ""
 522         @snippet += " " unless @snippet.empty?
 523         @snippet += line.gsub(/^\s+/, "").gsub(/[\r\n]/, "").gsub(/\s+/, " ")
 524         @snippet = @snippet[0 ... SNIPPET_LEN].chomp
 525         @dirty = true unless encrypted && $config[:discard_snippets_from_encrypted_messages]
 526         @snippet_contains_encrypted_content = true if encrypted
 527       end
 528     end
 529
 530     ## final object
 531     case state
 532     when :quote, :block_quote
 533       chunks << Chunk::Quote.new(chunk_lines) unless chunk_lines.empty?
 534     when :text
 535       chunks << Chunk::Text.new(chunk_lines) unless chunk_lines.empty?
 536     when :sig
 537       chunks << Chunk::Signature.new(chunk_lines) unless chunk_lines.empty?
 538     end
 539     chunks
 540   end
 541 end
 542
 543 end