Merge branch 'better-buffer-list' into next

[sup] / lib / sup / message.rb
diff --git a/lib/sup/message.rb b/lib/sup/message.rb

index 0ef1a615438a0fce9fa0f4bb2721af13917566b2..0ee46fb25d7ecbb97807910c9fa0785337bccc2c 100644 (file)
--- a/lib/sup/message.rb
+++ b/lib/sup/message.rb
@@ -1,4 +1,3 @@
-require 'tempfile'
  require 'time'
  require 'iconv'
  
@@ -13,6 +12,10 @@ class MessageFormatError < StandardError; end
  ## i would like, for example, to be able to add in a ruby-talk
  ## specific module that would detect and link to /ruby-talk:\d+/
  ## sequences in the text of an email. (how sweet would that be?)
+##
+## this class catches all source exceptions. if the underlying source throws
+## an error, it is caught and handled.
+
  class Message
    SNIPPET_LEN = 80
    RE_PATTERN = /^((re|re[\[\(]\d[\]\)]):\s*)+/i
@@ -26,7 +29,7 @@ class Message
  
    QUOTE_PATTERN = /^\s{0,4}[>|\}]/
    BLOCK_QUOTE_PATTERN = /^-----\s*Original Message\s*----+$/
-  QUOTE_START_PATTERN = /(^\s*Excerpts from)|(^\s*In message )|(^\s*In article )|(^\s*Quoting )|((wrote|writes|said|says)\s*:\s*$)/
+  QUOTE_START_PATTERN = /\w.*:$/
    SIG_PATTERN = /(^-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)|(^\s*--~--~-)|(^\s*--\+\+\*\*==)/
  
    MAX_SIG_DISTANCE = 15 # lines from the end
@@ -34,46 +37,56 @@ class Message
    DEFAULT_SENDER = "(missing sender)"
  
    attr_reader :id, :date, :from, :subj, :refs, :replytos, :to, :source,
-              :cc, :bcc, :labels, :list_address, :recipient_email, :replyto,
-              :source_info, :chunks, :list_subscribe, :list_unsubscribe
+              :cc, :bcc, :labels, :attachments, :list_address, :recipient_email, :replyto,
+              :source_info, :list_subscribe, :list_unsubscribe
  
-  bool_reader :dirty, :source_marked_read
+  bool_reader :dirty, :source_marked_read, :snippet_contains_encrypted_content
  
    ## if you specify a :header, will use values from that. otherwise,
    ## will try and load the header from the source.
    def initialize opts
      @source = opts[:source] or raise ArgumentError, "source can't be nil"
      @source_info = opts[:source_info] or raise ArgumentError, "source_info can't be nil"
-    @snippet = opts[:snippet] || ""
-    @have_snippet = !opts[:snippet].nil?
+    @snippet = opts[:snippet]
+    @snippet_contains_encrypted_content = false
+    @have_snippet = !(opts[:snippet].nil? || opts[:snippet].empty?)
      @labels = [] + (opts[:labels] || [])
      @dirty = false
+    @encrypted = false
      @chunks = nil
+    @attachments = []
+
+    ## we need to initialize this. see comments in parse_header as to
+    ## why.
+    @refs = []
  
      parse_header(opts[:header] || @source.load_header(@source_info))
    end
  
    def parse_header header
-    header.each { |k, v| header[k.downcase] = v }
-    
+    header.keys.each { |k| header[k.downcase] = header[k] } # canonicalize
+
+    fakeid = nil
+    fakename = nil
+
      @id =
        if header["message-id"]
          sanitize_message_id header["message-id"]
        else
-        returning("sup-faked-" + Digest::MD5.hexdigest(raw_header)) do |id|
-          Redwood::log "faking message-id for message from #@from: #{id}"
-        end
+        fakeid = "sup-faked-" + Digest::MD5.hexdigest(raw_header)
        end
      
      @from =
        if header["from"]
-        PersonManager.person_for header["from"]
+        Person.from_address header["from"]
        else
-        name = "Sup Auto-generated Fake Sender <sup@fake.sender.example.com>"
-        Redwood::log "faking from for message #@id: #{name}"
-        PersonManager.person_for name
+        fakename = "Sup Auto-generated Fake Sender <sup@fake.sender.example.com>"
+        Person.from_address fakename
        end
  
+    Redwood::log "faking message-id for message from #@from: #{id}" if fakeid
+    Redwood::log "faking from for message #@id: #{fakename}" if fakename
+
      date = header["date"]
      @date =
        case date
@@ -83,7 +96,8 @@ class Message
          begin
            Time.parse date
          rescue ArgumentError => e
-          raise MessageFormatError, "unparsable date #{header['date']}: #{e.message}"
+          Redwood::log "faking date header for #{@id} due to error parsing date #{header['date'].inspect}: #{e.message}"
+          Time.now
          end
        else
          Redwood::log "faking date header for #{@id}"
@@ -91,16 +105,22 @@ class Message
        end
  
      @subj = header.member?("subject") ? header["subject"].gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT
-    @to = PersonManager.people_for header["to"]
-    @cc = PersonManager.people_for header["cc"]
-    @bcc = PersonManager.people_for header["bcc"]
-    @refs = (header["references"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first }
+    @to = Person.from_address_list header["to"]
+    @cc = Person.from_address_list header["cc"]
+    @bcc = Person.from_address_list header["bcc"]
+
+    ## before loading our full header from the source, we can actually
+    ## have some extra refs set by the UI. (this happens when the user
+    ## joins threads manually). so we will merge the current refs values
+    ## in here.
+    refs = (header["references"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first }
+    @refs = (@refs + refs).uniq
      @replytos = (header["in-reply-to"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first }
  
-    @replyto = PersonManager.person_for header["reply-to"]
+    @replyto = Person.from_address header["reply-to"]
      @list_address =
        if header["list-post"]
-        @list_address = PersonManager.person_for header["list-post"].gsub(/^<mailto:|>$/, "")
+        @list_address = Person.from_address header["list-post"].gsub(/^<mailto:|>$/, "")
        else
          nil
        end
@@ -112,7 +132,16 @@ class Message
    end
    private :parse_header
  
-  def snippet; @snippet || chunks && @snippet; end
+  def add_ref ref
+    @refs << ref
+    @dirty = true
+  end
+
+  def remove_ref ref
+    @dirty = true if @refs.delete ref
+  end
+
+  def snippet; @snippet || (chunks && @snippet); end
    def is_list_message?; !@list_address.nil?; end
    def is_draft?; @source.is_a? DraftLoader; end
    def draft_filename
@@ -120,11 +149,24 @@ class Message
      @source.fn_for_offset @source_info
    end
  
-  def sanitize_message_id mid; mid.gsub(/\s/, "") end
+  ## sanitize message ids by removing spaces and non-ascii characters.
+  ## also, truncate to 255 characters. all these steps are necessary
+  ## to make ferret happy. of course, we probably mangle a couple of
+  ## valid message ids as well. as long as we're consistent, this
+  ## should be fine, though.
+  ##
+  ## also, mostly the message ids that are changed by this belong to
+  ## spam email.
+  ##
+  ## an alternative would be to SHA1 or MD5 all message ids on a regular basis.
+  ## don't tempt me.
+  def sanitize_message_id mid; mid.gsub(/(\s|[^\000-\177])+/, "")[0..254] end
  
    def save index
-    index.sync_message self if @dirty
+    return unless @dirty
+    index.sync_message self
      @dirty = false
+    true
    end
  
    def has_label? t; @labels.member? t; end
@@ -148,11 +190,16 @@ class Message
      @dirty = true
    end
  
+  def chunks
+    load_from_source!
+    @chunks
+  end
+
    ## this is called when the message body needs to actually be loaded.
    def load_from_source!
      @chunks ||=
        if @source.has_errors?
-        [Chunk::Text.new(error_message(@source.error.message.split("\n")))]
+        [Chunk::Text.new(error_message(@source.error.message).split("\n"))]
        else
          begin
            ## we need to re-read the header because it contains information
@@ -169,8 +216,9 @@ class Message
            Redwood::log "problem getting messages from #{@source}: #{e.message}"
            ## we need force_to_top here otherwise this window will cover
            ## up the error message one
+          @source.error ||= e
            Redwood::report_broken_sources :force_to_top => true
-          [Chunk::Text.new(error_message(e.message))]
+          [Chunk::Text.new(error_message(e.message).split("\n"))]
          end
        end
    end
@@ -194,11 +242,14 @@ The error message was:
  EOS
    end
  
+  ## wrap any source methods that might raise a SourceError
    def with_source_errors_handled
      begin
        yield
      rescue SourceError => e
        Redwood::log "problem getting messages from #{@source}: #{e.message}"
+      @source.error ||= e
+      Redwood::report_broken_sources :force_to_top => true
        error_message e.message
      end
    end
@@ -216,13 +267,14 @@ EOS
      with_source_errors_handled { @source.each_raw_message_line(@source_info, &b) }
    end
  
-  def content
+  ## returns all the content from a message that will be indexed
+  def indexable_content
      load_from_source!
      [
-      from && "#{from.name} #{from.email}",
-      to.map { |p| "#{p.name} #{p.email}" },
-      cc.map { |p| "#{p.name} #{p.email}" },
-      bcc.map { |p| "#{p.name} #{p.email}" },
+      from && from.indexable_content,
+      to.map { |p| p.indexable_content },
+      cc.map { |p| p.indexable_content },
+      bcc.map { |p| p.indexable_content },
        chunks.select { |c| c.is_a? Chunk::Text }.map { |c| c.lines },
        Message.normalize_subj(subj),
      ].flatten.compact.join " "
@@ -267,7 +319,6 @@ private
    ## product.
  
    def multipart_signed_to_chunks m
-#    Redwood::log ">> multipart SIGNED: #{m.header['Content-Type']}: #{m.body.size}"
      if m.body.size != 2
        Redwood::log "warning: multipart/signed with #{m.body.size} parts (expecting 2)"
        return
@@ -279,13 +330,15 @@ private
        return
      end
  
+    ## this probably will never happen
      if payload.header.content_type == "application/pgp-signature"
        Redwood::log "warning: multipart/signed with payload content type #{payload.header.content_type}"
        return
      end
  
      if signature.header.content_type != "application/pgp-signature"
-      Redwood::log "warning: multipart/signed with signature content type #{signature.header.content_type}"
+      ## unknown signature type; just ignore.
+      #Redwood::log "warning: multipart/signed with signature content type #{signature.header.content_type}"
        return
      end
  
@@ -293,7 +346,6 @@ private
    end
  
    def multipart_encrypted_to_chunks m
-    Redwood::log ">> multipart ENCRYPTED: #{m.header['Content-Type']}: #{m.body.size}"
      if m.body.size != 2
        Redwood::log "warning: multipart/encrypted with #{m.body.size} parts (expecting 2)"
        return
@@ -316,11 +368,11 @@ private
      end
  
      decryptedm, sig, notice = CryptoManager.decrypt payload
-    children = message_to_chunks(decryptedm) if decryptedm
+    children = message_to_chunks(decryptedm, true) if decryptedm
      [notice, sig, children].flatten.compact
    end
  
-  def message_to_chunks m, sibling_types=[]
+  def message_to_chunks m, encrypted=false, sibling_types=[]
      if m.multipart?
        chunks =
          case m.header.content_type
@@ -332,39 +384,51 @@ private
  
        unless chunks
          sibling_types = m.body.map { |p| p.header.content_type }
-        chunks = m.body.map { |p| message_to_chunks p, sibling_types }.flatten.compact
+        chunks = m.body.map { |p| message_to_chunks p, encrypted, sibling_types }.flatten.compact
        end
  
        chunks
      elsif m.header.content_type == "message/rfc822"
        payload = RMail::Parser.read(m.body)
        from = payload.header.from.first
-      from_person = from ? PersonManager.person_for(from.format) : nil
-      [Chunk::EnclosedMessage.new(from_person, payload.to_s)]
+      from_person = from ? Person.from_address(from.format) : nil
+      [Chunk::EnclosedMessage.new(from_person, payload.to_s)] +
+        message_to_chunks(payload, encrypted)
      else
        filename =
          ## first, paw through the headers looking for a filename
-        if m.header["Content-Disposition"] &&
-            m.header["Content-Disposition"] =~ /filename="?(.*?[^\\])("|;|$)/
+        if m.header["Content-Disposition"] && m.header["Content-Disposition"] =~ /filename="?(.*?[^\\])("|;|$)/
            $1
-        elsif m.header["Content-Type"] &&
-            m.header["Content-Type"] =~ /name=(.*?)(;|$)/
+        elsif m.header["Content-Type"] && m.header["Content-Type"] =~ /name="?(.*?[^\\])("|;|$)/
            $1
  
          ## haven't found one, but it's a non-text message. fake
          ## it.
+        ##
+        ## TODO: make this less lame.
          elsif m.header["Content-Type"] && m.header["Content-Type"] !~ /^text\/plain/
-          "sup-attachment-#{Time.now.to_i}-#{rand 10000}"
+          extension =
+            case m.header["Content-Type"]
+            when /text\/html/: "html"
+            when /image\/(.*)/: $1
+            end
+
+          ["sup-attachment-#{Time.now.to_i}-#{rand 10000}", extension].join(".")
          end
  
        ## if there's a filename, we'll treat it as an attachment.
        if filename
+        # add this to the attachments list if it's not an attachment with
+        # a generated name (should we allow images with generated names?).
+        # Lowercase the filename because searches are easier that way
+        @attachments.push filename.downcase unless filename =~ /^sup-attachment-/
+        add_label :attachment unless filename =~ /^sup-attachment-/
          [Chunk::Attachment.new(m.header.content_type, filename, m, sibling_types)]
  
        ## otherwise, it's body text
        else
-        body = Message.convert_from m.decode, m.charset
-        text_to_chunks body.normalize_whitespace.split("\n")
+        body = Message.convert_from m.decode, m.charset if m.body
+        text_to_chunks((body || "").normalize_whitespace.split("\n"), encrypted)
        end
      end
    end
@@ -373,10 +437,10 @@ private
      begin
        raise MessageFormatError, "RubyMail decode returned a null body" unless body
        return body unless charset
-      Iconv.iconv($encoding, charset, body).join
+      Iconv.easy_decode($encoding, charset, body)
      rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence, MessageFormatError => e
        Redwood::log "warning: error (#{e.class.name}) decoding message body from #{charset}: #{e.message}"
-      File.open("sup-unable-to-decode.txt", "w") { |f| f.write body }
+      File.open(File.join(BASE_DIR,"unable-to-decode.txt"), "w") { |f| f.write body }
        body
      end
    end
@@ -384,7 +448,7 @@ private
    ## parse the lines of text into chunk objects.  the heuristics here
    ## need tweaking in some nice manner. TODO: move these heuristics
    ## into the classes themselves.
-  def text_to_chunks lines
+  def text_to_chunks lines, encrypted
      state = :text # one of :text, :quote, or :sig
      chunks = []
      chunk_lines = []
@@ -396,7 +460,7 @@ private
        when :text
          newstate = nil
  
-        if line =~ QUOTE_PATTERN || (line =~ QUOTE_START_PATTERN && (nextline =~ QUOTE_PATTERN || nextline =~ QUOTE_START_PATTERN))
+        if line =~ QUOTE_PATTERN || (line =~ QUOTE_START_PATTERN && nextline =~ QUOTE_PATTERN)
            newstate = :quote
          elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE
            newstate = :sig
@@ -415,7 +479,7 @@ private
        when :quote
          newstate = nil
  
-        if line =~ QUOTE_PATTERN || line =~ QUOTE_START_PATTERN #|| line =~ /^\s*$/
+        if line =~ QUOTE_PATTERN || (line =~ /^\s*$/ && nextline =~ QUOTE_PATTERN)
            chunk_lines << line
          elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE
            newstate = :sig
@@ -436,11 +500,14 @@ private
        when :block_quote, :sig
          chunk_lines << line
        end
- 
+
        if !@have_snippet && state == :text && (@snippet.nil? || @snippet.length < SNIPPET_LEN) && line !~ /[=\*#_-]{3,}/ && line !~ /^\s*$/
+        @snippet ||= ""
          @snippet += " " unless @snippet.empty?
          @snippet += line.gsub(/^\s+/, "").gsub(/[\r\n]/, "").gsub(/\s+/, " ")
          @snippet = @snippet[0 ... SNIPPET_LEN].chomp
+        @dirty = true unless encrypted && $config[:discard_snippets_from_encrypted_messages]
+        @snippet_contains_encrypted_content = true if encrypted
        end
      end