X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=lib%2Fsup%2Fmessage.rb;h=c06a03fa547a3d6c96dd19945fae0934b907c925;hb=073e1f40024d7721ada08bce7c38d87883ec62f6;hp=ca7efad4cd0d4d25d734978abe61d6414decca80;hpb=b675cd3d9c8674390fd23c54ecc8f1ff001a6999;p=sup diff --git a/lib/sup/message.rb b/lib/sup/message.rb index ca7efad..c06a03f 100644 --- a/lib/sup/message.rb +++ b/lib/sup/message.rb @@ -1,4 +1,3 @@ -require 'tempfile' require 'time' require 'iconv' @@ -13,6 +12,10 @@ class MessageFormatError < StandardError; end ## i would like, for example, to be able to add in a ruby-talk ## specific module that would detect and link to /ruby-talk:\d+/ ## sequences in the text of an email. (how sweet would that be?) +## +## this class cathces all source exceptions. if the underlying source throws +## an error, it is caught and handled. + class Message SNIPPET_LEN = 80 RE_PATTERN = /^((re|re[\[\(]\d[\]\)]):\s*)+/i @@ -26,7 +29,7 @@ class Message QUOTE_PATTERN = /^\s{0,4}[>|\}]/ BLOCK_QUOTE_PATTERN = /^-----\s*Original Message\s*----+$/ - QUOTE_START_PATTERN = /(^\s*Excerpts from)|(^\s*In message )|(^\s*In article )|(^\s*Quoting )|((wrote|writes|said|says)\s*:\s*$)/ + QUOTE_START_PATTERN = /\w.*:$/ SIG_PATTERN = /(^-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)|(^\s*--~--~-)|(^\s*--\+\+\*\*==)/ MAX_SIG_DISTANCE = 15 # lines from the end @@ -35,45 +38,54 @@ class Message attr_reader :id, :date, :from, :subj, :refs, :replytos, :to, :source, :cc, :bcc, :labels, :list_address, :recipient_email, :replyto, - :source_info, :chunks, :list_subscribe, :list_unsubscribe + :source_info, :list_subscribe, :list_unsubscribe - bool_reader :dirty, :source_marked_read + bool_reader :dirty, :source_marked_read, :snippet_contains_encrypted_content ## if you specify a :header, will use values from that. otherwise, ## will try and load the header from the source. def initialize opts @source = opts[:source] or raise ArgumentError, "source can't be nil" @source_info = opts[:source_info] or raise ArgumentError, "source_info can't be nil" - @snippet = opts[:snippet] || "" - @have_snippet = !opts[:snippet].nil? + @snippet = opts[:snippet] + @snippet_contains_encrypted_content = false + @have_snippet = !(opts[:snippet].nil? || opts[:snippet].empty?) @labels = [] + (opts[:labels] || []) @dirty = false + @encrypted = false @chunks = nil + ## we need to initialize this. see comments in parse_header as to + ## why. + @refs = [] + parse_header(opts[:header] || @source.load_header(@source_info)) end def parse_header header header.each { |k, v| header[k.downcase] = v } - + + fakeid = nil + fakename = nil + @id = if header["message-id"] sanitize_message_id header["message-id"] else - returning("sup-faked-" + Digest::MD5.hexdigest(raw_header)) do |id| - Redwood::log "faking message-id for message from #@from: #{id}" - end + fakeid = "sup-faked-" + Digest::MD5.hexdigest(raw_header) end @from = if header["from"] PersonManager.person_for header["from"] else - name = "Sup Auto-generated Fake Sender " - Redwood::log "faking from for message #@id: #{name}" - PersonManager.person_for name + fakename = "Sup Auto-generated Fake Sender " + PersonManager.person_for fakename end + Redwood::log "faking message-id for message from #@from: #{id}" if fakeid + Redwood::log "faking from for message #@id: #{fakename}" if fakename + date = header["date"] @date = case date @@ -83,7 +95,8 @@ class Message begin Time.parse date rescue ArgumentError => e - raise MessageFormatError, "unparsable date #{header['date']}: #{e.message}" + Redwood::log "faking date header for #{@id} due to error parsing date #{header['date'].inspect}: #{e.message}" + Time.now end else Redwood::log "faking date header for #{@id}" @@ -94,7 +107,13 @@ class Message @to = PersonManager.people_for header["to"] @cc = PersonManager.people_for header["cc"] @bcc = PersonManager.people_for header["bcc"] - @refs = (header["references"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first } + + ## before loading our full header from the source, we can actually + ## have some extra refs set by the UI. (this happens when the user + ## joins threads manually). so we will merge the current refs values + ## in here. + refs = (header["references"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first } + @refs = (@refs + refs).uniq @replytos = (header["in-reply-to"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first } @replyto = PersonManager.person_for header["reply-to"] @@ -112,7 +131,16 @@ class Message end private :parse_header - def snippet; @snippet || chunks && @snippet; end + def add_ref ref + @refs << ref + @dirty = true + end + + def remove_ref ref + @dirty = true if @refs.delete ref + end + + def snippet; @snippet || (chunks && @snippet); end def is_list_message?; !@list_address.nil?; end def is_draft?; @source.is_a? DraftLoader; end def draft_filename @@ -123,8 +151,10 @@ class Message def sanitize_message_id mid; mid.gsub(/\s/, "") end def save index - index.sync_message self if @dirty + return unless @dirty + index.sync_message self @dirty = false + true end def has_label? t; @labels.member? t; end @@ -148,11 +178,16 @@ class Message @dirty = true end + def chunks + load_from_source! + @chunks + end + ## this is called when the message body needs to actually be loaded. def load_from_source! @chunks ||= if @source.has_errors? - [Chunk::Text.new(error_message(@source.error.message.split("\n")))] + [Chunk::Text.new(error_message(@source.error.message).split("\n"))] else begin ## we need to re-read the header because it contains information @@ -169,8 +204,9 @@ class Message Redwood::log "problem getting messages from #{@source}: #{e.message}" ## we need force_to_top here otherwise this window will cover ## up the error message one + @source.error ||= e Redwood::report_broken_sources :force_to_top => true - [Chunk::Text.new(error_message(e.message))] + [Chunk::Text.new(error_message(e.message).split("\n"))] end end end @@ -194,11 +230,14 @@ The error message was: EOS end + ## wrap any source methods that might throw sourceerrors def with_source_errors_handled begin yield rescue SourceError => e Redwood::log "problem getting messages from #{@source}: #{e.message}" + @source.error ||= e + Redwood::report_broken_sources :force_to_top => true error_message e.message end end @@ -216,23 +255,24 @@ EOS with_source_errors_handled { @source.each_raw_message_line(@source_info, &b) } end - def content + ## returns all the content from a message that will be indexed + def indexable_content load_from_source! [ - from && "#{from.name} #{from.email}", - to.map { |p| "#{p.name} #{p.email}" }, - cc.map { |p| "#{p.name} #{p.email}" }, - bcc.map { |p| "#{p.name} #{p.email}" }, + from && from.indexable_content, + to.map { |p| p.indexable_content }, + cc.map { |p| p.indexable_content }, + bcc.map { |p| p.indexable_content }, chunks.select { |c| c.is_a? Chunk::Text }.map { |c| c.lines }, Message.normalize_subj(subj), ].flatten.compact.join " " end - def basic_body_lines - chunks.find_all { |c| c.is_a?(Chunk::Text) || c.is_a?(Chunk::Quote) }.map { |c| c.lines }.flatten + def quotable_body_lines + chunks.find_all { |c| c.quotable? }.map { |c| c.lines }.flatten end - def basic_header_lines + def quotable_header_lines ["From: #{@from.full_address}"] + (@to.empty? ? [] : ["To: " + @to.map { |p| p.full_address }.join(", ")]) + (@cc.empty? ? [] : ["Cc: " + @cc.map { |p| p.full_address }.join(", ")]) + @@ -267,7 +307,6 @@ private ## product. def multipart_signed_to_chunks m -# Redwood::log ">> multipart SIGNED: #{m.header['Content-Type']}: #{m.body.size}" if m.body.size != 2 Redwood::log "warning: multipart/signed with #{m.body.size} parts (expecting 2)" return @@ -279,13 +318,15 @@ private return end + ## this probably will never happen if payload.header.content_type == "application/pgp-signature" Redwood::log "warning: multipart/signed with payload content type #{payload.header.content_type}" return end if signature.header.content_type != "application/pgp-signature" - Redwood::log "warning: multipart/signed with signature content type #{signature.header.content_type}" + ## unknown signature type; just ignore. + #Redwood::log "warning: multipart/signed with signature content type #{signature.header.content_type}" return end @@ -293,7 +334,6 @@ private end def multipart_encrypted_to_chunks m - Redwood::log ">> multipart ENCRYPTED: #{m.header['Content-Type']}: #{m.body.size}" if m.body.size != 2 Redwood::log "warning: multipart/encrypted with #{m.body.size} parts (expecting 2)" return @@ -316,11 +356,11 @@ private end decryptedm, sig, notice = CryptoManager.decrypt payload - children = message_to_chunks(decryptedm) if decryptedm + children = message_to_chunks(decryptedm, true) if decryptedm [notice, sig, children].flatten.compact end - def message_to_chunks m, sibling_types=[] + def message_to_chunks m, encrypted=false, sibling_types=[] if m.multipart? chunks = case m.header.content_type @@ -332,7 +372,7 @@ private unless chunks sibling_types = m.body.map { |p| p.header.content_type } - chunks = m.body.map { |p| message_to_chunks p, sibling_types }.flatten.compact + chunks = m.body.map { |p| message_to_chunks p, encrypted, sibling_types }.flatten.compact end chunks @@ -344,17 +384,23 @@ private else filename = ## first, paw through the headers looking for a filename - if m.header["Content-Disposition"] && - m.header["Content-Disposition"] =~ /filename="?(.*?[^\\])("|;|$)/ + if m.header["Content-Disposition"] && m.header["Content-Disposition"] =~ /filename="?(.*?[^\\])("|;|$)/ $1 - elsif m.header["Content-Type"] && - m.header["Content-Type"] =~ /name=(.*?)(;|$)/ + elsif m.header["Content-Type"] && m.header["Content-Type"] =~ /name="?(.*?[^\\])("|;|$)/ $1 ## haven't found one, but it's a non-text message. fake ## it. + ## + ## TODO: make this less lame. elsif m.header["Content-Type"] && m.header["Content-Type"] !~ /^text\/plain/ - "sup-attachment-#{Time.now.to_i}-#{rand 10000}" + extension = + case m.header["Content-Type"] + when /text\/html/: "html" + when /image\/(.*)/: $1 + end + + ["sup-attachment-#{Time.now.to_i}-#{rand 10000}", extension].join(".") end ## if there's a filename, we'll treat it as an attachment. @@ -363,17 +409,18 @@ private ## otherwise, it's body text else - body = Message.convert_from m.decode, m.charset - text_to_chunks body.normalize_whitespace.split("\n") + body = Message.convert_from m.decode, m.charset if m.body + text_to_chunks((body || "").normalize_whitespace.split("\n"), encrypted) end end end def self.convert_from body, charset + charset = "utf-8" if charset =~ /UTF_?8/i begin raise MessageFormatError, "RubyMail decode returned a null body" unless body return body unless charset - Iconv.iconv($encoding, charset, body).join + Iconv.iconv($encoding + "//IGNORE", charset, body + " ").join[0 .. -2] rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence, MessageFormatError => e Redwood::log "warning: error (#{e.class.name}) decoding message body from #{charset}: #{e.message}" File.open("sup-unable-to-decode.txt", "w") { |f| f.write body } @@ -384,7 +431,7 @@ private ## parse the lines of text into chunk objects. the heuristics here ## need tweaking in some nice manner. TODO: move these heuristics ## into the classes themselves. - def text_to_chunks lines + def text_to_chunks lines, encrypted state = :text # one of :text, :quote, or :sig chunks = [] chunk_lines = [] @@ -396,7 +443,7 @@ private when :text newstate = nil - if line =~ QUOTE_PATTERN || (line =~ QUOTE_START_PATTERN && (nextline =~ QUOTE_PATTERN || nextline =~ QUOTE_START_PATTERN)) + if line =~ QUOTE_PATTERN || (line =~ QUOTE_START_PATTERN && nextline =~ QUOTE_PATTERN) newstate = :quote elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE newstate = :sig @@ -415,7 +462,7 @@ private when :quote newstate = nil - if line =~ QUOTE_PATTERN || line =~ QUOTE_START_PATTERN #|| line =~ /^\s*$/ + if line =~ QUOTE_PATTERN || (line =~ /^\s*$/ && nextline =~ QUOTE_PATTERN) chunk_lines << line elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE newstate = :sig @@ -436,11 +483,14 @@ private when :block_quote, :sig chunk_lines << line end - + if !@have_snippet && state == :text && (@snippet.nil? || @snippet.length < SNIPPET_LEN) && line !~ /[=\*#_-]{3,}/ && line !~ /^\s*$/ + @snippet ||= "" @snippet += " " unless @snippet.empty? @snippet += line.gsub(/^\s+/, "").gsub(/[\r\n]/, "").gsub(/\s+/, " ") @snippet = @snippet[0 ... SNIPPET_LEN].chomp + @dirty = true unless encrypted && $config[:discard_snippets_from_encrypted_messages] + @snippet_contains_encrypted_content = true if encrypted end end