X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=lib%2Fsup%2Fmessage.rb;h=f640011e594efd1581b1dc5c4140af0d2b3508c7;hb=46f8e5116f38c8248fdc8553db18f8d2132a1f46;hp=e01e24582b7b99510f0ebfa8f857fff6b78cf8e2;hpb=fd7cdb7b0d424fbcb520fcf5625ee39d642d7a9c;p=sup diff --git a/lib/sup/message.rb b/lib/sup/message.rb index e01e245..f640011 100644 --- a/lib/sup/message.rb +++ b/lib/sup/message.rb @@ -1,10 +1,7 @@ require 'time' -require 'iconv' module Redwood -class MessageFormatError < StandardError; end - ## a Message is what's threaded. ## ## it is also where the parsing for quotes and signatures is done, but @@ -13,8 +10,8 @@ class MessageFormatError < StandardError; end ## specific module that would detect and link to /ruby-talk:\d+/ ## sequences in the text of an email. (how sweet would that be?) ## -## this class cathces all source exceptions. if the underlying source throws -## an error, it is caught and handled. +## this class catches all source exceptions. if the underlying source +## throws an error, it is caught and handled. class Message SNIPPET_LEN = 80 @@ -29,7 +26,6 @@ class Message QUOTE_PATTERN = /^\s{0,4}[>|\}]/ BLOCK_QUOTE_PATTERN = /^-----\s*Original Message\s*----+$/ - QUOTE_START_PATTERN = /\w.*:$/ SIG_PATTERN = /(^-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)|(^\s*--~--~-)|(^\s*--\+\+\*\*==)/ MAX_SIG_DISTANCE = 15 # lines from the end @@ -50,7 +46,7 @@ class Message @snippet = opts[:snippet] @snippet_contains_encrypted_content = false @have_snippet = !(opts[:snippet].nil? || opts[:snippet].empty?) - @labels = [] + (opts[:labels] || []) + @labels = Set.new(opts[:labels] || []) @dirty = false @encrypted = false @chunks = nil @@ -60,54 +56,54 @@ class Message ## why. @refs = [] - parse_header(opts[:header] || @source.load_header(@source_info)) + #parse_header(opts[:header] || @source.load_header(@source_info)) end def parse_header header - header.each { |k, v| header[k.downcase] = v } - - fakeid = nil - fakename = nil - - @id = - if header["message-id"] - sanitize_message_id header["message-id"] - else - fakeid = "sup-faked-" + Digest::MD5.hexdigest(raw_header) - end - - @from = - if header["from"] - PersonManager.person_for header["from"] - else - fakename = "Sup Auto-generated Fake Sender " - PersonManager.person_for fakename - end + ## forcibly decode these headers from and to the current encoding, + ## which serves to strip out characters that aren't displayable + ## (and which would otherwise be screwing up the display) + %w(from to subject cc bcc).each do |f| + header[f] = Iconv.easy_decode($encoding, $encoding, header[f]) if header[f] + end - Redwood::log "faking message-id for message from #@from: #{id}" if fakeid - Redwood::log "faking from for message #@id: #{fakename}" if fakename + @id = if header["message-id"] + mid = header["message-id"] =~ /<(.+?)>/ ? $1 : header["message-id"] + sanitize_message_id mid + else + id = "sup-faked-" + Digest::MD5.hexdigest(raw_header) + from = header["from"] + #debug "faking non-existent message-id for message from #{from}: #{id}" + id + end - date = header["date"] - @date = - case date - when Time - date - when String - begin - Time.parse date - rescue ArgumentError => e - Redwood::log "faking date header for #{@id} due to error parsing date #{header['date'].inspect}: #{e.message}" - Time.now - end - else - Redwood::log "faking date header for #{@id}" + @from = Person.from_address(if header["from"] + header["from"] + else + name = "Sup Auto-generated Fake Sender " + #debug "faking non-existent sender for message #@id: #{name}" + name + end) + + @date = case(date = header["date"]) + when Time + date + when String + begin + Time.parse date + rescue ArgumentError => e + #debug "faking mangled date header for #{@id} (orig #{header['date'].inspect} gave error: #{e.message})" Time.now end + else + #debug "faking non-existent date header for #{@id}" + Time.now + end @subj = header.member?("subject") ? header["subject"].gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT - @to = PersonManager.people_for header["to"] - @cc = PersonManager.people_for header["cc"] - @bcc = PersonManager.people_for header["bcc"] + @to = Person.from_address_list header["to"] + @cc = Person.from_address_list header["cc"] + @bcc = Person.from_address_list header["bcc"] ## before loading our full header from the source, we can actually ## have some extra refs set by the UI. (this happens when the user @@ -117,10 +113,10 @@ class Message @refs = (@refs + refs).uniq @replytos = (header["in-reply-to"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first } - @replyto = PersonManager.person_for header["reply-to"] + @replyto = Person.from_address header["reply-to"] @list_address = if header["list-post"] - @list_address = PersonManager.person_for header["list-post"].gsub(/^$/, "") + @list_address = Person.from_address header["list-post"].gsub(/^$/, "") else nil end @@ -130,7 +126,31 @@ class Message @list_subscribe = header["list-subscribe"] @list_unsubscribe = header["list-unsubscribe"] end - private :parse_header + + ## Expected index entry format: + ## :message_id, :subject => String + ## :date => Time + ## :refs, :replytos => Array of String + ## :from => Person + ## :to, :cc, :bcc => Array of Person + def load_from_index! entry + @id = entry[:message_id] + @from = entry[:from] + @date = entry[:date] + @subj = entry[:subject] + @to = entry[:to] + @cc = entry[:cc] + @bcc = entry[:bcc] + @refs = (@refs + entry[:refs]).uniq + @replytos = entry[:replytos] + + @replyto = nil + @list_address = nil + @recipient_email = nil + @source_marked_read = false + @list_subscribe = nil + @list_unsubscribe = nil + end def add_ref ref @refs << ref @@ -162,22 +182,22 @@ class Message ## don't tempt me. def sanitize_message_id mid; mid.gsub(/(\s|[^\000-\177])+/, "")[0..254] end - def save index + def save_state index return unless @dirty - index.sync_message self + index.update_message_state self @dirty = false true end def has_label? t; @labels.member? t; end - def add_label t - return if @labels.member? t - @labels.push t + def add_label l + return if @labels.member? l + @labels << l @dirty = true end - def remove_label t - return unless @labels.member? t - @labels.delete t + def remove_label l + return unless @labels.member? l + @labels.delete l @dirty = true end @@ -186,6 +206,8 @@ class Message end def labels= l + raise ArgumentError, "not a set" unless l.is_a?(Set) + return if @labels == l @labels = l @dirty = true end @@ -198,7 +220,7 @@ class Message ## this is called when the message body needs to actually be loaded. def load_from_source! @chunks ||= - if @source.has_errors? + if @source.respond_to?(:has_errors?) && @source.has_errors? [Chunk::Text.new(error_message(@source.error.message).split("\n"))] else begin @@ -212,8 +234,8 @@ class Message ## so i will keep this. parse_header @source.load_header(@source_info) message_to_chunks @source.load_message(@source_info) - rescue SourceError, SocketError, MessageFormatError => e - Redwood::log "problem getting messages from #{@source}: #{e.message}" + rescue SourceError, SocketError => e + warn "problem getting messages from #{@source}: #{e.message}" ## we need force_to_top here otherwise this window will cover ## up the error message one @source.error ||= e @@ -247,7 +269,7 @@ EOS begin yield rescue SourceError => e - Redwood::log "problem getting messages from #{@source}: #{e.message}" + warn "problem getting messages from #{@source}: #{e.message}" @source.error ||= e Redwood::report_broken_sources :force_to_top => true error_message e.message @@ -275,11 +297,23 @@ EOS to.map { |p| p.indexable_content }, cc.map { |p| p.indexable_content }, bcc.map { |p| p.indexable_content }, - chunks.select { |c| c.is_a? Chunk::Text }.map { |c| c.lines }, - Message.normalize_subj(subj), + indexable_chunks.map { |c| c.lines }, + indexable_subject, ].flatten.compact.join " " end + def indexable_body + indexable_chunks.map { |c| c.lines }.flatten.compact.join " " + end + + def indexable_chunks + chunks.select { |c| c.is_a? Chunk::Text } + end + + def indexable_subject + Message.normalize_subj(subj) + end + def quotable_body_lines chunks.find_all { |c| c.quotable? }.map { |c| c.lines }.flatten end @@ -293,6 +327,12 @@ EOS "Subject: #{@subj}"] end + def self.build_from_source source, source_info + m = Message.new :source => source, :source_info => source_info + m.load_from_source! + m + end + private ## here's where we handle decoding mime attachments. unfortunately @@ -320,25 +360,25 @@ private def multipart_signed_to_chunks m if m.body.size != 2 - Redwood::log "warning: multipart/signed with #{m.body.size} parts (expecting 2)" + warn "multipart/signed with #{m.body.size} parts (expecting 2)" return end payload, signature = m.body if signature.multipart? - Redwood::log "warning: multipart/signed with payload multipart #{payload.multipart?} and signature multipart #{signature.multipart?}" + warn "multipart/signed with payload multipart #{payload.multipart?} and signature multipart #{signature.multipart?}" return end ## this probably will never happen if payload.header.content_type == "application/pgp-signature" - Redwood::log "warning: multipart/signed with payload content type #{payload.header.content_type}" + warn "multipart/signed with payload content type #{payload.header.content_type}" return end if signature.header.content_type != "application/pgp-signature" ## unknown signature type; just ignore. - #Redwood::log "warning: multipart/signed with signature content type #{signature.header.content_type}" + #warn "multipart/signed with signature content type #{signature.header.content_type}" return end @@ -347,31 +387,36 @@ private def multipart_encrypted_to_chunks m if m.body.size != 2 - Redwood::log "warning: multipart/encrypted with #{m.body.size} parts (expecting 2)" + warn "multipart/encrypted with #{m.body.size} parts (expecting 2)" return end control, payload = m.body if control.multipart? - Redwood::log "warning: multipart/encrypted with control multipart #{control.multipart?} and payload multipart #{payload.multipart?}" + warn "multipart/encrypted with control multipart #{control.multipart?} and payload multipart #{payload.multipart?}" return end if payload.header.content_type != "application/octet-stream" - Redwood::log "warning: multipart/encrypted with payload content type #{payload.header.content_type}" + warn "multipart/encrypted with payload content type #{payload.header.content_type}" return end if control.header.content_type != "application/pgp-encrypted" - Redwood::log "warning: multipart/encrypted with control content type #{signature.header.content_type}" + warn "multipart/encrypted with control content type #{signature.header.content_type}" return end - decryptedm, sig, notice = CryptoManager.decrypt payload - children = message_to_chunks(decryptedm, true) if decryptedm - [notice, sig, children].flatten.compact + notice, sig, decryptedm = CryptoManager.decrypt payload + if decryptedm # managed to decrypt + children = message_to_chunks(decryptedm, true) + [notice, sig, children] + else + [notice] + end end + ## takes a RMail::Message, breaks it into Chunk:: classes. def message_to_chunks m, encrypted=false, sibling_types=[] if m.multipart? chunks = @@ -389,11 +434,15 @@ private chunks elsif m.header.content_type == "message/rfc822" - payload = RMail::Parser.read(m.body) - from = payload.header.from.first - from_person = from ? PersonManager.person_for(from.format) : nil - [Chunk::EnclosedMessage.new(from_person, payload.to_s)] + - message_to_chunks(payload, encrypted) + if m.body + payload = RMail::Parser.read(m.body) + from = payload.header.from.first + from_person = from ? Person.from_address(from.format) : nil + [Chunk::EnclosedMessage.new(from_person, payload.to_s)] + + message_to_chunks(payload, encrypted) + else + [Chunk::EnclosedMessage.new(nil, "")] + end else filename = ## first, paw through the headers looking for a filename @@ -409,8 +458,8 @@ private elsif m.header["Content-Type"] && m.header["Content-Type"] !~ /^text\/plain/ extension = case m.header["Content-Type"] - when /text\/html/: "html" - when /image\/(.*)/: $1 + when /text\/html/ then "html" + when /image\/(.*)/ then $1 end ["sup-attachment-#{Time.now.to_i}-#{rand 10000}", extension].join(".") @@ -423,28 +472,20 @@ private # Lowercase the filename because searches are easier that way @attachments.push filename.downcase unless filename =~ /^sup-attachment-/ add_label :attachment unless filename =~ /^sup-attachment-/ - [Chunk::Attachment.new(m.header.content_type, filename, m, sibling_types)] + content_type = m.header.content_type || "application/unknown" # sometimes RubyMail gives us nil + [Chunk::Attachment.new(content_type, filename, m, sibling_types)] ## otherwise, it's body text else - body = Message.convert_from m.decode, m.charset if m.body + ## if there's no charset, use the current encoding as the charset. + ## this ensures that the body is normalized to avoid non-displayable + ## characters + body = Iconv.easy_decode($encoding, m.charset || $encoding, m.decode) if m.body text_to_chunks((body || "").normalize_whitespace.split("\n"), encrypted) end end end - def self.convert_from body, charset - begin - raise MessageFormatError, "RubyMail decode returned a null body" unless body - return body unless charset - Iconv.easy_decode($encoding, charset, body) - rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence, MessageFormatError => e - Redwood::log "warning: error (#{e.class.name}) decoding message body from #{charset}: #{e.message}" - File.open(File.join(BASE_DIR,"unable-to-decode.txt"), "w") { |f| f.write body } - body - end - end - ## parse the lines of text into chunk objects. the heuristics here ## need tweaking in some nice manner. TODO: move these heuristics ## into the classes themselves. @@ -460,7 +501,11 @@ private when :text newstate = nil - if line =~ QUOTE_PATTERN || (line =~ QUOTE_START_PATTERN && nextline =~ QUOTE_PATTERN) + ## the following /:$/ followed by /\w/ is an attempt to detect the + ## start of a quote. this is split into two regexen because the + ## original regex /\w.*:$/ had very poor behavior on long lines + ## like ":a:a:a:a:a" that occurred in certain emails. + if line =~ QUOTE_PATTERN || (line =~ /:$/ && line =~ /\w/ && nextline =~ QUOTE_PATTERN) newstate = :quote elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE newstate = :sig