X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=lib%2Fsup%2Fmessage.rb;h=944dd88947207295e143fb3df1bf0c131ba83511;hb=e7afdc2e3d20bbf7d23cb64c93a1dda9e5a21a8f;hp=1c84d79d9881c6c19ffe08fdb41af795d67d42bc;hpb=df17a7174718b190d629a75e06aeccd7c47c0a72;p=sup diff --git a/lib/sup/message.rb b/lib/sup/message.rb index 1c84d79..944dd88 100644 --- a/lib/sup/message.rb +++ b/lib/sup/message.rb @@ -1,4 +1,3 @@ -require 'tempfile' require 'time' require 'iconv' @@ -30,7 +29,7 @@ class Message QUOTE_PATTERN = /^\s{0,4}[>|\}]/ BLOCK_QUOTE_PATTERN = /^-----\s*Original Message\s*----+$/ - QUOTE_START_PATTERN = /(^\s*Excerpts from)|(^\s*In message )|(^\s*In article )|(^\s*Quoting )|((wrote|writes|said|says)\s*:\s*$)/ + QUOTE_START_PATTERN = /\w.*:$/ SIG_PATTERN = /(^-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)|(^\s*--~--~-)|(^\s*--\+\+\*\*==)/ MAX_SIG_DISTANCE = 15 # lines from the end @@ -38,8 +37,8 @@ class Message DEFAULT_SENDER = "(missing sender)" attr_reader :id, :date, :from, :subj, :refs, :replytos, :to, :source, - :cc, :bcc, :labels, :list_address, :recipient_email, :replyto, - :source_info, :chunks, :list_subscribe, :list_unsubscribe + :cc, :bcc, :labels, :attachments, :list_address, :recipient_email, :replyto, + :source_info, :list_subscribe, :list_unsubscribe bool_reader :dirty, :source_marked_read, :snippet_contains_encrypted_content @@ -55,31 +54,39 @@ class Message @dirty = false @encrypted = false @chunks = nil + @attachments = [] + + ## we need to initialize this. see comments in parse_header as to + ## why. + @refs = [] parse_header(opts[:header] || @source.load_header(@source_info)) end def parse_header header header.each { |k, v| header[k.downcase] = v } - + + fakeid = nil + fakename = nil + @id = if header["message-id"] sanitize_message_id header["message-id"] else - returning("sup-faked-" + Digest::MD5.hexdigest(raw_header)) do |id| - Redwood::log "faking message-id for message from #@from: #{id}" - end + fakeid = "sup-faked-" + Digest::MD5.hexdigest(raw_header) end @from = if header["from"] PersonManager.person_for header["from"] else - name = "Sup Auto-generated Fake Sender " - Redwood::log "faking from for message #@id: #{name}" - PersonManager.person_for name + fakename = "Sup Auto-generated Fake Sender " + PersonManager.person_for fakename end + Redwood::log "faking message-id for message from #@from: #{id}" if fakeid + Redwood::log "faking from for message #@id: #{fakename}" if fakename + date = header["date"] @date = case date @@ -89,7 +96,8 @@ class Message begin Time.parse date rescue ArgumentError => e - raise MessageFormatError, "unparsable date #{header['date']}: #{e.message}" + Redwood::log "faking date header for #{@id} due to error parsing date #{header['date'].inspect}: #{e.message}" + Time.now end else Redwood::log "faking date header for #{@id}" @@ -100,7 +108,13 @@ class Message @to = PersonManager.people_for header["to"] @cc = PersonManager.people_for header["cc"] @bcc = PersonManager.people_for header["bcc"] - @refs = (header["references"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first } + + ## before loading our full header from the source, we can actually + ## have some extra refs set by the UI. (this happens when the user + ## joins threads manually). so we will merge the current refs values + ## in here. + refs = (header["references"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first } + @refs = (@refs + refs).uniq @replytos = (header["in-reply-to"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first } @replyto = PersonManager.person_for header["reply-to"] @@ -118,6 +132,15 @@ class Message end private :parse_header + def add_ref ref + @refs << ref + @dirty = true + end + + def remove_ref ref + @dirty = true if @refs.delete ref + end + def snippet; @snippet || (chunks && @snippet); end def is_list_message?; !@list_address.nil?; end def is_draft?; @source.is_a? DraftLoader; end @@ -126,11 +149,24 @@ class Message @source.fn_for_offset @source_info end - def sanitize_message_id mid; mid.gsub(/\s/, "") end + ## sanitize message ids by removing spaces and non-ascii characters. + ## also, truncate to 255 characters. all these steps are necessary + ## to make ferret happy. of course, we probably fuck up a couple + ## valid message ids as well. as long as we're consistent, this + ## should be fine, though. + ## + ## also, mostly the message ids that are changed by this belong to + ## spam email. + ## + ## an alternative would be to SHA1 or MD5 all message ids on a regular basis. + ## don't tempt me. + def sanitize_message_id mid; mid.gsub(/(\s|[^\000-\177])+/, "")[0..254] end def save index - index.sync_message self if @dirty + return unless @dirty + index.sync_message self @dirty = false + true end def has_label? t; @labels.member? t; end @@ -154,11 +190,16 @@ class Message @dirty = true end + def chunks + load_from_source! + @chunks + end + ## this is called when the message body needs to actually be loaded. def load_from_source! @chunks ||= if @source.has_errors? - [Chunk::Text.new(error_message(@source.error.message.split("\n")))] + [Chunk::Text.new(error_message(@source.error.message).split("\n"))] else begin ## we need to re-read the header because it contains information @@ -177,7 +218,7 @@ class Message ## up the error message one @source.error ||= e Redwood::report_broken_sources :force_to_top => true - [Chunk::Text.new(error_message(e.message))] + [Chunk::Text.new(error_message(e.message).split("\n"))] end end end @@ -226,13 +267,14 @@ EOS with_source_errors_handled { @source.each_raw_message_line(@source_info, &b) } end - def content + ## returns all the content from a message that will be indexed + def indexable_content load_from_source! [ - from && "#{from.name} #{from.email}", - to.map { |p| "#{p.name} #{p.email}" }, - cc.map { |p| "#{p.name} #{p.email}" }, - bcc.map { |p| "#{p.name} #{p.email}" }, + from && from.indexable_content, + to.map { |p| p.indexable_content }, + cc.map { |p| p.indexable_content }, + bcc.map { |p| p.indexable_content }, chunks.select { |c| c.is_a? Chunk::Text }.map { |c| c.lines }, Message.normalize_subj(subj), ].flatten.compact.join " " @@ -350,7 +392,8 @@ private payload = RMail::Parser.read(m.body) from = payload.header.from.first from_person = from ? PersonManager.person_for(from.format) : nil - [Chunk::EnclosedMessage.new(from_person, payload.to_s)] + [Chunk::EnclosedMessage.new(from_person, payload.to_s)] + + message_to_chunks(payload, encrypted) else filename = ## first, paw through the headers looking for a filename @@ -375,24 +418,30 @@ private ## if there's a filename, we'll treat it as an attachment. if filename + # add this to the attachments list if its not a generated html + # attachment (should we allow images with generated names?). + # Lowercase the filename because searches are easier that way + @attachments.push filename.downcase unless filename =~ /^sup-attachment-/ + add_label :attachment unless filename =~ /^sup-attachment-/ [Chunk::Attachment.new(m.header.content_type, filename, m, sibling_types)] ## otherwise, it's body text else - body = Message.convert_from m.decode, m.charset - text_to_chunks (body || "").normalize_whitespace.split("\n"), encrypted + body = Message.convert_from m.decode, m.charset if m.body + text_to_chunks((body || "").normalize_whitespace.split("\n"), encrypted) end end end def self.convert_from body, charset + charset = "utf-8" if charset =~ /UTF_?8/i begin raise MessageFormatError, "RubyMail decode returned a null body" unless body return body unless charset Iconv.iconv($encoding + "//IGNORE", charset, body + " ").join[0 .. -2] rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence, MessageFormatError => e Redwood::log "warning: error (#{e.class.name}) decoding message body from #{charset}: #{e.message}" - File.open("sup-unable-to-decode.txt", "w") { |f| f.write body } + File.open(File.join(BASE_DIR,"unable-to-decode.txt"), "w") { |f| f.write body } body end end @@ -412,7 +461,7 @@ private when :text newstate = nil - if line =~ QUOTE_PATTERN || (line =~ QUOTE_START_PATTERN && (nextline =~ QUOTE_PATTERN || nextline =~ QUOTE_START_PATTERN)) + if line =~ QUOTE_PATTERN || (line =~ QUOTE_START_PATTERN && nextline =~ QUOTE_PATTERN) newstate = :quote elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE newstate = :sig @@ -431,7 +480,7 @@ private when :quote newstate = nil - if line =~ QUOTE_PATTERN || line =~ QUOTE_START_PATTERN #|| line =~ /^\s*$/ + if line =~ QUOTE_PATTERN || (line =~ /^\s*$/ && nextline =~ QUOTE_PATTERN) chunk_lines << line elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE newstate = :sig