-require 'date'
+## Herein is all the code responsible for threading messages. I use an
+## online version of the JWZ threading algorithm:
+## http://www.jwz.org/doc/threading.html
+##
+## I certainly didn't implement it for efficiency, but thanks to our
+## search engine backend, it's typically not applied to very many
+## messages at once.
+
+## At the top level, we have a ThreadSet. A ThreadSet represents a set
+## of threads, e.g. a message folder or an inbox. Each ThreadSet
+## contains zero or more Threads. A Thread represents all the messages
+## related to a particular subject. Each Thread has one or more
+## Containers. A Container is a recursive structure that holds the
+## tree structure as determined by the references: and in-reply-to:
+## headers. A Thread with multiple Containers occurs if they have the
+## same subject, but (most likely due to someone using a primitive
+## MUA) we don't have evidence from in-reply-to: or references:
+## headers, only subject: (and thus our tree is probably broken). A
+## Container holds zero or one message. In the case of no message, it
+## means we've seen a reference to the message but haven't seen the
+## message itself (yet).
module Redwood
attr_reader :containers
def initialize
+ ## ah, the joys of a multithreaded application with a class called
+ ## "Thread". i keep instantiating the wrong one...
+ raise "wrong Thread class, buddy!" if block_given?
@containers = []
end
puts "=== end thread ==="
end
- ## yields each message and some stuff
+ ## yields each message, its depth, and its parent. note that the
+ ## message can be a Message object, or :fake_root, or nil.
def each fake_root=false
adj = 0
root = @containers.find_all { |c| !Message.subj_is_reply?(c) }.argmin { |c| c.date }
end
end
+ def first; each { |m, *o| return m if m }; nil; end
def dirty?; any? { |m, *o| m && m.dirty? }; end
def date; map { |m, *o| m.date if m }.compact.max; end
def snippet; argfind { |m, *o| m && m.snippet }; end
(c = @messages[m.id]) && c.root.thread
end
- def delete_empties
- @subj_thread.each { |k, v| @subj_thread.delete(k) if v.empty? }
+ def delete_cruft
+ @subj_thread.each { |k, v| @subj_thread.delete(k) if v.empty? || v.subj != k }
end
- private :delete_empties
+ private :delete_cruft
- def threads; delete_empties; @subj_thread.values; end
- def size; delete_empties; @subj_thread.size; end
+ def threads; delete_cruft; @subj_thread.values; end
+ def size; delete_cruft; @subj_thread.size; end
def dump
@subj_thread.each do |s, t|
if root == oldroot
if oldroot.thread
- # puts "*** root (#{root.subj}) == oldroot (#{oldroot.subj}); ignoring"
+ ## check to see if the subject is still the same (in the case
+ ## that we first added a child message with a different
+ ## subject)
+
+ ## this code is duplicated below. sorry! TODO: refactor
+ s = Message.normalize_subj(root.subj)
+ unless @subj_thread[s] == root.thread
+ ## Redwood::log "[1] moving thread to new subject #{root.subj}"
+ if @subj_thread[s]
+ @subj_thread[s] << root
+ root.thread = @subj_thread[s]
+ else
+ @subj_thread[s] = root.thread
+ end
+ end
+
else
## to disable subject grouping, use the next line instead
## (and the same for below)
- Redwood::log "[1] normalized subject for #{id} is #{Message.normalize_subj(root.subj)}"
+ #Redwood::log "[1] for #{root}, subject #{Message.normalize_subj(root.subj)} has #{@subj_thread[Message.normalize_subj(root.subj)] ? 'a' : 'no'} thread"
thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new)
#thread = (@subj_thread[root.id] ||= Thread.new)
thread << root
root.thread = thread
- # puts "# (1) added #{root} to #{thread}"
+ # Redwood::log "[1] added #{root} to #{thread}"
end
else
if oldroot.thread
## new root. need to drop old one and put this one in its place
- # puts "*** DROPPING #{oldroot} from #{oldroot.thread}"
oldroot.thread.drop oldroot
oldroot.thread = nil
end
if root.thread
- # puts "*** IGNORING cuz root already has a thread"
+ ## check to see if the subject is still the same (in the case
+ ## that we first added a child message with a different
+ ## subject)
+ s = Message.normalize_subj(root.subj)
+ unless @subj_thread[s] == root.thread
+ # Redwood::log "[2] moving thread to new subject #{root.subj}"
+ if @subj_thread[s]
+ @subj_thread[s] << root
+ root.thread = @subj_thread[s]
+ else
+ @subj_thread[s] = root.thread
+ end
+ end
+
else
## to disable subject grouping, use the next line instead
## (and the same above)
- Redwood::log "[2] normalized subject for #{id} is #{Message.normalize_subj(root.subj)}"
+
+ ## this code is duplicated above. sorry! TODO: refactor
+ # Redwood::log "[2] for #{root}, subject '#{Message.normalize_subj(root.subj)}' has #{@subj_thread[Message.normalize_subj(root.subj)] ? 'a' : 'no'} thread"
+
thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new)
#thread = (@subj_thread[root.id] ||= Thread.new)
thread << root
root.thread = thread
- # puts "# (2) added #{root} to #{thread}"
+ # Redwood::log "[2] added #{root} to #{thread}"
end
end