-require 'date'
+## Herein lies all the code responsible for threading messages. It's
+## basically an online version of the JWZ threading algorithm:
+## http://www.jwz.org/doc/threading.html
+##
+## I didn't implement it for efficiency, but thanks to our search
+## engine backend, it's typically not applied to very many messages at
+## once.
+##
+## At the top level, we have a ThreadSet, which represents a set of
+## threads, e.g. a message folder or an inbox. Each ThreadSet contains
+## zero or more Threads. A Thread represents all the message related
+## to a particular subject. Each Thread has one or more Containers. A
+## Container is a recursive structure that holds the message tree as
+## determined by the references: and in-reply-to: headers. Each
+## Container holds zero or one messages. In the case of zero messages,
+## it means we've seen a reference to the message but haven't (yet)
+## seen the message itself.
+##
+## A Thread can have multiple top-level Containers if we decide to
+## group them together independent of tree structure, typically if
+## (e.g. due to someone using a primitive MUA) the messages have the
+## same subject but we don't have evidence from in-reply-to: or
+## references: headers. In this case Thread#each can optionally yield
+## a faked root object tying them all together into one tree
+## structure.
+
+require 'set'
module Redwood
attr_reader :containers
def initialize
- raise "wrong thread, buddy!" if block_given?
+ ## ah, the joys of a multithreaded application with a class called
+ ## "Thread". i keep instantiating the wrong one...
+ raise "wrong Thread class, buddy!" if block_given?
@containers = []
end
end
def empty?; @containers.empty?; end
-
- def drop c
- raise "bad drop" unless @containers.member? c
- @containers.delete c
+ def empty!; @containers.clear; end
+ def drop c; @containers.delete(c) or raise "bad drop"; end
+
+ ## unused
+ def dump f=$stdout
+ f.puts "=== start thread with #{@containers.length} trees ==="
+ @containers.each { |c| c.dump_recursive f; f.puts }
+ f.puts "=== end thread ==="
end
- def dump
- puts "=== start thread #{self} with #{@containers.length} trees ==="
- @containers.each { |c| c.dump_recursive }
- puts "=== end thread ==="
- end
-
- ## yields each message and some stuff
+ ## yields each message, its depth, and its parent. the message yield
+ ## parameter can be a Message object, or :fake_root, or nil (no
+ ## message found but the presence of one deduced from other
+ ## messages).
def each fake_root=false
adj = 0
- root = @containers.find_all { |c| !Message.subj_is_reply?(c) }.argmin { |c| c.date }
+ root = @containers.find_all { |c| c.message && !Message.subj_is_reply?(c.message.subj) }.argmin { |c| c.date }
if root
adj = 1
end
end
+ def first; each { |m, *o| return m if m }; nil; end
def dirty?; any? { |m, *o| m && m.dirty? }; end
def date; map { |m, *o| m.date if m }.compact.max; end
- def snippet; argfind { |m, *o| m && m.snippet }; end
+ def snippet
+ with_snippets = select { |m, *o| m && m.snippet && !m.snippet.empty? }
+ first_unread, * = with_snippets.select { |m, *o| m.has_label?(:unread) }.sort_by { |m, *o| m.date }.first
+ return first_unread.snippet if first_unread
+ last_read, * = with_snippets.sort_by { |m, *o| m.date }.last
+ return last_read.snippet if last_read
+ ""
+ end
def authors; map { |m, *o| m.from if m }.compact.uniq; end
def apply_label t; each { |m, *o| m && m.add_label(t) }; end
- def remove_label t
- each { |m, *o| m && m.remove_label(t) }
- end
+ def remove_label t; each { |m, *o| m && m.remove_label(t) }; end
def toggle_label label
if has_label? label
remove_label label
- return false
+ false
else
apply_label label
- return true
+ true
end
end
def set_labels l; each { |m, *o| m && m.labels = l }; end
-
def has_label? t; any? { |m, *o| m && m.has_label?(t) }; end
- def save index; each { |m, *o| m && m.save(index) }; end
+ def save_state index; each { |m, *o| m && m.save_state(index) }; end
def direct_participants
map { |m, *o| [m.from] + m.to if m }.flatten.compact.uniq
def size; map { |m, *o| m ? 1 : 0 }.sum; end
def subj; argfind { |m, *o| m && m.subj }; end
- def labels
- map { |m, *o| m && m.labels }.flatten.compact.uniq.sort_by { |t| t.to_s }
- end
+ def labels; inject(Set.new) { |s, (m, *o)| m ? s | m.labels : s } end
def labels= l
- each { |m, *o| m && m.labels = l.clone }
+ raise ArgumentError, "not a set" unless l.is_a?(Set)
+ each { |m, *o| m && m.labels = l.dup }
end
def latest_message
- inject(nil) do |a, b|
+ inject(nil) do |a, b|
b = b.first
if a.nil?
b
@id = id
@message, @parent, @thread = nil, nil, nil
@children = []
- end
+ end
def each_with_stuff parent=nil
yield self, 0, parent
def root?; @parent.nil?; end
def root; root? ? self : @parent.root; end
+ ## skip over any containers which are empty and have only one child. we use
+ ## this make the threaded display a little nicer, and only stick in the
+ ## "missing message" line when it's graphically necessary, i.e. when the
+ ## missing message has more than one descendent.
def first_useful_descendant
if empty? && @children.size == 1
@children.first.first_useful_descendant
def subj; find_attr :subj; end
def date; find_attr :date; end
- def is_reply?; subj && Message.subject_is_reply?(subj); end
+ def is_reply?; subj && Message.subj_is_reply?(subj); end
def to_s
[ "<#{id}",
].compact.join(" ") + ">"
end
- def dump_recursive indent=0, root=true, parent=nil
+ def dump_recursive f=$stdout, indent=0, root=true, parent=nil
raise "inconsistency" unless parent.nil? || parent.children.include?(self)
unless root
- print " " * indent
- print "+->"
+ f.print " " * indent
+ f.print "+->"
end
- line = #"[#{useful? ? 'U' : ' '}] " +
+ line = "[#{thread.nil? ? ' ' : '*'}] " + #"[#{useful? ? 'U' : ' '}] " +
if @message
- "[#{thread}] #{@message.subj} " ##{@message.refs.inspect} / #{@message.replytos.inspect}"
+ message.subj ##{@message.refs.inspect} / #{@message.replytos.inspect}"
else
"<no message>"
end
- puts "#{id} #{line}"#[0 .. (105 - indent)]
+ f.puts "#{id} #{line}"#[0 .. (105 - indent)]
indent += 3
- @children.each { |c| c.dump_recursive indent, false, self }
+ @children.each { |c| c.dump_recursive f, indent, false, self }
end
end
-## a set of threads (so a forest). builds the thread structures by
-## reading messages from an index.
+## A set of threads, so a forest. Is integrated with the index and
+## builds thread structures by reading messages from it.
+##
+## If 'thread_by_subj' is true, puts messages with the same subject in
+## one thread, even if they don't reference each other. This is
+## helpful for crappy MUAs that don't set In-reply-to: or References:
+## headers, but means that messages may be threaded unnecessarily.
+##
+## The following invariants are maintained: every Thread has at least one
+## Container tree, and every Container tree has at least one Message.
class ThreadSet
attr_reader :num_messages
+ bool_reader :thread_by_subj
- def initialize index
+ def initialize index, thread_by_subj=true
@index = index
@num_messages = 0
- @messages = {} ## map from message ids to container objects
- @subj_thread = {} ## map from subject strings to thread objects
+ ## map from message ids to container objects
+ @messages = SavingHash.new { |id| Container.new id }
+ ## map from subject strings or (or root message ids) to thread objects
+ @threads = SavingHash.new { Thread.new }
+ @thread_by_subj = thread_by_subj
end
- def contains_id? id; @messages.member?(id) && !@messages[id].empty?; end
- def thread_for m
- (c = @messages[m.id]) && c.root.thread
- end
-
- def delete_empties
- @subj_thread.each { |k, v| @subj_thread.delete(k) if v.empty? }
- end
- private :delete_empties
+ def thread_for_id mid; @messages.member?(mid) && @messages[mid].root.thread end
+ def contains_id? id; @messages.member?(id) && !@messages[id].empty? end
+ def thread_for m; thread_for_id m.id end
+ def contains? m; contains_id? m.id end
- def threads; delete_empties; @subj_thread.values; end
- def size; delete_empties; @subj_thread.size; end
+ def threads; @threads.values end
+ def size; @threads.size end
- def dump
- @subj_thread.each do |s, t|
- puts "**********************"
- puts "** for subject #{s} **"
- puts "**********************"
- t.dump
+ def dump f=$stdout
+ @threads.each do |s, t|
+ f.puts "**********************"
+ f.puts "** for subject #{s} **"
+ f.puts "**********************"
+ t.dump f
end
end
+ ## link two containers
def link p, c, overwrite=false
if p == c || p.descendant_of?(c) || c.descendant_of?(p) # would create a loop
-# puts "*** linking parent #{p} and child #{c} would create a loop"
+ #puts "*** linking parent #{p.id} and child #{c.id} would create a loop"
return
end
- if c.parent.nil? || overwrite
- c.parent.children.delete c if overwrite && c.parent
- if c.thread
- c.thread.drop c
- c.thread = nil
- end
- p.children << c
- c.parent = p
- end
+ #puts "in link for #{p.id} to #{c.id}, perform? #{c.parent.nil?} || #{overwrite}"
+
+ return unless c.parent.nil? || overwrite
+ remove_container c
+ p.children << c
+ c.parent = p
+
+ ## if the child was previously a top-level container, it now ain't,
+ ## so ditch our thread and kill it if necessary
+ prune_thread_of c
end
private :link
- def remove mid
- return unless(c = @messages[mid])
+ def remove_container c
+ c.parent.children.delete c if c.parent # remove from tree
+ end
+ private :remove_container
+
+ def prune_thread_of c
+ return unless c.thread
+ c.thread.drop c
+ @threads.delete_if { |k, v| v == c.thread } if c.thread.empty?
+ c.thread = nil
+ end
+ private :prune_thread_of
- c.parent.children.delete c if c.parent
- if c.thread
- c.thread.drop c
- c.thread = nil
- end
+ def remove_id mid
+ return unless @messages.member?(mid)
+ c = @messages[mid]
+ remove_container c
+ prune_thread_of c
+ end
+
+ def remove_thread_containing_id mid
+ return unless @messages.member?(mid)
+ c = @messages[mid]
+ t = c.root.thread
+ @threads.delete_if { |key, thread| t == thread }
end
## load in (at most) num number of threads from the index
def load_n_threads num, opts={}
@index.each_id_by_date opts do |mid, builder|
- break if size >= num
+ break if size >= num unless num == -1
next if contains_id? mid
m = builder.call
- add_message m
- load_thread_for_message m
- yield @subj_thread.size if block_given?
+ load_thread_for_message m, :skip_killed => opts[:skip_killed], :load_deleted => opts[:load_deleted], :load_spam => opts[:load_spam]
+ yield size if block_given?
end
end
## loads in all messages needed to thread m
- def load_thread_for_message m
- @index.each_message_in_thread_for m, :limit => 100 do |mid, builder|
+ ## may do nothing if m's thread is killed
+ def load_thread_for_message m, opts={}
+ good = @index.each_message_in_thread_for m, opts do |mid, builder|
next if contains_id? mid
add_message builder.call
end
+ add_message m if good
+ end
+
+ ## merges in a pre-loaded thread
+ def add_thread t
+ raise "duplicate" if @threads.values.member? t
+ t.each { |m, *o| add_message m }
+ end
+
+ ## merges two threads together. both must be members of this threadset.
+ ## does its best, heuristically, to determine which is the parent.
+ def join_threads threads
+ return if threads.size < 2
+
+ containers = threads.map do |t|
+ c = @messages.member?(c) ? @messages[t.first.id] : nil
+ raise "not in threadset: #{t.first.id}" unless c && c.message
+ c
+ end
+
+ ## use subject headers heuristically
+ parent = containers.find { |c| !c.is_reply? }
+
+ ## no thread was rooted by a non-reply, so make a fake parent
+ parent ||= @messages["joining-ref-" + containers.map { |c| c.id }.join("-")]
+
+ containers.each do |c|
+ next if c == parent
+ c.message.add_ref parent.id
+ link parent, c
+ end
+
+ true
end
def is_relevant? m
- m.refs.any? { |ref_id| @messages[ref_id] }
+ m.refs.any? { |ref_id| @messages.member? ref_id }
end
- ## an "online" version of the jwz threading algorithm.
+ ## the heart of the threading code
def add_message message
- id = message.id
- el = (@messages[id] ||= Container.new id)
- return if @messages[id].message # we've seen it before
+ el = @messages[message.id]
+ return if el.message # we've seen it before
+
+ #puts "adding: #{message.id}, refs #{message.refs.inspect}"
el.message = message
oldroot = el.root
## link via references:
- prev = nil
- message.refs.each do |ref_id|
- raise "non-String ref id #{ref_id.inspect} (full: #{message.refs.inspect})" unless ref_id.is_a?(String)
- ref = (@messages[ref_id] ||= Container.new ref_id)
+ (message.refs + [el.id]).inject(nil) do |prev, ref_id|
+ ref = @messages[ref_id]
link prev, ref if prev
- prev = ref
+ ref
end
- link prev, el, true if prev
## link via in-reply-to:
message.replytos.each do |ref_id|
- ref = (@messages[ref_id] ||= Container.new ref_id)
+ ref = @messages[ref_id]
link ref, el, true
break # only do the first one
end
- ## update subject grouping
root = el.root
- # puts "> have #{el}, root #{root}, oldroot #{oldroot}"
- # el.dump_recursive
-
- if root == oldroot
- if oldroot.thread
- # puts "*** root (#{root.subj}) == oldroot (#{oldroot.subj}); ignoring"
+ key =
+ if thread_by_subj?
+ Message.normalize_subj root.subj
else
- ## to disable subject grouping, use the next line instead
- ## (and the same for below)
- #Redwood::log "[1] normalized subject for #{id} is #{Message.normalize_subj(root.subj)}"
- thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new)
- #thread = (@subj_thread[root.id] ||= Thread.new)
-
- thread << root
- root.thread = thread
- # puts "# (1) added #{root} to #{thread}"
- end
- else
- if oldroot.thread
- ## new root. need to drop old one and put this one in its place
- # puts "*** DROPPING #{oldroot} from #{oldroot.thread}"
- oldroot.thread.drop oldroot
- oldroot.thread = nil
+ root.id
end
- if root.thread
- # puts "*** IGNORING cuz root already has a thread"
- else
- ## to disable subject grouping, use the next line instead
- ## (and the same above)
- #Redwood::log "[2] normalized subject for #{id} is #{Message.normalize_subj(root.subj)}"
- thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new)
- #thread = (@subj_thread[root.id] ||= Thread.new)
-
- thread << root
- root.thread = thread
- # puts "# (2) added #{root} to #{thread}"
+ ## check to see if the subject is still the same (in the case
+ ## that we first added a child message with a different
+ ## subject)
+ if root.thread
+ if @threads.member?(key) && @threads[key] != root.thread
+ @threads.delete key
end
+ else
+ thread = @threads[key]
+ thread << root
+ root.thread = thread
end
## last bit