## the index structure for redwood. interacts with ferret.
-require 'thread'
require 'fileutils'
require 'ferret'
## optional dependency: try to load the 'chronic' library and record
## the outcome in the global $have_chronic. a LoadError is logged
## through Redwood::log rather than propagated, so the file still
## loads without the gem. the rescued exception (e) is not used.
## NOTE(review): Redwood::log is called here before the
## `module Redwood` line that follows in this excerpt -- presumably it
## is defined by a file loaded earlier; confirm.
+begin
+ require 'chronic'
+ $have_chronic = true
+rescue LoadError => e
+ Redwood::log "optional 'chronic' library not found (run 'gem install chronic' to install)"
+ $have_chronic = false
+end
module Redwood
include Singleton
attr_reader :index
+ alias ferret index
## remembers the directory to work in (BASE_DIR unless one is given)
## and starts with an empty @sources table.
## NOTE(review): only these two assignments are visible in this
## excerpt; any further setup is not shown here.
def initialize dir=BASE_DIR
@dir = dir
@sources = {}
end
def start_lock_update_thread
- @lock_update_thread = Redwood::reporting_thread do
+ @lock_update_thread = Redwood::reporting_thread("lock update") do
while true
sleep 30
@lock.touch_yourself
## registers +source+ in the @sources table, keyed by source.id, and
## sets @sources_dirty. raises a RuntimeError ("duplicate source!")
## if the duplicate check below fires.
##
## a source that has no id yet is given one: one more than the
## largest id among the existing sources, where DraftLoader and
## SentLoader entries count as 0 (so their ids never influence the
## next id), and 1 if max_of yields nil.
def add_source source
## NOTE(review): @sources is keyed by source.id (see the assignment
## at the bottom), while this check hands the source object itself to
## include?. for a Hash that tests keys -- confirm the check can
## actually detect a duplicate.
raise "duplicate source!" if @sources.include? source
@sources_dirty = true
- source.id ||= @sources.size
- ##TODO: why was this necessary?
## max_of is not a core ruby method; presumably a project-provided
## Enumerable extension returning the largest block value -- confirm.
+ max = @sources.max_of { |id, s| s.is_a?(DraftLoader) || s.is_a?(SentLoader) ? 0 : id }
+ source.id ||= (max || 0) + 1
##source.id += 1 while @sources.member? source.id
@sources[source.id] = source
end
docid, entry = load_entry_for_id m.id unless docid && entry
raise "no source info for message #{m.id}" unless m.source && m.source_info
- raise "trying deleting non-corresponding entry #{docid}" if docid && @index[docid][:message_id] != m.id
+ raise "trying to delete non-corresponding entry #{docid} with index message-id #{@index[docid][:message_id].inspect} and parameter message id #{m.id.inspect}" if docid && @index[docid][:message_id] != m.id
source_id =
if m.source.is_a? Integer
- raise "Debugging: integer source set"
m.source
else
m.source.id or raise "unregistered source #{m.source} (id #{m.source.id.inspect})"
end
to = (m.to + m.cc + m.bcc).map { |x| x.email }.join(" ")
+ snippet =
+ if m.snippet_contains_encrypted_content? && $config[:discard_snippets_from_encrypted_messages]
+ ""
+ else
+ m.snippet
+ end
+
d = {
:message_id => m.id,
:source_id => source_id,
:source_info => m.source_info,
:date => m.date.to_indexable_s,
:body => m.content,
- :snippet => m.snippet,
- :label => m.labels.join(" "),
+ :snippet => snippet,
+ :label => m.labels.uniq.join(" "),
:from => m.from ? m.from.email : "",
:to => (m.to + m.cc + m.bcc).map { |x| x.email }.join(" "),
:subject => wrap_subj(Message.normalize_subj(m.subj)),
docid, entry = load_entry_for_id m.id
## this hasn't been triggered in a long time. TODO: decide whether it's still a problem.
- raise "just added message #{m.id} but couldn't find it in a search" unless docid
+ raise "just added message #{m.id.inspect} but couldn't find it in a search" unless docid
true
end
## message-building lambdas, so that building an unwanted message
## can be skipped in the block if desired.
##
- ## stops loading any thread if a message with a :killed flag is found.
+ ## only two options, :limit and :skip_killed. if :skip_killed is
+ ## true, stops loading any thread if a message with a :killed flag
+ ## is found.
SAME_SUBJECT_DATE_LIMIT = 7
+ MAX_CLAUSES = 1000
def each_message_in_thread_for m, opts={}
#Redwood::log "Building thread for #{m.id}: #{m.subj}"
messages = {}
end
until pending.empty? || (opts[:limit] && messages.size >= opts[:limit])
- id = pending.pop
- next if searched.member? id
- searched[id] = true
q = Ferret::Search::BooleanQuery.new true
- q.add_query Ferret::Search::TermQuery.new(:message_id, id), :should
- q.add_query Ferret::Search::TermQuery.new(:refs, id), :should
+ # this disappeared in newer ferrets... wtf.
+ # q.max_clause_count = 2048
+
+ lim = [MAX_CLAUSES / 2, pending.length].min
+ pending[0 ... lim].each do |id|
+ searched[id] = true
+ q.add_query Ferret::Search::TermQuery.new(:message_id, id), :should
+ q.add_query Ferret::Search::TermQuery.new(:refs, id), :should
+ end
+ pending = pending[lim .. -1]
- ## load_killed is true so that we can abort if any message in
- ## the thread has the killed label.
- q = build_query :qobj => q, :load_killed => true
+ q = build_query :qobj => q
num_queries += 1
killed = false
@index.search_each(q, :limit => :all) do |docid, score|
break if opts[:limit] && messages.size >= opts[:limit]
- if @index[docid][:label].split(/\s+/).include?("killed") && !opts[:load_killed]
+ if @index[docid][:label].split(/\s+/).include?("killed") && opts[:skip_killed]
killed = true
break
end
#Redwood::log "got #{mid} as a child of #{id}"
messages[mid] ||= lambda { build_message docid }
refs = @index[docid][:refs].split(" ")
- pending += refs
+ pending += refs.select { |id| !searched[id] }
end
end
end
+
if killed
Redwood::log "thread for #{m.id} is killed, ignoring"
false
"date" => Time.at(doc[:date].to_i),
"subject" => unwrap_subj(doc[:subject]),
"from" => doc[:from],
- "to" => doc[:to],
+ "to" => doc[:to].split(/\s+/).join(", "), # reformat
"message-id" => doc[:message_id],
- "references" => doc[:refs],
+ "references" => doc[:refs].split(/\s+/).map { |x| "<#{x}>" }.join(" "),
}
Message.new :source => source, :source_info => doc[:source_info].to_i,
protected
+ ## do any specialized parsing
+ ## returns nil and flashes error message if parsing failed
##
## rewrites the user-typed query +str+ before handing it to @qparser:
##  * to:/from: terms: the name is looked up with
##    ContactManager.contact_for and replaced by that contact's
##    email; the literal name "me" becomes a parenthesized "||" list
##    of AccountManager.user_emails; anything else is left as typed.
##  * before:/on:/in:/after: terms (only when $have_chronic is set):
##    the date text is parsed with Chronic and replaced by date:(...)
##    range clauses on zero-padded integer timestamps.
## returns the value of @qparser.parse on the rewritten string, or
## nil when a date term could not be understood.
def parse_user_query_string str
- str2 = str.gsub(/(to|from):(\S+)/) do
+ result = str.gsub(/\b(to|from):(\S+)\b/) do
field, name = $1, $2
if(p = ContactManager.contact_for(name))
[field, p.email]
+ elsif name == "me"
+ [field, "(" + AccountManager.user_emails.join("||") + ")"]
else
[field, name]
end.join(":")
end
- Redwood::log "translated #{str} to #{str2}" unless str2 == str
- @qparser.parse str2
+ if $have_chronic
+ chronic_failure = false
## the date text is either a parenthesized phrase ($3) or a single
## bare word ($4).
+ result = result.gsub(/\b(before|on|in|after):(\((.+?)\)\B|(\S+)\b)/) do
## once one date term has failed, stop rewriting: break leaves the
## block, so this gsub call evaluates to nil.
+ break if chronic_failure
+ field, datestr = $1, ($3 || $4)
## realdate is used below through #begin and #end, i.e. as a time
## span rather than a single instant.
+ realdate = Chronic.parse(datestr, :guess => false, :context => :none)
+ if realdate
+ case field
+ when "after"
+ Redwood::log "chronic: translated #{field}:#{datestr} to #{realdate.end}"
## "%012d" zero-pads the integer timestamp to 12 digits.
## presumably this matches how the :date field is indexed, so that
## range comparison works on strings -- confirm.
+ "date:(>= #{sprintf "%012d", realdate.end.to_i})"
+ when "before"
+ Redwood::log "chronic: translated #{field}:#{datestr} to #{realdate.begin}"
+ "date:(<= #{sprintf "%012d", realdate.begin.to_i})"
+ else
## "on" and "in": bound the date by both ends of the span.
+ Redwood::log "chronic: translated #{field}:#{datestr} to #{realdate}"
+ "date:(<= #{sprintf "%012d", realdate.end.to_i}) date:(>= #{sprintf "%012d", realdate.begin.to_i})"
+ end
+ else
+ BufferManager.flash "Don't understand date #{datestr.inspect}!"
+ chronic_failure = true
+ end
+ end
## a failure on the last date term never reaches the break above, so
## the result is discarded explicitly here.
+ result = nil if chronic_failure
+ end
+
## note: when result is nil (date failure) it also differs from str,
## so this log line is emitted in that case too.
+ Redwood::log "translated #{str.inspect} to #{result}" unless result == str
+ @qparser.parse result if result
end
def build_query opts
query.add_query Ferret::Search::TermQuery.new("label", "spam"), :must_not unless opts[:load_spam] || labels.include?(:spam)
query.add_query Ferret::Search::TermQuery.new("label", "deleted"), :must_not unless opts[:load_deleted] || labels.include?(:deleted)
- query.add_query Ferret::Search::TermQuery.new("label", "killed"), :must_not unless opts[:load_killed] || labels.include?(:killed)
+ query.add_query Ferret::Search::TermQuery.new("label", "killed"), :must_not if opts[:skip_killed]
query
end