index: consistent naming

author Rich Lane <rlane@club.cc.cmu.edu>

Wed, 17 Jun 2009 00:24:59 +0000 (17:24 -0700)

committer William Morgan <wmorgan-sup@masanjin.net>

Wed, 24 Jun 2009 13:44:45 +0000 (09:44 -0400)
author Rich Lane <rlane@club.cc.cmu.edu>
Wed, 17 Jun 2009 00:24:59 +0000 (17:24 -0700)
committer William Morgan <wmorgan-sup@masanjin.net>
Wed, 24 Jun 2009 13:44:45 +0000 (09:44 -0400)
diff --git a/bin/sup-sync b/bin/sup-sync

index a6e3478775b2b60b85957b08b7cb83c101690e9c..a759cbee63e53835b257eb2559afca06eb8480d9 100755 (executable)
--- a/bin/sup-sync
+++ b/bin/sup-sync
@@ -218,7 +218,7 @@ begin
          unless seen[m.id]
            next unless m.source_info >= opts[:start_at] if opts[:start_at]
            puts "Deleting #{m.id}" if opts[:verbose]
-          index.drop_entry m.id unless opts[:dry_run]
+          index.delete m.id unless opts[:dry_run]
            num_del += 1
          end
        end
diff --git a/bin/sup-tweak-labels b/bin/sup-tweak-labels

index f526a95dac87baf78dd6d11761ec1effcfe7edbf..6f603e25fa579a74d36ff5c8c663fe9114378615 100755 (executable)
--- a/bin/sup-tweak-labels
+++ b/bin/sup-tweak-labels
@@ -2,6 +2,7 @@
  
  require 'rubygems'
  require 'trollop'
+require 'enumerator'
  require "sup"
  
  class Float
@@ -81,7 +82,8 @@ begin
    end
    query += ' ' + opts[:query] if opts[:query]
  
-  docs = Redwood::Index.run_query query
+  parsed_query = index.parse_query query
+  docs = Enumerable::Enumerator.new(index, :each_docid, parsed_query).map
    num_total = docs.size
  
    $stderr.puts "Found #{num_total} documents across #{source_ids.length} sources. Scanning..."
diff --git a/lib/sup/draft.rb b/lib/sup/draft.rb

index 32266b5374eb3d67b336926a28e8fe75eecf30ce..912773999cbbdee52a39f01e73f72dd1d425091c 100644 (file)
--- a/lib/sup/draft.rb
+++ b/lib/sup/draft.rb
@@ -37,7 +37,7 @@ class DraftManager
        return
      end
      raise ArgumentError, "not a draft: source id #{entry[:source_id].inspect}, should be #{DraftManager.source_id.inspect} for #{m.id.inspect} / docno #{docid}" unless entry[:source_id].to_i == DraftManager.source_id
-    Index.drop_entry docid
+    Index.delete m.id
      File.delete @source.fn_for_offset(entry[:source_info])
      UpdateManager.relay self, :single_message_deleted, m
    end
diff --git a/lib/sup/index.rb b/lib/sup/index.rb

index c0910b6ed0fd57a68efa4f9731d7366a5b35ecb5..a621ba168b236f0ba85da2313f403b434b0514be 100644 (file)
--- a/lib/sup/index.rb
+++ b/lib/sup/index.rb
@@ -279,28 +279,28 @@ EOS
    ## you should probably not call this on a block that doesn't break
    ## rather quickly because the results can be very large.
    EACH_BY_DATE_NUM = 100
-  def each_id_by_date opts={}
+  def each_id_by_date query={}
      return if empty? # otherwise ferret barfs ###TODO: remove this once my ferret patch is accepted
-    query = build_query opts
+    ferret_query = build_ferret_query query
      offset = 0
      while true
-      limit = (opts[:limit])? [EACH_BY_DATE_NUM, opts[:limit] - offset].min : EACH_BY_DATE_NUM
-      results = @index_mutex.synchronize { @index.search query, :sort => "date DESC", :limit => limit, :offset => offset }
-      Redwood::log "got #{results.total_hits} results for query (offset #{offset}) #{query.inspect}"
+      limit = (query[:limit])? [EACH_BY_DATE_NUM, query[:limit] - offset].min : EACH_BY_DATE_NUM
+      results = @index_mutex.synchronize { @index.search ferret_query, :sort => "date DESC", :limit => limit, :offset => offset }
+      Redwood::log "got #{results.total_hits} results for query (offset #{offset}) #{ferret_query.inspect}"
        results.hits.each do |hit|
          yield @index_mutex.synchronize { @index[hit.doc][:message_id] }, lambda { build_message hit.doc }
        end
-      break if opts[:limit] and offset >= opts[:limit] - limit
+      break if query[:limit] and offset >= query[:limit] - limit
        break if offset >= results.total_hits - limit
        offset += limit
      end
    end
  
-  def num_results_for opts={}
+  def num_results_for query={}
      return 0 if empty? # otherwise ferret barfs ###TODO: remove this once my ferret patch is accepted
  
-    q = build_query opts
-    @index_mutex.synchronize { @index.search(q, :limit => 1).total_hits }
+    ferret_query = build_ferret_query query
+    @index_mutex.synchronize { @index.search(ferret_query, :limit => 1).total_hits }
    end
  
    ## yield all messages in the thread containing 'm' by repeatedly
@@ -313,7 +313,7 @@ EOS
    ## is found.
    SAME_SUBJECT_DATE_LIMIT = 7
    MAX_CLAUSES = 1000
-  def each_message_in_thread_for m, opts={}
+  def each_message_in_thread_for m, query={}
      #Redwood::log "Building thread for #{m.id}: #{m.subj}"
      messages = {}
      searched = {}
@@ -332,7 +332,7 @@ EOS
        q.add_query sq, :must
        q.add_query Ferret::Search::RangeQuery.new(:date, :>= => date_min.to_indexable_s, :<= => date_max.to_indexable_s), :must
  
-      q = build_query :qobj => q
+      q = build_ferret_query :qobj => q
  
        p1 = @index_mutex.synchronize { @index.search(q).hits.map { |hit| @index[hit.doc][:message_id] } }
        Redwood::log "found #{p1.size} results for subject query #{q}"
@@ -343,7 +343,7 @@ EOS
        pending = (pending + p1 + p2).uniq
      end
  
-    until pending.empty? || (opts[:limit] && messages.size >= opts[:limit])
+    until pending.empty? || (query[:limit] && messages.size >= query[:limit])
        q = Ferret::Search::BooleanQuery.new true
        # this disappeared in newer ferrets... wtf.
        # q.max_clause_count = 2048
@@ -356,14 +356,14 @@ EOS
        end
        pending = pending[lim .. -1]
  
-      q = build_query :qobj => q
+      q = build_ferret_query :qobj => q
  
        num_queries += 1
        killed = false
        @index_mutex.synchronize do
          @index.search_each(q, :limit => :all) do |docid, score|
-          break if opts[:limit] && messages.size >= opts[:limit]
-          if @index[docid][:label].split(/\s+/).include?("killed") && opts[:skip_killed]
+          break if query[:limit] && messages.size >= query[:limit]
+          if @index[docid][:label].split(/\s+/).include?("killed") && query[:skip_killed]
              killed = true
              break
            end
@@ -419,7 +419,7 @@ EOS
    def wrap_subj subj; "__START_SUBJECT__ #{subj} __END_SUBJECT__"; end
    def unwrap_subj subj; subj =~ /__START_SUBJECT__ (.*?) __END_SUBJECT__/ && $1; end
  
-  def drop_entry docno; @index_mutex.synchronize { @index.delete docno } end
+  def delete id; @index_mutex.synchronize { @index.delete id } end
  
    def load_entry_for_id mid
      @index_mutex.synchronize do
@@ -478,27 +478,14 @@ EOS
      @index_mutex.synchronize { @index.search(q, :limit => 1).total_hits > 0 }
    end
  
-  ## takes a user query string and returns the list of docids for messages
-  ## that match the query.
-  ##
-  ## messages can then be loaded from the index with #build_message.
-  ##
-  ## raises a ParseError if the parsing failed.
-  def run_query query
-    qobj, opts = Redwood::Index.parse_user_query_string query
-    query = Redwood::Index.build_query opts.merge(:qobj => qobj)
-    results = @index.search query, :limit => (opts[:limit] || :all)
-    results.hits.map { |hit| hit.doc }
-  end
-
-  def each_docid opts={}
-    query = build_query opts
-    results = @index_mutex.synchronize { @index.search query, :limit => (opts[:limit] || :all) }
+  def each_docid query={}
+    ferret_query = build_ferret_query query
+    results = @index_mutex.synchronize { @index.search ferret_query, :limit => (query[:limit] || :all) }
      results.hits.map { |hit| yield hit.doc }
    end
  
-  def each_message opts={}
-    each_docid opts do |docid|
+  def each_message query={}
+    each_docid query do |docid|
        yield build_message(docid)
      end
    end
@@ -507,16 +494,15 @@ EOS
      @index_mutex.synchronize { @index.optimize }
    end
  
-protected
-
    class ParseError < StandardError; end
  
-  ## parse a query string from the user. returns a query object and a set of
-  ## extra flags; both of these are meant to be passed to #build_query.
+  ## parse a query string from the user. returns a query object
+  ## that can be passed to any index method with a 'query'
+  ## argument, as well as build_ferret_query.
    ##
    ## raises a ParseError if something went wrong.
-  def parse_user_query_string s
-    extraopts = {}
+  def parse_query s
+    query = {}
  
      subs = s.gsub(/\b(to|from):(\S+)\b/) do
        field, name = $1, $2
@@ -542,8 +528,8 @@ protected
      ## final stage of query processing. if the user wants to search spam
      ## messages, not adding that is the right thing; if he doesn't want to
      ## search spam messages, then not adding it won't have any effect.
-    extraopts[:load_spam] = true if subs =~ /\blabel:spam\b/
-    extraopts[:load_deleted] = true if subs =~ /\blabel:deleted\b/
+    query[:load_spam] = true if subs =~ /\blabel:spam\b/
+    query[:load_deleted] = true if subs =~ /\blabel:deleted\b/
  
      ## gmail style "is" operator
      subs = subs.gsub(/\b(is|has):(\S+)\b/) do
@@ -552,10 +538,10 @@ protected
        when "read"
          "-label:unread"
        when "spam"
-        extraopts[:load_spam] = true
+        query[:load_spam] = true
          "label:spam"
        when "deleted"
-        extraopts[:load_deleted] = true
+        query[:load_deleted] = true
          "label:deleted"
        else
          "label:#{$2}"
@@ -601,7 +587,7 @@ protected
      subs = subs.gsub(/\blimit:(\S+)\b/) do
        lim = $1
        if lim =~ /^\d+$/
-        extraopts[:limit] = lim.to_i
+        query[:limit] = lim.to_i
          ''
        else
          raise ParseError, "non-numeric limit #{lim.inspect}"
@@ -609,32 +595,36 @@ protected
      end
      
      begin
-      [@qparser.parse(subs), extraopts]
+      query[:qobj] = @qparser.parse(subs)
+      query[:text] = s
+      query
      rescue Ferret::QueryParser::QueryParseException => e
        raise ParseError, e.message
      end
    end
  
-  def build_query opts
-    query = Ferret::Search::BooleanQuery.new
-    query.add_query opts[:qobj], :must if opts[:qobj]
-    labels = ([opts[:label]] + (opts[:labels] || [])).compact
-    labels.each { |t| query.add_query Ferret::Search::TermQuery.new("label", t.to_s), :must }
-    if opts[:participants]
+private
+
+  def build_ferret_query query
+    q = Ferret::Search::BooleanQuery.new
+    q.add_query query[:qobj], :must if query[:qobj]
+    labels = ([query[:label]] + (query[:labels] || [])).compact
+    labels.each { |t| q.add_query Ferret::Search::TermQuery.new("label", t.to_s), :must }
+    if query[:participants]
        q2 = Ferret::Search::BooleanQuery.new
-      opts[:participants].each do |p|
+      query[:participants].each do |p|
          q2.add_query Ferret::Search::TermQuery.new("from", p.email), :should
          q2.add_query Ferret::Search::TermQuery.new("to", p.email), :should
        end
-      query.add_query q2, :must
+      q.add_query q2, :must
      end
          
-    query.add_query Ferret::Search::TermQuery.new("label", "spam"), :must_not unless opts[:load_spam] || labels.include?(:spam)
-    query.add_query Ferret::Search::TermQuery.new("label", "deleted"), :must_not unless opts[:load_deleted] || labels.include?(:deleted)
-    query.add_query Ferret::Search::TermQuery.new("label", "killed"), :must_not if opts[:skip_killed]
+    q.add_query Ferret::Search::TermQuery.new("label", "spam"), :must_not unless query[:load_spam] || labels.include?(:spam)
+    q.add_query Ferret::Search::TermQuery.new("label", "deleted"), :must_not unless query[:load_deleted] || labels.include?(:deleted)
+    q.add_query Ferret::Search::TermQuery.new("label", "killed"), :must_not if query[:skip_killed]
  
-    query.add_query Ferret::Search::TermQuery.new("source_id", opts[:source_id]), :must if opts[:source_id]
-    query
+    q.add_query Ferret::Search::TermQuery.new("source_id", query[:source_id]), :must if query[:source_id]
+    q
    end
  
    def save_sources fn=Redwood::SOURCE_FN
diff --git a/lib/sup/modes/search-results-mode.rb b/lib/sup/modes/search-results-mode.rb

index 227ee9ba7f7a469ed751a45a9bcc52a767c697b9..121e8176a5e1060aa3aa96057f9cf526b758f5e4 100644 (file)
--- a/lib/sup/modes/search-results-mode.rb
+++ b/lib/sup/modes/search-results-mode.rb
@@ -1,11 +1,9 @@
  module Redwood
  
  class SearchResultsMode < ThreadIndexMode
-  def initialize qobj, qopts = nil
-    @qobj = qobj
-    @qopts = qopts
-
-    super [], { :qobj => @qobj }.merge(@qopts)
+  def initialize query
+    @query = query
+    super [], query
    end
  
    register_keymap do |k|
@@ -13,9 +11,9 @@ class SearchResultsMode < ThreadIndexMode
    end
  
    def refine_search
-    query = BufferManager.ask :search, "refine query: ", (@qobj.to_s + " ")
-    return unless query && query !~ /^\s*$/
-    SearchResultsMode.spawn_from_query query
+    text = BufferManager.ask :search, "refine query: ", (@query[:text] + " ")
+    return unless text && text !~ /^\s*$/
+    SearchResultsMode.spawn_from_query text
    end
  
    ## a proper is_relevant? method requires some way of asking ferret
@@ -26,10 +24,10 @@ class SearchResultsMode < ThreadIndexMode
  
    def self.spawn_from_query text
      begin
-      qobj, extraopts = Index.parse_user_query_string(text)
-      return unless qobj
+      query = Index.parse_query(text)
+      return unless query
        short_text = text.length < 20 ? text : text[0 ... 20] + "..."
-      mode = SearchResultsMode.new qobj, extraopts
+      mode = SearchResultsMode.new query
        BufferManager.spawn "search: \"#{short_text}\"", mode
        mode.load_threads :num => mode.buffer.content_height
      rescue Index::ParseError => e
author	Rich Lane <rlane@club.cc.cmu.edu>
	Wed, 17 Jun 2009 00:24:59 +0000 (17:24 -0700)
committer	William Morgan <wmorgan-sup@masanjin.net>
	Wed, 24 Jun 2009 13:44:45 +0000 (09:44 -0400)
bin/sup-sync		patch | blob | history
bin/sup-tweak-labels		patch | blob | history
lib/sup/draft.rb		patch | blob | history
lib/sup/index.rb		patch | blob | history
lib/sup/modes/search-results-mode.rb		patch | blob | history