index: cleanup interface

author Rich Lane <rlane@club.cc.cmu.edu>

Wed, 17 Jun 2009 00:24:58 +0000 (17:24 -0700)

committer William Morgan <wmorgan-sup@masanjin.net>

Wed, 24 Jun 2009 13:44:45 +0000 (09:44 -0400)
author Rich Lane <rlane@club.cc.cmu.edu>
Wed, 17 Jun 2009 00:24:58 +0000 (17:24 -0700)
committer William Morgan <wmorgan-sup@masanjin.net>
Wed, 24 Jun 2009 13:44:45 +0000 (09:44 -0400)
diff --git a/bin/sup-dump b/bin/sup-dump

index 29f6d6ee7cb3ad4a3d95e3a381c77feca7b053a6..9b0892e0333fbb24453f6599e757adb35e7bfe2f 100755 (executable)
--- a/bin/sup-dump
+++ b/bin/sup-dump
@@ -24,8 +24,6 @@ end
  index = Redwood::Index.new
  index.load
  
-(1 ... index.index.reader.max_doc).each do |i|
-  next if index.index.deleted? i
-  d = index.index[i]
-  puts [d[:message_id], "(" + d[:label] + ")"] * " "
+index.each_message do |m|
+  puts "#{m.id} (#{m.labels * ' '})"
  end
diff --git a/bin/sup-sync b/bin/sup-sync

index 9c342d25693f908e26825c3d7c4348e0b5ba84ee..a6e3478775b2b60b85957b08b7cb83c101690e9c 100755 (executable)
--- a/bin/sup-sync
+++ b/bin/sup-sync
@@ -208,24 +208,17 @@ begin
  
    ## delete any messages in the index that claim they're from one of
    ## these sources, but that we didn't see.
-  ##
-  ## kinda crappy code here, because we delve directly into the Ferret
-  ## API.
-  ##
-  ## TODO: move this to Index, i suppose.
-  if (target == :all || target == :changed) && !opts[:start_at]
+  if (target == :all || target == :changed)
      $stderr.puts "Deleting missing messages from the index..."
      num_del, num_scanned = 0, 0
      sources.each do |source|
        raise "no source id for #{source}" unless source.id
-      q = "+source_id:#{source.id}"
-      q += " +source_info: >= #{opts[:start_at]}" if opts[:start_at]
-      index.index.search_each(q, :limit => :all) do |docid, score|
+      index.each_message :source_id => source.id do |m|
          num_scanned += 1
-        mid = index.index[docid][:message_id]
-        unless seen[mid]
-          puts "Deleting #{mid}" if opts[:verbose]
-          index.index.delete docid unless opts[:dry_run]
+        unless seen[m.id]
+          next unless m.source_info >= opts[:start_at] if opts[:start_at]
+          puts "Deleting #{m.id}" if opts[:verbose]
+          index.drop_entry m.id unless opts[:dry_run]
            num_del += 1
          end
        end
@@ -237,7 +230,7 @@ begin
  
    if opts[:optimize]
      $stderr.puts "Optimizing index..."
-    optt = time { index.index.optimize unless opts[:dry_run] }
+    optt = time { index.optimize unless opts[:dry_run] }
      $stderr.puts "Optimized index of size #{index.size} in #{optt}s."
    end
  rescue Redwood::FatalSourceError => e
diff --git a/bin/sup-tweak-labels b/bin/sup-tweak-labels

index 538db8b38ac402604c6ef5b1f24475895ee7635d..f526a95dac87baf78dd6d11761ec1effcfe7edbf 100755 (executable)
--- a/bin/sup-tweak-labels
+++ b/bin/sup-tweak-labels
@@ -118,7 +118,7 @@ begin
  
    unless num_changed == 0
      $stderr.puts "Optimizing index..."
-    index.ferret.optimize unless opts[:dry_run]
+    index.optimize unless opts[:dry_run]
    end
  
  rescue Exception => e
diff --git a/lib/sup/index.rb b/lib/sup/index.rb

index ca01ee76cc89c118933c5122b616b4b6f45a59bb..c0910b6ed0fd57a68efa4f9731d7366a5b35ecb5 100644 (file)
--- a/lib/sup/index.rb
+++ b/lib/sup/index.rb
@@ -24,11 +24,6 @@ class Index
  
    include Singleton
  
-  ## these two accessors should ONLY be used by single-threaded programs.
-  ## otherwise you will have a naughty ferret on your hands.
-  attr_reader :index
-  alias ferret index
-
    def initialize dir=BASE_DIR
      @index_mutex = Monitor.new
  
@@ -151,7 +146,7 @@ EOS
      if File.exists? dir
        Redwood::log "loading index..."
        @index_mutex.synchronize do
-        @index = Ferret::Index::Index.new(:path => dir, :analyzer => @analyzer)
+        @index = Ferret::Index::Index.new(:path => dir, :analyzer => @analyzer, :id_field => 'message_id')
          Redwood::log "loaded index of #{@index.size} messages"
        end
      else
@@ -171,7 +166,7 @@ EOS
          field_infos.add_field :refs
          field_infos.add_field :snippet, :index => :no, :term_vector => :no
          field_infos.create_index dir
-        @index = Ferret::Index::Index.new(:path => dir, :analyzer => @analyzer)
+        @index = Ferret::Index::Index.new(:path => dir, :analyzer => @analyzer, :id_field => 'message_id')
        end
      end
    end
@@ -496,6 +491,22 @@ EOS
      results.hits.map { |hit| hit.doc }
    end
  
+  def each_docid opts={}
+    query = build_query opts
+    results = @index_mutex.synchronize { @index.search query, :limit => (opts[:limit] || :all) }
+    results.hits.map { |hit| yield hit.doc }
+  end
+
+  def each_message opts={}
+    each_docid opts do |docid|
+      yield build_message(docid)
+    end
+  end
+
+  def optimize
+    @index_mutex.synchronize { @index.optimize }
+  end
+
  protected
  
    class ParseError < StandardError; end
@@ -621,6 +632,8 @@ protected
      query.add_query Ferret::Search::TermQuery.new("label", "spam"), :must_not unless opts[:load_spam] || labels.include?(:spam)
      query.add_query Ferret::Search::TermQuery.new("label", "deleted"), :must_not unless opts[:load_deleted] || labels.include?(:deleted)
      query.add_query Ferret::Search::TermQuery.new("label", "killed"), :must_not if opts[:skip_killed]
+
+    query.add_query Ferret::Search::TermQuery.new("source_id", opts[:source_id]), :must if opts[:source_id]
      query
    end
author	Rich Lane <rlane@club.cc.cmu.edu>
	Wed, 17 Jun 2009 00:24:58 +0000 (17:24 -0700)
committer	William Morgan <wmorgan-sup@masanjin.net>
	Wed, 24 Jun 2009 13:44:45 +0000 (09:44 -0400)
bin/sup-dump		patch | blob | history
bin/sup-sync		patch | blob | history
bin/sup-tweak-labels		patch | blob | history
lib/sup/index.rb		patch | blob | history