add Message.indexable_{body, chunks, subject}

[sup] / lib / sup / util.rb
diff --git a/lib/sup/util.rb b/lib/sup/util.rb

index 9909022ffa9b0b1c5796f4fbdff925087dec2519..8f60cc43216979aaa207696324f117ba7025cbdd 100644 (file)
--- a/lib/sup/util.rb
+++ b/lib/sup/util.rb
@@ -133,8 +133,8 @@ class Object
    ## clone of java-style whole-method synchronization
    ## assumes a @mutex variable
    ## TODO: clean up, try harder to avoid namespace collisions
-  def synchronized *meth
-    meth.each do
+  def synchronized *methods
+    methods.each do |meth|
        class_eval <<-EOF
          alias unsynchronized_#{meth} #{meth}
          def #{meth}(*a, &b)
@@ -144,8 +144,8 @@ class Object
      end
    end
  
-  def ignore_concurrent_calls *meth
-    meth.each do
+  def ignore_concurrent_calls *methods
+    methods.each do |meth|
        mutex = "@__concurrent_protector_#{meth}"
        flag = "@__concurrent_flag_#{meth}"
        oldmeth = "__unprotected_#{meth}"
@@ -172,6 +172,16 @@ class Object
  end
  
  class String
+  ## nasty multibyte hack for ruby 1.8. if $encoding is "UTF-8", count the
+  ## matches of /./u (newlines are not matched). otherwise, use String#size.
+  def display_length
+    if $encoding == "UTF-8"
+      scan(/./u).size
+    else
+      size
+    end
+  end
+
    def camel_to_hyphy
      self.gsub(/([a-z])([A-Z0-9])/, '\1-\2').downcase
    end
@@ -188,11 +198,6 @@ class String
      ret
    end
  
-  ## one of the few things i miss from perl
-  def ucfirst
-    self[0 .. 0].upcase + self[1 .. -1]
-  end
-
    ## a very complicated regex found on teh internets to split on
    ## commas, unless they occurr within double quotes.
    def split_on_commas
@@ -208,7 +213,7 @@ class String
      region_start = 0
      while pos <= length
        newpos = case state
-        when :escaped_instring, :escaped_outstring: pos
+        when :escaped_instring, :escaped_outstring then pos
          else index(/[,"\\]/, pos)
        end 
        
@@ -222,26 +227,26 @@ class String
        case char
        when ?"
          state = case state
-          when :outstring: :instring
-          when :instring: :outstring
-          when :escaped_instring: :instring
-          when :escaped_outstring: :outstring
+          when :outstring then :instring
+          when :instring then :outstring
+          when :escaped_instring then :instring
+          when :escaped_outstring then :outstring
          end
        when ?,, nil
          state = case state
-          when :outstring, :escaped_outstring:
+          when :outstring, :escaped_outstring then
              ret << self[region_start ... newpos].gsub(/^\s+|\s+$/, "")
              region_start = newpos + 1
              :outstring
-          when :instring: :instring
-          when :escaped_instring: :instring
+          when :instring then :instring
+          when :escaped_instring then :instring
          end
        when ?\\
          state = case state
-          when :instring: :escaped_instring
-          when :outstring: :escaped_outstring
-          when :escaped_instring: :instring
-          when :escaped_outstring: :outstring
+          when :instring then :escaped_instring
+          when :outstring then :escaped_outstring
+          when :escaped_instring then :instring
+          when :escaped_outstring then :outstring
          end
        end
        pos = newpos + 1
@@ -276,6 +281,17 @@ class String
    def normalize_whitespace
      gsub(/\t/, "    ").gsub(/\r/, "")
    end
+
+  unless method_defined? :ord # check String's instance methods; a bare "defined? ord" here would probe the String class object instead
+    def ord # fallback for rubies without String#ord: value of the first element of the string
+      self[0] # on ruby 1.8, String#[] with an integer index yields the byte value
+    end
+  end
+
+  ## splits the string on whitespace and interns each word, returning an
+  ## array of symbols. typically used in Sup for translating Ferret's
+  ## representation of a list of labels (a string) to an array of label symbols.
+  def symbolistize; split.map { |x| x.intern } end
  end
  
  class Numeric
@@ -403,6 +419,10 @@ class Array
  
    def last= e; self[-1] = e end
    def nonempty?; !empty? end
+
+  def to_set_of_symbols # despite the name, returns an Array (not a Set) of unique symbols
+    map { |x| x.is_a?(Symbol) ? x : x.intern }.uniq # Symbols pass through; everything else is interned
+  end
  end
  
  class Time
@@ -617,3 +637,23 @@ class FinishLine
      @m.synchronize { !@over && @over = true }
    end
  end
+
+class Iconv
+  def self.easy_decode target, charset, text # converts text from charset to target; returns text unconverted on the rescued errors below
+    return text if charset =~ /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i # these charset names are passed through untouched
+    charset = case charset # map common alternative spellings to a single charset name
+      when /UTF[-_ ]?8/i then "utf-8"
+      when /(iso[-_ ])?latin[-_ ]?1$/i then "ISO-8859-1"
+      when /iso[-_ ]?8859[-_ ]?15/i then 'ISO-8859-15'
+      when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i then "utf-7"
+      else charset
+    end
+
+    begin
+      Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2] # a space is appended before converting and the last character is dropped afterwards (reason not visible here)
+    rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e # log a warning and fall back to the original text
+      Redwood::log "warning: error (#{e.class.name}) decoding text from #{charset} to #{target}: #{text[0 ... 20]}"
+      text
+    end
+  end
+end