X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=lib%2Fsup%2Futil.rb;h=068ce6bad904c9012bdcd5d8883ac48a68e59976;hb=da1abd360e25011bdbe23b9194f926f29b8aef44;hp=c26b4dbe9a3fe5207d34be9067394d571c36c979;hpb=3439894f38d1d4140bb912de0232fdb576991de0;p=sup diff --git a/lib/sup/util.rb b/lib/sup/util.rb index c26b4db..068ce6b 100644 --- a/lib/sup/util.rb +++ b/lib/sup/util.rb @@ -2,6 +2,7 @@ require 'thread' require 'lockfile' require 'mime/types' require 'pathname' +require 'set' ## time for some monkeypatching! class Lockfile @@ -24,6 +25,7 @@ class Lockfile def lockinfo_on_disk h = load_lock_id IO.read(path) h['mtime'] = File.mtime path + h['path'] = path h end @@ -90,7 +92,7 @@ end class Range ## only valid for integer ranges (unless I guess it's exclusive) - def size + def size last - first + (exclude_end? ? 0 : 1) end end @@ -133,8 +135,8 @@ class Object ## clone of java-style whole-method synchronization ## assumes a @mutex variable ## TODO: clean up, try harder to avoid namespace collisions - def synchronized *meth - meth.each do + def synchronized *methods + methods.each do |meth| class_eval <<-EOF alias unsynchronized_#{meth} #{meth} def #{meth}(*a, &b) @@ -144,8 +146,8 @@ class Object end end - def ignore_concurrent_calls *meth - meth.each do + def ignore_concurrent_calls *methods + methods.each do |meth| mutex = "@__concurrent_protector_#{meth}" flag = "@__concurrent_flag_#{meth}" oldmeth = "__unprotected_#{meth}" @@ -172,6 +174,16 @@ class Object end class String + ## nasty multibyte hack for ruby 1.8. if it's utf-8, split into chars using + ## the utf8 regex and count those. otherwise, use the byte length. + def display_length + if $encoding == "UTF-8" + scan(/./u).size + else + size + end + end + def camel_to_hyphy self.gsub(/([a-z])([A-Z0-9])/, '\1-\2').downcase end @@ -203,10 +215,10 @@ class String region_start = 0 while pos <= length newpos = case state - when :escaped_instring, :escaped_outstring: pos + when :escaped_instring, :escaped_outstring then pos else index(/[,"\\]/, pos) - end - + end + if newpos char = self[newpos] else @@ -217,26 +229,26 @@ class String case char when ?" state = case state - when :outstring: :instring - when :instring: :outstring - when :escaped_instring: :instring - when :escaped_outstring: :outstring + when :outstring then :instring + when :instring then :outstring + when :escaped_instring then :instring + when :escaped_outstring then :outstring end when ?,, nil state = case state - when :outstring, :escaped_outstring: + when :outstring, :escaped_outstring then ret << self[region_start ... newpos].gsub(/^\s+|\s+$/, "") region_start = newpos + 1 :outstring - when :instring: :instring - when :escaped_instring: :instring + when :instring then :instring + when :escaped_instring then :instring end when ?\\ state = case state - when :instring: :escaped_instring - when :outstring: :escaped_outstring - when :escaped_instring: :instring - when :escaped_outstring: :outstring + when :instring then :escaped_instring + when :outstring then :escaped_outstring + when :escaped_instring then :instring + when :escaped_outstring then :outstring end end pos = newpos + 1 @@ -272,10 +284,18 @@ class String gsub(/\t/, " ").gsub(/\r/, "") end - ## takes a space-separated list of words, and returns an array of symbols. - ## typically used in Sup for translating Ferret's representation of a list - ## of labels (a string) to an array of label symbols. - def symbolistize; split.map { |x| x.intern } end + unless method_defined? :ord + def ord + self[0] + end + end + + ## takes a list of words, and returns an array of symbols. typically used in + ## Sup for translating Ferret's representation of a list of labels (a string) + ## to an array of label symbols. + ## + ## split_on will be passed to String#split, so you can leave this nil for space. + def to_set_of_symbols split_on=nil; Set.new split(split_on).map { |x| x.strip.intern } end end class Numeric @@ -403,10 +423,6 @@ class Array def last= e; self[-1] = e end def nonempty?; !empty? end - - def to_set_of_symbols - map { |x| x.is_a?(Symbol) ? x : x.intern }.uniq - end end class Time @@ -480,19 +496,20 @@ class Time end end -## simple singleton module. far less complete and insane than the ruby -## standard library one, but automatically forwards methods calls and -## allows for constructors that take arguments. +## simple singleton module. far less complete and insane than the ruby standard +## library one, but it automatically forwards methods calls and allows for +## constructors that take arguments. ## -## You must have #initialize call "self.class.i_am_the_instance self" -## at some point or everything will fail horribly. +## classes that inherit this can define initialize. however, you cannot call +## .new on the class. To get the instance of the class, call .instance; +## to create the instance, call init. module Singleton module ClassMethods def instance; @instance; end def instantiated?; defined?(@instance) && !@instance.nil?; end def deinstantiate!; @instance = nil; end def method_missing meth, *a, &b - raise "no instance defined!" unless defined? @instance + raise "no #{name} instance defined in method call to #{meth}!" unless defined? @instance ## if we've been deinstantiated, just drop all calls. this is ## useful because threads that might be active during the @@ -502,13 +519,14 @@ module Singleton @instance.send meth, *a, &b end - def i_am_the_instance o + def init *args raise "there can be only one! (instance)" if defined? @instance - @instance = o + @instance = new(*args) end end def self.included klass + klass.private_class_method :allocate, :new klass.extend ClassMethods end end @@ -527,7 +545,7 @@ class Recoverable def has_errors?; !@error.nil?; end def method_missing m, *a, &b; __pass m, *a, &b end - + def id; __pass :id; end def to_s; __pass :to_s; end def to_yaml x; __pass :to_yaml, x; end @@ -626,16 +644,18 @@ class Iconv def self.easy_decode target, charset, text return text if charset =~ /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i charset = case charset - when /UTF[-_ ]?8/i: "utf-8" - when /(iso[-_ ])?latin[-_ ]?1$/i: "ISO-8859-1" - when /iso[-_ ]?8859[-_ ]?15/i: 'ISO-8859-15' - when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i: "utf-7" - else charset - end - - # Convert: - # - # Remember - Iconv.open(to, from)! - Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2] + when /UTF[-_ ]?8/i then "utf-8" + when /(iso[-_ ])?latin[-_ ]?1$/i then "ISO-8859-1" + when /iso[-_ ]?8859[-_ ]?15/i then 'ISO-8859-15' + when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i then "utf-7" + else charset + end + + begin + Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2] + rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence => e + warn "couldn't transcode text from #{charset} to #{target} (\"#{text[0 ... 20]}\"...) (got #{e.message}); using original as is" + text + end end end