X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=lib%2Fsup%2Futil.rb;h=8f60cc43216979aaa207696324f117ba7025cbdd;hb=9716c862c1e2cd37ffbcb6b63be576ebe6ed9779;hp=b479908e1263dc1384537c72c2d0c9fcaf4c9146;hpb=15beb44ef5327ce1ea038a3f396e50784ee1adb2;p=sup diff --git a/lib/sup/util.rb b/lib/sup/util.rb index b479908..8f60cc4 100644 --- a/lib/sup/util.rb +++ b/lib/sup/util.rb @@ -133,8 +133,8 @@ class Object ## clone of java-style whole-method synchronization ## assumes a @mutex variable ## TODO: clean up, try harder to avoid namespace collisions - def synchronized *meth - meth.each do + def synchronized *methods + methods.each do |meth| class_eval <<-EOF alias unsynchronized_#{meth} #{meth} def #{meth}(*a, &b) @@ -144,8 +144,8 @@ class Object end end - def ignore_concurrent_calls *meth - meth.each do + def ignore_concurrent_calls *methods + methods.each do |meth| mutex = "@__concurrent_protector_#{meth}" flag = "@__concurrent_flag_#{meth}" oldmeth = "__unprotected_#{meth}" @@ -172,7 +172,15 @@ class Object end class String - def display_length; scan(/./u).size end + ## nasty multibyte hack for ruby 1.8. if it's utf-8, split into chars using + ## the utf8 regex and count those. otherwise, use the byte length. + def display_length + if $encoding == "UTF-8" + scan(/./u).size + else + size + end + end def camel_to_hyphy self.gsub(/([a-z])([A-Z0-9])/, '\1-\2').downcase @@ -205,7 +213,7 @@ class String region_start = 0 while pos <= length newpos = case state - when :escaped_instring, :escaped_outstring: pos + when :escaped_instring, :escaped_outstring then pos else index(/[,"\\]/, pos) end @@ -219,26 +227,26 @@ class String case char when ?" state = case state - when :outstring: :instring - when :instring: :outstring - when :escaped_instring: :instring - when :escaped_outstring: :outstring + when :outstring then :instring + when :instring then :outstring + when :escaped_instring then :instring + when :escaped_outstring then :outstring end when ?,, nil state = case state - when :outstring, :escaped_outstring: + when :outstring, :escaped_outstring then ret << self[region_start ... newpos].gsub(/^\s+|\s+$/, "") region_start = newpos + 1 :outstring - when :instring: :instring - when :escaped_instring: :instring + when :instring then :instring + when :escaped_instring then :instring end when ?\\ state = case state - when :instring: :escaped_instring - when :outstring: :escaped_outstring - when :escaped_instring: :instring - when :escaped_outstring: :outstring + when :instring then :escaped_instring + when :outstring then :escaped_outstring + when :escaped_instring then :instring + when :escaped_outstring then :outstring end end pos = newpos + 1 @@ -274,6 +282,12 @@ class String gsub(/\t/, " ").gsub(/\r/, "") end + if not defined? ord + def ord + self[0] + end + end + ## takes a space-separated list of words, and returns an array of symbols. ## typically used in Sup for translating Ferret's representation of a list ## of labels (a string) to an array of label symbols. @@ -628,16 +642,18 @@ class Iconv def self.easy_decode target, charset, text return text if charset =~ /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i charset = case charset - when /UTF[-_ ]?8/i: "utf-8" - when /(iso[-_ ])?latin[-_ ]?1$/i: "ISO-8859-1" - when /iso[-_ ]?8859[-_ ]?15/i: 'ISO-8859-15' - when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i: "utf-7" - else charset - end - - # Convert: - # - # Remember - Iconv.open(to, from)! - Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2] + when /UTF[-_ ]?8/i then "utf-8" + when /(iso[-_ ])?latin[-_ ]?1$/i then "ISO-8859-1" + when /iso[-_ ]?8859[-_ ]?15/i then 'ISO-8859-15' + when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i then "utf-7" + else charset + end + + begin + Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2] + rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e + Redwood::log "warning: error (#{e.class.name}) decoding text from #{charset} to #{target}: #{text[0 ... 20]}" + text + end end end