From 45d06190590cb21d67868b0cfa1e4aa254a637d3 Mon Sep 17 00:00:00 2001
From: William Morgan
Date: Mon, 8 Jun 2009 14:07:51 -0400
Subject: [PATCH] reduce quote parsing worst-case behavior

Split a quote regex into two to increase performance on certain long
strings. Thanks to Edward Z. Yang.
---
 lib/sup/message.rb | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/sup/message.rb b/lib/sup/message.rb
index 5993729..5372fc7 100644
--- a/lib/sup/message.rb
+++ b/lib/sup/message.rb
@@ -26,7 +26,6 @@ class Message
 
   QUOTE_PATTERN = /^\s{0,4}[>|\}]/
   BLOCK_QUOTE_PATTERN = /^-----\s*Original Message\s*----+$/
-  QUOTE_START_PATTERN = /\w.*:$/
   SIG_PATTERN = /(^-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)|(^\s*--~--~-)|(^\s*--\+\+\*\*==)/
 
   MAX_SIG_DISTANCE = 15 # lines from the end
@@ -449,7 +448,11 @@ private
       when :text
         newstate = nil
 
-        if line =~ QUOTE_PATTERN || (line =~ QUOTE_START_PATTERN && nextline =~ QUOTE_PATTERN)
+        ## the following /:$/ followed by /\w/ is an attempt to detect the
+        ## start of a quote. this is split into two regexen because the
+        ## original regex /\w.*:$/ had very poor behavior on long lines
+        ## like ":a:a:a:a:a" that occurred in certain emails.
+        if line =~ QUOTE_PATTERN || (line =~ /:$/ && line =~ /\w/ && nextline =~ QUOTE_PATTERN)
           newstate = :quote
         elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE
           newstate = :sig
-- 
2.45.2