diff options
author | Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> | 2021-02-25 13:02:10 +0100 |
---|---|---|
committer | Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> | 2021-02-25 13:02:10 +0100 |
commit | 4c3aee2dcfcc1fa2ddab8b84354a476cd936bad1 (patch) | |
tree | 9def6e696246629ab26a853318c4e6064d8bebde | |
parent | second optimization: have a fast path in replace for single character strings (diff) | |
download | pypy-4c3aee2dcfcc1fa2ddab8b84354a476cd936bad1.tar.gz pypy-4c3aee2dcfcc1fa2ddab8b84354a476cd936bad1.tar.bz2 pypy-4c3aee2dcfcc1fa2ddab8b84354a476cd936bad1.zip |
fix a tiny performance bug in our string search that we ported from cpython.
the condition is a bit complicated:
- we need a last character that is unique in the string
- we are at a position in the string that matches the last character, but a
previous char is a mismatch
- the next char in the haystack is in the bloom filter
if all this is met, we want to skip a whole needle length, not len(needle) - 1
this was pointed out by Tim Peters here: https://bugs.python.org/msg378301
-rw-r--r-- | rpython/rlib/rstring.py | 4 | ||||
-rw-r--r-- | rpython/rlib/test/test_rstring.py | 2 | ||||
-rw-r--r-- | rpython/rtyper/lltypesystem/rstr.py | 4 |
3 files changed, 6 insertions, 4 deletions
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py index 0f30b6aa2b..ed7a61d734 100644 --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -434,7 +434,7 @@ def _search(value, other, start, end, mode): return -1 mlast = m - 1 - skip = mlast - 1 + skip = mlast mask = 0 if mode != SEARCH_RFIND: @@ -447,7 +447,7 @@ def _search(value, other, start, end, mode): i = start - 1 while i + 1 <= start + w: i += 1 - if value[i + m - 1] == other[m - 1]: + if value[i + mlast] == other[mlast]: for j in range(mlast): if value[i + j] != other[j]: break diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py index b8b0cd8482..10724b07ee 100644 --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -251,6 +251,8 @@ def test_search(): check_search(find, 'one two three', 'ne', 5, 13, res=-1) check_search(find, 'one two three', '', 0, 13, res=0) + check_search(find, '000000p00000000', 'ap', 0, 15, res=-1) + check_search(rfind, 'one two three', 'e', 0, 13, res=12) check_search(rfind, 'one two three', 'e', 0, 1, res=-1) check_search(rfind, 'one two three', '', 0, 13, res=13) diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py index 0ae4c2f459..3f05629757 100644 --- a/rpython/rtyper/lltypesystem/rstr.py +++ b/rpython/rtyper/lltypesystem/rstr.py @@ -794,7 +794,7 @@ class LLHelpers(AbstractLLHelpers): return -1 mlast = m - 1 - skip = mlast - 1 + skip = mlast mask = 0 if mode != FAST_RFIND: @@ -807,7 +807,7 @@ class LLHelpers(AbstractLLHelpers): i = start - 1 while i + 1 <= start + w: i += 1 - if s1.chars[i + m - 1] == s2.chars[m - 1]: + if s1.chars[i + mlast] == s2.chars[mlast]: for j in range(mlast): if s1.chars[i + j] != s2.chars[j]: break |