From 4c3aee2dcfcc1fa2ddab8b84354a476cd936bad1 Mon Sep 17 00:00:00 2001 From: Carl Friedrich Bolz-Tereick Date: Thu, 25 Feb 2021 13:02:10 +0100 Subject: fix a tiny performance bug in our string search that we ported from cpython. the condition is a bit complicated: - we need a last character that is unique in the string - we are at a position in the string that matches the last character, but a previous char is a mismatch - the next char in the haystack is in the bloom filter if all this is met, we want to skip a whole needle length, not len(needle) - 1 this was pointed out by Tim Peters here: https://bugs.python.org/msg378301 --- rpython/rlib/rstring.py | 4 ++-- rpython/rlib/test/test_rstring.py | 2 ++ rpython/rtyper/lltypesystem/rstr.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py index 0f30b6aa2b..ed7a61d734 100644 --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -434,7 +434,7 @@ def _search(value, other, start, end, mode): return -1 mlast = m - 1 - skip = mlast - 1 + skip = mlast mask = 0 if mode != SEARCH_RFIND: @@ -447,7 +447,7 @@ def _search(value, other, start, end, mode): i = start - 1 while i + 1 <= start + w: i += 1 - if value[i + m - 1] == other[m - 1]: + if value[i + mlast] == other[mlast]: for j in range(mlast): if value[i + j] != other[j]: break diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py index b8b0cd8482..10724b07ee 100644 --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -251,6 +251,8 @@ def test_search(): check_search(find, 'one two three', 'ne', 5, 13, res=-1) check_search(find, 'one two three', '', 0, 13, res=0) + check_search(find, '000000p00000000', 'ap', 0, 15, res=-1) + check_search(rfind, 'one two three', 'e', 0, 13, res=12) check_search(rfind, 'one two three', 'e', 0, 1, res=-1) check_search(rfind, 'one two three', '', 0, 13, res=13) diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py index 0ae4c2f459..3f05629757 100644 --- a/rpython/rtyper/lltypesystem/rstr.py +++ b/rpython/rtyper/lltypesystem/rstr.py @@ -794,7 +794,7 @@ class LLHelpers(AbstractLLHelpers): return -1 mlast = m - 1 - skip = mlast - 1 + skip = mlast mask = 0 if mode != FAST_RFIND: @@ -807,7 +807,7 @@ class LLHelpers(AbstractLLHelpers): i = start - 1 while i + 1 <= start + w: i += 1 - if s1.chars[i + m - 1] == s2.chars[m - 1]: + if s1.chars[i + mlast] == s2.chars[mlast]: for j in range(mlast): if s1.chars[i + j] != s2.chars[j]: break -- cgit v1.2.3-65-gdbad