From 03f54320860eb959cb6d41f7bb7827caa738f1e8 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 26 Sep 2023 12:29:27 +0100 Subject: [PATCH] use text prefix in regex to speed up query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit for selector; {"selector":{"_id":{"$regex":"doc.+"}}} before; { "include_docs": true, "view_type": "map", "reduce": false, "partition": null, "start_key": [], "end_key": [ "" ], "direction": "fwd", "stable": false, "update": true, "conflicts": "undefined" } after; { "include_docs": true, "view_type": "map", "reduce": false, "partition": null, "start_key": [ "doc" ], "end_key": [ "doc�", "" ], "direction": "fwd", "stable": false, "update": true, "conflicts": "undefined" } closes: https://github.com/apache/couchdb/issues/4775 --- src/mango/src/mango_idx_view.erl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index 25d75d55d0a..0bdd4ab90b6 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -35,6 +35,8 @@ -include("mango.hrl"). -include("mango_idx_view.hrl"). +-define(PREFIX_RE, "^\\w+"). + validate_new(#idx{} = Idx, _Db) -> {ok, Def} = do_validate(Idx#idx.def), {ok, Idx#idx{def = Def}}. @@ -310,6 +312,8 @@ indexable({[{<<"$gte">>, _}]}) -> % Making `$exists` indexable should not cause problems in other cases. indexable({[{<<"$exists">>, _}]}) -> true; +indexable({[{<<"$regex">>, _}]}) -> + true; % All other operators are currently not indexable. % This is also a subtle assertion that we don't % call indexable/1 on a field name. 
@@ -485,6 +489,14 @@ range({[{<<"$gt">>, Arg}]}, LCmp, Low, HCmp, High) -> max -> empty end; +% use any text prefix in the regex to narrow the query +range({[{<<"$regex">>, Arg}]}, LCmp, Low, HCmp, High) -> + case re:run(Arg, ?PREFIX_RE, [{capture, first, binary}]) of + {match, [Prefix]} -> + {'$gte', Prefix, '$lte', <<Prefix/binary, 16#10FFFF>>}; + nomatch -> + {LCmp, Low, HCmp, High} + end; % There's some other un-indexable restriction on the index % that will be applied as a post-filter. Ignore it and % carry on our merry way.