diff --git a/CHANGELOG.md b/CHANGELOG.md index e8410c1804..0263e88658 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,65 @@ +Tantivy 0.22 +================================ + +Tantivy 0.22 will be able to read indices created with Tantivy 0.21. + +#### Bugfixes +- Fix null byte handling in JSON paths (null bytes in json keys caused panic during indexing) [#2345](https://github.com/quickwit-oss/tantivy/pull/2345)(@PSeitz) +- Fix bug that can cause `get_docids_for_value_range` to panic. [#2295](https://github.com/quickwit-oss/tantivy/pull/2295)(@fulmicoton) +- Avoid 1 document indices by increasing min memory to 15MB for indexing [#2176](https://github.com/quickwit-oss/tantivy/pull/2176)(@PSeitz) +- Fix merge panic for JSON fields [#2284](https://github.com/quickwit-oss/tantivy/pull/2284)(@PSeitz) +- Fix bug occurring when merging JSON object indexed with positions. [#2253](https://github.com/quickwit-oss/tantivy/pull/2253)(@fulmicoton) +- Fix empty DateHistogram gap bug [#2183](https://github.com/quickwit-oss/tantivy/pull/2183)(@PSeitz) +- Fix range query end check (fields with less than 1 value per doc are affected) [#2226](https://github.com/quickwit-oss/tantivy/pull/2226)(@PSeitz) +- Handle exclusive out of bounds ranges on fastfield range queries [#2174](https://github.com/quickwit-oss/tantivy/pull/2174)(@PSeitz) + +#### Breaking API Changes +- rename ReloadPolicy onCommit to onCommitWithDelay [#2235](https://github.com/quickwit-oss/tantivy/pull/2235)(@giovannicuccu) +- Move exports from the root into modules [#2220](https://github.com/quickwit-oss/tantivy/pull/2220)(@PSeitz) +- Accept field name instead of `Field` in FilterCollector [#2196](https://github.com/quickwit-oss/tantivy/pull/2196)(@PSeitz) +- remove deprecated IntOptions and DateTime [#2353](https://github.com/quickwit-oss/tantivy/pull/2353)(@PSeitz) + +#### Features/Improvements +- Tantivy documents as a trait: Index data directly without converting to tantivy types first 
[#2071](https://github.com/quickwit-oss/tantivy/pull/2071)(@ChillFish8) +- encode some part of posting list as -1 instead of direct values (smaller inverted indices) [#2185](https://github.com/quickwit-oss/tantivy/pull/2185)(@trinity-1686a) +- **Aggregation** + - Support to deserialize f64 from string [#2311](https://github.com/quickwit-oss/tantivy/pull/2311)(@PSeitz) + - Add a top_hits aggregator [#2198](https://github.com/quickwit-oss/tantivy/pull/2198)(@ditsuke) + - Support bool type in term aggregation [#2318](https://github.com/quickwit-oss/tantivy/pull/2318)(@PSeitz) + - Support IP addresses in term aggregation [#2319](https://github.com/quickwit-oss/tantivy/pull/2319)(@PSeitz) + - Support date type in term aggregation [#2172](https://github.com/quickwit-oss/tantivy/pull/2172)(@PSeitz) + - Support escaped dot when addressing field [#2250](https://github.com/quickwit-oss/tantivy/pull/2250)(@PSeitz) + +- Add ExistsQuery to check documents that have a value [#2160](https://github.com/quickwit-oss/tantivy/pull/2160)(@imotov) +- Expose TopDocs::order_by_u64_field again [#2282](https://github.com/quickwit-oss/tantivy/pull/2282)(@ditsuke) + +- **Memory/Performance** + - Faster TopN: replace BinaryHeap with TopNComputer [#2186](https://github.com/quickwit-oss/tantivy/pull/2186)(@PSeitz) + - reduce number of allocations during indexing [#2257](https://github.com/quickwit-oss/tantivy/pull/2257)(@PSeitz) + - Less Memory while indexing: docid deltas while indexing [#2249](https://github.com/quickwit-oss/tantivy/pull/2249)(@PSeitz) + - Faster indexing: use term hashmap in fastfield [#2243](https://github.com/quickwit-oss/tantivy/pull/2243)(@PSeitz) + - term hashmap remove copy in is_empty, unused unordered_id [#2229](https://github.com/quickwit-oss/tantivy/pull/2229)(@PSeitz) + - add method to fetch block of first values in columnar [#2330](https://github.com/quickwit-oss/tantivy/pull/2330)(@PSeitz) + - Faster aggregations: add fast path for full columns in fetch_block 
[#2328](https://github.com/quickwit-oss/tantivy/pull/2328)(@PSeitz) + - Faster sstable loading: use fst for sstable index [#2268](https://github.com/quickwit-oss/tantivy/pull/2268)(@trinity-1686a) + +- **QueryParser** + - allow newline where we allow space in query parser [#2302](https://github.com/quickwit-oss/tantivy/pull/2302)(@trinity-1686a) + - allow some mixing of occur and bool in strict query parser [#2323](https://github.com/quickwit-oss/tantivy/pull/2323)(@trinity-1686a) + - handle * inside term in lenient query parser [#2228](https://github.com/quickwit-oss/tantivy/pull/2228)(@trinity-1686a) + - add support for exists query syntax in query parser [#2170](https://github.com/quickwit-oss/tantivy/pull/2170)(@trinity-1686a) +- Add shared search executor [#2312](https://github.com/quickwit-oss/tantivy/pull/2312)(@MochiXu) +- Truncate keys to u16::MAX in term hashmap [#2299](https://github.com/quickwit-oss/tantivy/pull/2299)(@PSeitz) +- report if a term matched when warming up posting list [#2309](https://github.com/quickwit-oss/tantivy/pull/2309)(@trinity-1686a) +- Support json fields in FuzzyTermQuery [#2173](https://github.com/quickwit-oss/tantivy/pull/2173)(@PingXia-at) +- Read list of fields encoded in term dictionary for JSON fields [#2184](https://github.com/quickwit-oss/tantivy/pull/2184)(@PSeitz) +- add collect_block to BoxableSegmentCollector [#2331](https://github.com/quickwit-oss/tantivy/pull/2331)(@PSeitz) +- expose collect_block buffer size [#2326](https://github.com/quickwit-oss/tantivy/pull/2326)(@PSeitz) +- Forward regex parser errors [#2288](https://github.com/quickwit-oss/tantivy/pull/2288)(@adamreichold) +- Make FacetCounts defaultable and cloneable. 
[#2322](https://github.com/quickwit-oss/tantivy/pull/2322)(@adamreichold) +- Derive Debug for SchemaBuilder [#2254](https://github.com/quickwit-oss/tantivy/pull/2254)(@GodTamIt) +- add missing inlines to tantivy options [#2245](https://github.com/quickwit-oss/tantivy/pull/2245)(@PSeitz) + Tantivy 0.21.1 ================================ #### Bugfixes diff --git a/Cargo.toml b/Cargo.toml index 6497e79b97..3580168e1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy" -version = "0.22.0-dev" +version = "0.22.0" authors = ["Paul Masurel "] license = "MIT" categories = ["database-implementations", "data-structures"] @@ -52,13 +52,13 @@ itertools = "0.12.0" measure_time = "0.8.2" arc-swap = "1.5.0" -columnar = { version= "0.2", path="./columnar", package ="tantivy-columnar" } -sstable = { version= "0.2", path="./sstable", package ="tantivy-sstable", optional = true } -stacker = { version= "0.2", path="./stacker", package ="tantivy-stacker" } -query-grammar = { version= "0.21.0", path="./query-grammar", package = "tantivy-query-grammar" } -tantivy-bitpacker = { version= "0.5", path="./bitpacker" } -common = { version= "0.6", path = "./common/", package = "tantivy-common" } -tokenizer-api = { version= "0.2", path="./tokenizer-api", package="tantivy-tokenizer-api" } +columnar = { version= "0.3", path="./columnar", package ="tantivy-columnar" } +sstable = { version= "0.3", path="./sstable", package ="tantivy-sstable", optional = true } +stacker = { version= "0.3", path="./stacker", package ="tantivy-stacker" } +query-grammar = { version= "0.22.0", path="./query-grammar", package = "tantivy-query-grammar" } +tantivy-bitpacker = { version= "0.6", path="./bitpacker" } +common = { version= "0.7", path = "./common/", package = "tantivy-common" } +tokenizer-api = { version= "0.3", path="./tokenizer-api", package="tantivy-tokenizer-api" } sketches-ddsketch = { version = "0.2.1", features = ["use_serde"] } futures-util = { version = "0.3.28", optional 
= true } fnv = "1.0.7" diff --git a/bitpacker/Cargo.toml b/bitpacker/Cargo.toml index b1e7db0215..104f5f8059 100644 --- a/bitpacker/Cargo.toml +++ b/bitpacker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy-bitpacker" -version = "0.5.0" +version = "0.6.0" edition = "2021" authors = ["Paul Masurel "] license = "MIT" diff --git a/cliff.toml b/cliff.toml index 99bd506620..03424f52bd 100644 --- a/cliff.toml +++ b/cliff.toml @@ -1,6 +1,10 @@ # configuration file for git-cliff{ pattern = "foo", replace = "bar"} # see https://github.com/orhun/git-cliff#configuration-file +[remote.github] +owner = "quickwit-oss" +repo = "tantivy" + [changelog] # changelog header header = """ @@ -8,15 +12,43 @@ header = """ # template for the changelog body # https://tera.netlify.app/docs/#introduction body = """ -{% if version %}\ - {{ version | trim_start_matches(pat="v") }} ({{ timestamp | date(format="%Y-%m-%d") }}) - ================== -{% else %}\ - ## [unreleased] -{% endif %}\ +## What's Changed + +{%- if version %} in {{ version }}{%- endif -%} {% for commit in commits %} - - {% if commit.breaking %}[**breaking**] {% endif %}{{ commit.message | split(pat="\n") | first | trim | upper_first }}(@{{ commit.author.name }})\ -{% endfor %} + {% if commit.github.pr_title -%} + {%- set commit_message = commit.github.pr_title -%} + {%- else -%} + {%- set commit_message = commit.message -%} + {%- endif -%} + - {{ commit_message | split(pat="\n") | first | trim }}\ + {% if commit.github.pr_number %} \ + [#{{ commit.github.pr_number }}]({{ self::remote_url() }}/pull/{{ commit.github.pr_number }}){% if commit.github.username %}(@{{ commit.github.username }}){%- endif -%} \ + {%- endif %} +{%- endfor -%} + +{% if github.contributors | filter(attribute="is_first_time", value=true) | length != 0 %} + {% raw %}\n{% endraw -%} + ## New Contributors +{%- endif %}\ +{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %} + * @{{ contributor.username }} made 
their first contribution + {%- if contributor.pr_number %} in \ + [#{{ contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \ + {%- endif %} +{%- endfor -%} + +{% if version %} + {% if previous.version %} + **Full Changelog**: {{ self::remote_url() }}/compare/{{ previous.version }}...{{ version }} + {% endif %} +{% else -%} + {% raw %}\n{% endraw %} +{% endif %} + +{%- macro remote_url() -%} + https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }} +{%- endmacro -%} """ # remove the leading and trailing whitespace from the template trim = true @@ -25,53 +57,24 @@ footer = """ """ postprocessors = [ - { pattern = 'Paul Masurel', replace = "fulmicoton"}, # replace with github user - { pattern = 'PSeitz', replace = "PSeitz"}, # replace with github user - { pattern = 'Adam Reichold', replace = "adamreichold"}, # replace with github user - { pattern = 'trinity-1686a', replace = "trinity-1686a"}, # replace with github user - { pattern = 'Michael Kleen', replace = "mkleen"}, # replace with github user - { pattern = 'Adrien Guillo', replace = "guilload"}, # replace with github user - { pattern = 'François Massot', replace = "fmassot"}, # replace with github user - { pattern = 'Naveen Aiathurai', replace = "naveenann"}, # replace with github user - { pattern = '', replace = ""}, # replace with github user ] [git] # parse the commits based on https://www.conventionalcommits.org # This is required or commit.message contains the whole commit message and not just the title -conventional_commits = true +conventional_commits = false # filter out the commits that are not conventional -filter_unconventional = false +filter_unconventional = true # process each line of a commit as an individual commit split_commits = false # regex for preprocessing the commit messages commit_preprocessors = [ - { pattern = '\((\w+\s)?#([0-9]+)\)', replace = "[#${2}](https://github.com/quickwit-oss/tantivy/issues/${2})"}, # replace issue numbers + { 
pattern = '\((\w+\s)?#([0-9]+)\)', replace = ""}, ] #link_parsers = [ #{ pattern = "#(\\d+)", href = "https://github.com/quickwit-oss/tantivy/pulls/$1"}, #] # regex for parsing and grouping commits -commit_parsers = [ - { message = "^feat", group = "Features"}, - { message = "^fix", group = "Bug Fixes"}, - { message = "^doc", group = "Documentation"}, - { message = "^perf", group = "Performance"}, - { message = "^refactor", group = "Refactor"}, - { message = "^style", group = "Styling"}, - { message = "^test", group = "Testing"}, - { message = "^chore\\(release\\): prepare for", skip = true}, - { message = "(?i)clippy", skip = true}, - { message = "(?i)dependabot", skip = true}, - { message = "(?i)fmt", skip = true}, - { message = "(?i)bump", skip = true}, - { message = "(?i)readme", skip = true}, - { message = "(?i)comment", skip = true}, - { message = "(?i)spelling", skip = true}, - { message = "^chore", group = "Miscellaneous Tasks"}, - { body = ".*security", group = "Security"}, - { message = ".*", group = "Other", default_scope = "other"}, -] # protect breaking changes from being skipped due to matching a skipping commit_parser protect_breaking_commits = false # filter out the commits that are not matched by commit parsers diff --git a/columnar/Cargo.toml b/columnar/Cargo.toml index 2599654164..36a5a55d51 100644 --- a/columnar/Cargo.toml +++ b/columnar/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy-columnar" -version = "0.2.0" +version = "0.3.0" edition = "2021" license = "MIT" homepage = "https://github.com/quickwit-oss/tantivy" @@ -12,10 +12,10 @@ categories = ["database-implementations", "data-structures", "compression"] itertools = "0.12.0" fastdivide = "0.4.0" -stacker = { version= "0.2", path = "../stacker", package="tantivy-stacker"} -sstable = { version= "0.2", path = "../sstable", package = "tantivy-sstable" } -common = { version= "0.6", path = "../common", package = "tantivy-common" } -tantivy-bitpacker = { version= "0.5", path = 
"../bitpacker/" } +stacker = { version= "0.3", path = "../stacker", package="tantivy-stacker"} +sstable = { version= "0.3", path = "../sstable", package = "tantivy-sstable" } +common = { version= "0.7", path = "../common", package = "tantivy-common" } +tantivy-bitpacker = { version= "0.6", path = "../bitpacker/" } serde = "1.0.152" downcast-rs = "1.2.0" diff --git a/common/Cargo.toml b/common/Cargo.toml index 91765b8f7e..a04bfcdb3f 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy-common" -version = "0.6.0" +version = "0.7.0" authors = ["Paul Masurel ", "Pascal Seitz "] license = "MIT" edition = "2021" @@ -14,7 +14,7 @@ repository = "https://github.com/quickwit-oss/tantivy" [dependencies] byteorder = "1.4.3" -ownedbytes = { version= "0.6", path="../ownedbytes" } +ownedbytes = { version= "0.7", path="../ownedbytes" } async-trait = "0.1" time = { version = "0.3.10", features = ["serde-well-known"] } serde = { version = "1.0.136", features = ["derive"] } diff --git a/ownedbytes/Cargo.toml b/ownedbytes/Cargo.toml index 8f990b3d37..2391dbef6f 100644 --- a/ownedbytes/Cargo.toml +++ b/ownedbytes/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Paul Masurel ", "Pascal Seitz "] name = "ownedbytes" -version = "0.6.0" +version = "0.7.0" edition = "2021" description = "Expose data as static slice" license = "MIT" diff --git a/query-grammar/Cargo.toml b/query-grammar/Cargo.toml index 26be4e72a6..b9fecb25ab 100644 --- a/query-grammar/Cargo.toml +++ b/query-grammar/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy-query-grammar" -version = "0.21.0" +version = "0.22.0" authors = ["Paul Masurel "] license = "MIT" categories = ["database-implementations", "data-structures"] diff --git a/sstable/Cargo.toml b/sstable/Cargo.toml index 7cce076961..91d629229b 100644 --- a/sstable/Cargo.toml +++ b/sstable/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy-sstable" -version = "0.2.0" +version = "0.3.0" edition = "2021" license = "MIT" 
homepage = "https://github.com/quickwit-oss/tantivy" @@ -10,8 +10,8 @@ categories = ["database-implementations", "data-structures", "compression"] description = "sstables for tantivy" [dependencies] -common = {version= "0.6", path="../common", package="tantivy-common"} -tantivy-bitpacker = { version= "0.5", path="../bitpacker" } +common = {version= "0.7", path="../common", package="tantivy-common"} +tantivy-bitpacker = { version= "0.6", path="../bitpacker" } tantivy-fst = "0.5" # experimental gives us access to Decompressor::upper_bound zstd = { version = "0.13", features = ["experimental"] } diff --git a/stacker/Cargo.toml b/stacker/Cargo.toml index 80062ac418..1702940cd0 100644 --- a/stacker/Cargo.toml +++ b/stacker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy-stacker" -version = "0.2.0" +version = "0.3.0" edition = "2021" license = "MIT" homepage = "https://github.com/quickwit-oss/tantivy" @@ -9,7 +9,7 @@ description = "term hashmap used for indexing" [dependencies] murmurhash32 = "0.3" -common = { version = "0.6", path = "../common/", package = "tantivy-common" } +common = { version = "0.7", path = "../common/", package = "tantivy-common" } ahash = { version = "0.8.11", default-features = false, optional = true } rand_distr = "0.4.3" diff --git a/tokenizer-api/Cargo.toml b/tokenizer-api/Cargo.toml index e8e47589f0..0ebcbb89ae 100644 --- a/tokenizer-api/Cargo.toml +++ b/tokenizer-api/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy-tokenizer-api" -version = "0.2.0" +version = "0.3.0" license = "MIT" edition = "2021" description = "Tokenizer API of tantivy"