Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stem prefix items #30553

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public class Matching implements Cloneable, Serializable {

/** Maximum number of characters to consider when searching in this field. Used for limiting resources, especially in streaming search. */
private Integer maxLength;

/** Maximum number of occurrences for each term */
private Integer maxTermOccurrences;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ public class NoPrefixForIndexes implements Validator {
@Override
public void validate(Context context) {
for (SearchCluster cluster : context.model().getSearchClusters()) {
if (cluster instanceof IndexedSearchCluster) {
IndexedSearchCluster sc = (IndexedSearchCluster) cluster;
if (cluster instanceof IndexedSearchCluster sc) {
for (DocumentDatabase docDb : sc.getDocumentDbs()) {
DerivedConfiguration sdConfig = docDb.getDerivedConfiguration();
Schema schema = sdConfig.getSchema();
Expand Down
25 changes: 20 additions & 5 deletions container-search/abi-spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -524,9 +524,12 @@
"methods" : [
"public void <init>(java.lang.String)",
"public void <init>(java.lang.String, boolean)",
"public void <init>(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.ExactStringItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.Item$ItemType getItemType()",
"public java.lang.String getName()",
"public java.lang.String stringValue()"
"public java.lang.String stringValue()",
"public bridge synthetic com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)"
],
"fields" : [ ]
},
Expand Down Expand Up @@ -914,6 +917,7 @@
"public"
],
"methods" : [
"public com.yahoo.prelude.query.MarkerWordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public boolean isStartAnchor()",
"public boolean isEndAnchor()",
"protected java.lang.String getEncodedWord()",
Expand All @@ -923,7 +927,8 @@
"public static com.yahoo.prelude.query.MarkerWordItem createStartOfHost(java.lang.String)",
"public static com.yahoo.prelude.query.MarkerWordItem createStartOfHost()",
"public static com.yahoo.prelude.query.MarkerWordItem createEndOfHost(java.lang.String)",
"public static com.yahoo.prelude.query.MarkerWordItem createEndOfHost()"
"public static com.yahoo.prelude.query.MarkerWordItem createEndOfHost()",
"public bridge synthetic com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)"
],
"fields" : [ ]
},
Expand Down Expand Up @@ -1296,9 +1301,12 @@
"public void <init>(java.lang.String)",
"public void <init>(java.lang.String, boolean)",
"public void <init>(java.lang.String, java.lang.String)",
"public void <init>(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.PrefixItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.Item$ItemType getItemType()",
"public java.lang.String getName()",
"public java.lang.String stringValue()"
"public java.lang.String stringValue()",
"public bridge synthetic com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)"
],
"fields" : [ ]
},
Expand Down Expand Up @@ -1622,9 +1630,12 @@
"methods" : [
"public void <init>(java.lang.String)",
"public void <init>(java.lang.String, boolean)",
"public void <init>(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.SubstringItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.Item$ItemType getItemType()",
"public java.lang.String getName()",
"public java.lang.String stringValue()"
"public java.lang.String stringValue()",
"public bridge synthetic com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)"
],
"fields" : [ ]
},
Expand All @@ -1637,9 +1648,12 @@
"methods" : [
"public void <init>(java.lang.String)",
"public void <init>(java.lang.String, boolean)",
"public void <init>(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.SuffixItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.Item$ItemType getItemType()",
"public java.lang.String getName()",
"public java.lang.String stringValue()"
"public java.lang.String stringValue()",
"public bridge synthetic com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)"
],
"fields" : [ ]
},
Expand Down Expand Up @@ -1962,6 +1976,7 @@
"public void <init>(com.yahoo.prelude.query.parser.Token, boolean)",
"public void <init>(java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public void <init>(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.WordItem newInstance(java.lang.String, java.lang.String, boolean, com.yahoo.prelude.query.Substring)",
"public com.yahoo.prelude.query.Item$ItemType getItemType()",
"public java.lang.String getName()",
"public void setWord(java.lang.String)",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ public ExactStringItem(String substring, boolean isFromQuery) {
super(substring, isFromQuery);
}

/**
 * Creates an exact string item by passing all four arguments unchanged to the superclass constructor.
 *
 * @param word        the word value handed to the superclass
 * @param indexName   the index name handed to the superclass
 * @param isFromQuery the from-query flag handed to the superclass
 * @param origin      the origin substring handed to the superclass
 */
public ExactStringItem(String word, String indexName, boolean isFromQuery, Substring origin) {
super(word, indexName, isFromQuery, origin);
}

/**
 * Returns a new ExactStringItem built from the given data, so that callers holding a
 * WordItem reference get back an item of this subclass rather than a plain WordItem.
 *
 * @param word        the word value of the new item
 * @param indexName   the index name of the new item
 * @param isFromQuery the from-query flag of the new item
 * @param origin      the origin substring of the new item
 * @return a freshly constructed ExactStringItem
 */
@Override
public ExactStringItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
    return new ExactStringItem(word, indexName, isFromQuery, origin);
}

@Override
public ItemType getItemType() {
return ItemType.EXACT;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ private MarkerWordItem(String publicSymbol, String markerWord, String indexName)
this.markerWord = markerWord;
}

/**
 * Creates a marker word item carrying the given marker word.
 *
 * The from-query flag and origin substring are forwarded to the superclass; calling the
 * two-argument super constructor here would accept them in the signature but silently discard them.
 *
 * @param publicSymbol the word value handed to the superclass
 * @param markerWord   the marker word stored on this item
 * @param indexName    the index name handed to the superclass
 * @param isFromQuery  the from-query flag handed to the superclass
 * @param origin       the origin substring handed to the superclass
 */
private MarkerWordItem(String publicSymbol, String markerWord, String indexName, boolean isFromQuery, Substring origin) {
    super(publicSymbol, indexName, isFromQuery, origin);
    this.markerWord = markerWord;
}

/**
 * Returns a new MarkerWordItem initialized with the given data and with the marker word
 * copied from this instance.
 *
 * @param word        the word value (public symbol) of the new item
 * @param indexName   the index name of the new item
 * @param isFromQuery the from-query flag passed to the new item's constructor
 * @param origin      the origin substring passed to the new item's constructor
 * @return a freshly constructed MarkerWordItem sharing this item's marker word
 */
@Override
public MarkerWordItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
    return new MarkerWordItem(word, markerWord, indexName, isFromQuery, origin);
}

/** Returns whether the word of this item equals the start anchor symbol. */
public boolean isStartAnchor() {
    final String currentWord = getWord();
    return currentWord.equals(startAnchor);
}

/** Returns whether the word of this item equals the end anchor symbol. */
public boolean isEndAnchor() {
    final String currentWord = getWord();
    return currentWord.equals(endAnchor);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,17 @@ public PrefixItem(String prefix, boolean isFromQuery) {
super(prefix, isFromQuery);
}

public PrefixItem(String prefix, String indexName) { super(prefix, indexName); }
/**
 * Creates a prefix item by passing both arguments unchanged to the superclass constructor.
 *
 * @param prefix    the word value handed to the superclass
 * @param indexName the index name handed to the superclass
 */
public PrefixItem(String prefix, String indexName) {
super(prefix, indexName);
}

/**
 * Creates a prefix item by passing all four arguments unchanged to the superclass constructor.
 *
 * @param prefix      the word value handed to the superclass
 * @param indexName   the index name handed to the superclass
 * @param isFromQuery the from-query flag handed to the superclass
 * @param origin      the origin substring handed to the superclass
 */
public PrefixItem(String prefix, String indexName, boolean isFromQuery, Substring origin) {
super(prefix, indexName, isFromQuery, origin);
}

/**
 * Returns a new PrefixItem built from the given data, so that callers holding a
 * WordItem reference get back an item of this subclass rather than a plain WordItem.
 *
 * @param word        the word value of the new item
 * @param indexName   the index name of the new item
 * @param isFromQuery the from-query flag of the new item
 * @param origin      the origin substring of the new item
 * @return a freshly constructed PrefixItem
 */
@Override
public PrefixItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
    return new PrefixItem(word, indexName, isFromQuery, origin);
}

@Override
public ItemType getItemType() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ public SubstringItem(String substring, boolean isFromQuery) {
super(substring, isFromQuery);
}

/**
 * Creates a substring item by passing all four arguments unchanged to the superclass constructor.
 *
 * @param substring   the word value handed to the superclass
 * @param indexName   the index name handed to the superclass
 * @param isFromQuery the from-query flag handed to the superclass
 * @param origin      the origin substring handed to the superclass
 */
public SubstringItem(String substring, String indexName, boolean isFromQuery, Substring origin) {
super(substring, indexName, isFromQuery, origin);
}

/**
 * Returns a new SubstringItem built from the given data, so that callers holding a
 * WordItem reference get back an item of this subclass rather than a plain WordItem.
 *
 * @param word        the word value of the new item
 * @param indexName   the index name of the new item
 * @param isFromQuery the from-query flag of the new item
 * @param origin      the origin substring of the new item
 * @return a freshly constructed SubstringItem
 */
@Override
public SubstringItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
    return new SubstringItem(word, indexName, isFromQuery, origin);
}

@Override
public ItemType getItemType() {
return ItemType.SUBSTRING;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ public SuffixItem(String suffix, boolean isFromQuery) {
super(suffix, isFromQuery);
}

/**
 * Creates a suffix item by passing all four arguments unchanged to the superclass constructor.
 *
 * @param suffix      the word value handed to the superclass
 * @param indexName   the index name handed to the superclass
 * @param isFromQuery the from-query flag handed to the superclass
 * @param origin      the origin substring handed to the superclass
 */
public SuffixItem(String suffix, String indexName, boolean isFromQuery, Substring origin) {
    super(suffix, indexName, isFromQuery, origin);
}

/**
 * Returns a new SuffixItem built from the given data, so that callers holding a
 * WordItem reference get back an item of this subclass rather than a plain WordItem.
 *
 * @param word        the word value of the new item
 * @param indexName   the index name of the new item
 * @param isFromQuery the from-query flag of the new item
 * @param origin      the origin substring of the new item
 * @return a freshly constructed SuffixItem
 */
@Override
public SuffixItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
    return new SuffixItem(word, indexName, isFromQuery, origin);
}

@Override
public ItemType getItemType() {
return ItemType.SUFFIX;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ public WordItem(String word, String indexName, boolean isFromQuery, Substring or
setWord(word);
}

/**
 * Creates a fresh item of the same kind as this one from the supplied data.
 * This base implementation produces a plain WordItem; subclasses are expected to
 * return their own type and carry over any subclass-specific fields from this instance.
 *
 * @param word        the word value of the new item
 * @param indexName   the index name of the new item
 * @param isFromQuery the from-query flag of the new item
 * @param origin      the origin substring of the new item
 * @return the newly constructed item
 */
public WordItem newInstance(String word, String indexName, boolean isFromQuery, Substring origin) {
    final WordItem fresh = new WordItem(word, indexName, isFromQuery, origin);
    return fresh;
}

/** Returns the item type of a plain word item, which is always {@code ItemType.WORD}. */
public ItemType getItemType() {
return ItemType.WORD;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ private Item scan(Item item, StemContext context) {
}

private Item checkBlock(BlockItem b, StemContext context) {
if (b instanceof PrefixItem || !b.isWords()) return (Item) b;
if (!b.isWords()) return (Item) b;

if (b.isFromQuery() && !b.isStemmed()) {
Index index = context.indexFacts.getIndex(b.getIndexName());
Expand All @@ -190,10 +190,8 @@ private Substring getOffsets(BlockItem b) {

// The rewriting logic is here
private Item stem(BlockItem current, StemContext context, Index index) {
Item blockAsItem = (Item)current;
CompositeItem composite;
List<StemList> segments = linguistics.getStemmer().stem(current.stringValue(), index.getStemMode(), context.language);
if (segments.isEmpty()) return blockAsItem;
if (segments.isEmpty()) return (Item)current;

String indexName = current.getIndexName();
Substring substring = getOffsets(current);
Expand All @@ -203,6 +201,7 @@ private Item stem(BlockItem current, StemContext context, Index index) {
return (Item)w;
}

CompositeItem composite;
if (context.isCJK)
composite = chooseCompositeForCJK(current, ((Item) current).getParent(), indexName);
else
Expand All @@ -219,7 +218,7 @@ private Item stem(BlockItem current, StemContext context, Index index) {
if (composite instanceof AndSegmentItem) {
andSegmentConnectivity(current, context.reverseConnectivity, composite);
}
copyAttributes(blockAsItem, composite);
copyAttributes((Item)current, composite);
composite.lock();

if (composite instanceof PhraseSegmentItem replacement) {
Expand Down Expand Up @@ -320,7 +319,11 @@ private void setMetaData(BlockItem current, Map<Item, TaggableItem> reverseConne

private WordItem singleStemSegment(Item blockAsItem, String stem, String indexName,
Substring substring) {
WordItem replacement = new WordItem(stem, indexName, true, substring);
WordItem replacement;
if (blockAsItem instanceof WordItem) // preserve the WordItem subclass type
replacement = ((WordItem)blockAsItem).newInstance(stem, indexName, true, substring);
else
replacement = new WordItem(stem, indexName, true, substring);
replacement.setStemmed(true);
copyAttributes(blockAsItem, replacement);
return replacement;
Expand Down