From 5005762098177b7c0d59f4d2a2cbbf908c53b652 Mon Sep 17 00:00:00 2001 From: Manav Patel Date: Mon, 3 Feb 2025 15:21:54 -0800 Subject: [PATCH] Added docstrings, added verifySearchable to check if the field can be searched, fixed issues with min_chars --- .../server/analysis/PrefixWrappedAnalyzer.java | 17 +++++++++++++++++ .../nrtsearch/server/field/AtomFieldDef.java | 1 + .../nrtsearch/server/field/TextFieldDef.java | 12 ++++++------ .../field/properties/PrefixQueryable.java | 11 ++++++++++- .../server/query/QueryNodeMapper.java | 18 ++++-------------- .../server/field/PrefixFieldDefTest.java | 6 +++--- .../server/query/PrefixQueryTest.java | 5 +++-- 7 files changed, 44 insertions(+), 26 deletions(-) diff --git a/src/main/java/com/yelp/nrtsearch/server/analysis/PrefixWrappedAnalyzer.java b/src/main/java/com/yelp/nrtsearch/server/analysis/PrefixWrappedAnalyzer.java index 0447434d1..59eb1b62c 100644 --- a/src/main/java/com/yelp/nrtsearch/server/analysis/PrefixWrappedAnalyzer.java +++ b/src/main/java/com/yelp/nrtsearch/server/analysis/PrefixWrappedAnalyzer.java @@ -20,11 +20,23 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; +/** + * An {@link AnalyzerWrapper} that wraps another analyzer and applies an Edge N-Gram token filter to + * the token stream. + */ public class PrefixWrappedAnalyzer extends AnalyzerWrapper { private final int minChars; private final int maxChars; private final Analyzer delegate; + /** + * Create a new {@link PrefixWrappedAnalyzer} that wraps the given {@link Analyzer} and sets + * applies an Edge N-Gram token filter to the token stream. + * + * @param delegate the analyzer to wrap + * @param minChars the minimum number of characters for the edge n-grams + * @param maxChars the maximum number of characters for the edge n-grams + */ public PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars) { super(delegate.getReuseStrategy()); this.delegate = delegate; @@ -44,4 +56,9 @@ protected TokenStreamComponents wrapComponents( new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars, false); return new TokenStreamComponents(components.getSource(), filter); } + + @Override + public String toString() { + return "PrefixWrappedAnalyzer(" + delegate.toString() + ")"; + } } diff --git a/src/main/java/com/yelp/nrtsearch/server/field/AtomFieldDef.java b/src/main/java/com/yelp/nrtsearch/server/field/AtomFieldDef.java index f69d8ce88..800555bc3 100644 --- a/src/main/java/com/yelp/nrtsearch/server/field/AtomFieldDef.java +++ b/src/main/java/com/yelp/nrtsearch/server/field/AtomFieldDef.java @@ -153,6 +153,7 @@ public Query getRangeQuery(RangeQuery rangeQuery) { @Override public Query getPrefixQuery(PrefixQuery prefixQuery, MultiTermQuery.RewriteMethod rewriteMethod) { + verifySearchable("Prefix query"); return new org.apache.lucene.search.PrefixQuery( new Term(prefixQuery.getField(), prefixQuery.getPrefix()), rewriteMethod); } diff --git a/src/main/java/com/yelp/nrtsearch/server/field/TextFieldDef.java b/src/main/java/com/yelp/nrtsearch/server/field/TextFieldDef.java index 94ffec658..4e50422bf 100644 --- a/src/main/java/com/yelp/nrtsearch/server/field/TextFieldDef.java +++ b/src/main/java/com/yelp/nrtsearch/server/field/TextFieldDef.java @@ -42,10 +42,7 @@ public TextFieldDef( String name, Field requestField, FieldDefCreator.FieldDefCreatorContext context) { super(name, requestField, context); if (requestField.hasIndexPrefixes()) { - if (!requestField.getSearch()) { - throw new IllegalArgumentException( - "Cannot set index_prefixes on unindexed field [" + name + "]"); - } + verifySearchable("Prefix query"); int minChars = requestField.getIndexPrefixes().hasMinChars() ? requestField.getIndexPrefixes().getMinChars() @@ -61,6 +58,8 @@ public TextFieldDef( getName(), Field.newBuilder() .setSearch(true) + .setAnalyzer(requestField.getAnalyzer()) + .setIndexAnalyzer(requestField.getIndexAnalyzer()) .setIndexPrefixes( IndexPrefixes.newBuilder() .setMinChars(minChars) @@ -129,7 +128,8 @@ public void parseDocumentField( @Override public Query getPrefixQuery(PrefixQuery prefixQuery, MultiTermQuery.RewriteMethod rewriteMethod) { - if (prefixFieldDef != null && prefixFieldDef.accept(prefixQuery.getPrefix().length())) { + verifySearchable("Prefix query"); + if (hasPrefix() && prefixFieldDef.accept(prefixQuery.getPrefix().length())) { Query query = prefixFieldDef.getPrefixQuery(prefixQuery); if (rewriteMethod == null || rewriteMethod == MultiTermQuery.CONSTANT_SCORE_REWRITE @@ -147,7 +147,7 @@ public void validatePrefix(int minChars, int maxChars) { throw new IllegalArgumentException( "min_chars [" + minChars + "] must be less than max_chars [" + maxChars + "]"); } - if (minChars < 2) { + if (minChars < 1) { throw new IllegalArgumentException("min_chars [" + minChars + "] must be greater than zero"); } if (maxChars >= 20) { diff --git a/src/main/java/com/yelp/nrtsearch/server/field/properties/PrefixQueryable.java b/src/main/java/com/yelp/nrtsearch/server/field/properties/PrefixQueryable.java index 70e1d0311..da9ac74a1 100644 --- a/src/main/java/com/yelp/nrtsearch/server/field/properties/PrefixQueryable.java +++ b/src/main/java/com/yelp/nrtsearch/server/field/properties/PrefixQueryable.java @@ -1,5 +1,5 @@ /* - * Copyright 2020 Yelp Inc. + * Copyright 2025 Yelp Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,10 +15,19 @@ */ package com.yelp.nrtsearch.server.field.properties; +import com.yelp.nrtsearch.server.field.FieldDef; import com.yelp.nrtsearch.server.grpc.PrefixQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; +/** Trait interface for {@link FieldDef} types that can be queried by prefix queries. */ public interface PrefixQueryable { + /** + * Build a prefix query for this field type with the given configuration. + * + * @param prefixQuery prefix query configuration + * @param rewriteMethod method to use for rewriting the prefix query + * @return lucene prefix query + */ Query getPrefixQuery(PrefixQuery prefixQuery, MultiTermQuery.RewriteMethod rewriteMethod); } diff --git a/src/main/java/com/yelp/nrtsearch/server/query/QueryNodeMapper.java b/src/main/java/com/yelp/nrtsearch/server/query/QueryNodeMapper.java index 3f8e645cb..7cacd37f4 100644 --- a/src/main/java/com/yelp/nrtsearch/server/query/QueryNodeMapper.java +++ b/src/main/java/com/yelp/nrtsearch/server/query/QueryNodeMapper.java @@ -20,7 +20,6 @@ import com.yelp.nrtsearch.server.analysis.AnalyzerCreator; import com.yelp.nrtsearch.server.doc.DocLookup; import com.yelp.nrtsearch.server.field.FieldDef; -import com.yelp.nrtsearch.server.field.IndexableFieldDef; import com.yelp.nrtsearch.server.field.TextBaseFieldDef; import com.yelp.nrtsearch.server.field.properties.*; import com.yelp.nrtsearch.server.grpc.ExistsQuery; @@ -51,7 +50,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.queries.function.FunctionMatchQuery; import org.apache.lucene.queries.function.FunctionScoreQuery; @@ -567,23 +565,15 @@ private Query getExistsQuery(ExistsQuery existsQuery, IndexState state) { private static Query getPrefixQuery(PrefixQuery prefixQuery, IndexState state) { FieldDef fieldDef = state.getFieldOrThrow(prefixQuery.getField()); - if (!(fieldDef instanceof IndexableFieldDef)) { - throw new IllegalArgumentException( - "Field \"" + prefixQuery.getPrefix() + "\" is not indexable"); - } - IndexOptions indexOptions = ((IndexableFieldDef) fieldDef).getFieldType().indexOptions(); - if (indexOptions == IndexOptions.NONE) { - throw new IllegalArgumentException( - "Field \"" + prefixQuery.getField() + "\" is not indexed with terms"); - } - MultiTermQuery.RewriteMethod rewriteMethod = - getRewriteMethod(prefixQuery.getRewrite(), prefixQuery.getRewriteTopTermsSize()); if (!(fieldDef instanceof PrefixQueryable)) { throw new IllegalArgumentException( - "Field " + fieldDef.getName() + "does not support PrefixQuery"); + "Field " + fieldDef.getName() + " does not support PrefixQuery"); } + MultiTermQuery.RewriteMethod rewriteMethod = + getRewriteMethod(prefixQuery.getRewrite(), prefixQuery.getRewriteTopTermsSize()); + return ((PrefixQueryable) fieldDef).getPrefixQuery(prefixQuery, rewriteMethod); } diff --git a/src/test/java/com/yelp/nrtsearch/server/field/PrefixFieldDefTest.java b/src/test/java/com/yelp/nrtsearch/server/field/PrefixFieldDefTest.java index 27c14c4f5..16565e860 100644 --- a/src/test/java/com/yelp/nrtsearch/server/field/PrefixFieldDefTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/field/PrefixFieldDefTest.java @@ -70,12 +70,12 @@ public void testInvalidMinChars() { Field field = Field.newBuilder() .setSearch(true) - .setIndexPrefixes(IndexPrefixes.newBuilder().setMinChars(1).setMaxChars(5).build()) + .setIndexPrefixes(IndexPrefixes.newBuilder().setMinChars(0).setMaxChars(5).build()) .build(); createFieldDef(field); - fail("Should throw IllegalArgumentException for min_chars < 2"); + fail("Should throw IllegalArgumentException for min_chars < 1"); } catch (IllegalArgumentException e) { - assertEquals("min_chars [1] must be greater than zero", e.getMessage()); + assertEquals("min_chars [0] must be greater than zero", e.getMessage()); } } diff --git a/src/test/java/com/yelp/nrtsearch/server/query/PrefixQueryTest.java b/src/test/java/com/yelp/nrtsearch/server/query/PrefixQueryTest.java index 267c538fd..f49ee6811 100644 --- a/src/test/java/com/yelp/nrtsearch/server/query/PrefixQueryTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/query/PrefixQueryTest.java @@ -122,7 +122,7 @@ public void testFieldNotIndexable() { doQuery("virtual", "prefix"); fail(); } catch (StatusRuntimeException e) { - assertTrue(e.getMessage().contains("Field \"prefix\" is not indexable")); + assertTrue(e.getMessage().contains("Field virtual does not support PrefixQuery")); } } @@ -133,7 +133,8 @@ public void testFieldNoTerms() { fail(); } catch (StatusRuntimeException e) { assertTrue( - e.getMessage().contains("Field \"text_field.not_searchable\" is not indexed with terms")); + e.getMessage() + .contains("Prefix query requires field to be searchable: text_field.not_searchable")); } }