Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ public final class Constants {
// Configuration property key for the maximum result-set size of an index search.
public static final String INDEX_SEARCH_MAX_RESULT_SET_SIZE = "atlas.graph.index.search.max-result-set-size";
// Configuration property key for the maximum query-string length when searching types.
public static final String INDEX_SEARCH_TYPES_MAX_QUERY_STR_LENGTH = "atlas.graph.index.search.types.max-query-str-length";
// Configuration property key for the maximum query-string length when searching tags.
public static final String INDEX_SEARCH_TAGS_MAX_QUERY_STR_LENGTH = "atlas.graph.index.search.tags.max-query-str-length";
// Configuration property key for the Solr maximum token length. SearchProcessor reads it
// (fallback 255) and compares it against the filter value length for STARTS_WITH,
// ENDS_WITH and CONTAINS filters.
public static final String INDEX_SEARCH_SOLR_MAX_TOKEN_LENGTH = "atlas.graph.solr.index.search.max-token-length";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adds a new configurable constant tied to Solr’s default maxTokenLen (255).

Useful for operators (STARTS_WITH, ENDS_WITH, CONTAINS) that can’t safely be processed via the index when values exceed the token length.

Why this helps: avoids Solr truncation and allows search to fall back to the graph safely on long values.

// Configuration property key for the vertex prefix used in index searches.
public static final String INDEX_SEARCH_VERTEX_PREFIX_PROPERTY = "atlas.graph.index.search.vertex.prefix";
// NOTE(review): presumably the value used when the property above is not set - confirm against the reader of this constant.
public static final String INDEX_SEARCH_VERTEX_PREFIX_DEFAULT = "$v$";
// Configuration property key for the maximum full-text query-string length.
public static final String MAX_FULLTEXT_QUERY_STR_LENGTH = "atlas.graph.fulltext-max-query-str-length";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ public abstract class SearchProcessor {
// Limits below are resolved once, at class initialization, through getApplicationProperty(key, fallback).
// NOTE(review): the second argument is presumably the default used when the property is absent - confirm in getApplicationProperty.

// Value of Constants.INDEX_SEARCH_MAX_RESULT_SET_SIZE (fallback 150).
public static final int MAX_RESULT_SIZE = getApplicationProperty(Constants.INDEX_SEARCH_MAX_RESULT_SET_SIZE, 150);
// Value of Constants.INDEX_SEARCH_TYPES_MAX_QUERY_STR_LENGTH (fallback 512).
public static final int MAX_QUERY_STR_LENGTH_TYPES = getApplicationProperty(Constants.INDEX_SEARCH_TYPES_MAX_QUERY_STR_LENGTH, 512);
// Value of Constants.INDEX_SEARCH_TAGS_MAX_QUERY_STR_LENGTH (fallback 512).
public static final int MAX_QUERY_STR_LENGTH_TAGS = getApplicationProperty(Constants.INDEX_SEARCH_TAGS_MAX_QUERY_STR_LENGTH, 512);
// Value of Constants.INDEX_SEARCH_SOLR_MAX_TOKEN_LENGTH (fallback 255). isIndexSearchable() treats
// STARTS_WITH / ENDS_WITH / CONTAINS filters whose value is longer than this as not index-searchable.
public static final int SOLR_MAX_TOKEN_STR_LENGTH = getApplicationProperty(Constants.INDEX_SEARCH_SOLR_MAX_TOKEN_LENGTH, 255);
// Prefix obtained from AtlasGraphUtilsV2.getIndexSearchPrefix().
public static final String INDEX_SEARCH_PREFIX = AtlasGraphUtilsV2.getIndexSearchPrefix();
// " AND " with a space on each side. NOTE(review): presumably the separator for joining query clauses - confirm at usage sites.
public static final String AND_STR = " AND ";
// Shared empty-string constant.
public static final String EMPTY_STRING = "";
Expand Down Expand Up @@ -671,6 +672,10 @@ protected AtlasGraphQuery toGraphFilterQuery(Set<? extends AtlasStructType> stru

for (AtlasStructType structType : structTypes) {
String qualifiedName = structType.getVertexPropertyName(criteria.getAttributeName());
if (isIndexSearchable(criteria, structType)) {
Copy link
Contributor

@chaitalicod chaitalicod May 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For how many hive_columns per hive_table, and across how many hive_tables, was this fix tested using basic search with such very long qualifiedName and name values?

LOG.debug("toGraphFilterQuery() ==> skipped attribute: {}, filter value: {}, and operator: {}", criteria.getAttributeName(), criteria.getAttributeValue(), criteria.getOperator());
continue;
}

if (filterAttributes.contains(qualifiedName)) {
String attrName = criteria.getAttributeName();
Expand Down Expand Up @@ -866,6 +871,10 @@ private boolean isIndexSearchable(FilterCriteria filterCriteria, AtlasStructType
} else if (operator == SearchParameters.Operator.CONTAINS && AtlasAttribute.hastokenizeChar(attributeValue) && indexType == null) { // indexType = TEXT
LOG.debug("{} operator found for string (TEXT) attribute {} and special characters found in filter value {}, deferring to in-memory or graph query (might cause poor performance)", operator, qualifiedName, attributeValue);

ret = false;
} else if ((operator == SearchParameters.Operator.STARTS_WITH || operator == SearchParameters.Operator.ENDS_WITH || operator == SearchParameters.Operator.CONTAINS) && attributeValue.length() > SOLR_MAX_TOKEN_STR_LENGTH) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bypasses index for these operators when value length exceeds the limit.

Falls back to a graph traversal!

LOG.debug("{} operator found for string attribute {} (SOLR MAX TOKEN STR LENGTH:{}) and filter value {}, deferring to in-memory or graph query (might cause poor performance)", operator, qualifiedName, SOLR_MAX_TOKEN_STR_LENGTH, attributeValue);

ret = false;
}
}
Expand Down Expand Up @@ -920,6 +929,10 @@ private String toIndexQuery(Set<? extends AtlasStructType> structTypes, FilterCr
ArrayList<String> orExpQuery = new ArrayList<>();

for (AtlasStructType structType : structTypes) {
if (!isIndexSearchable(criteria, structType)) {
LOG.debug("toIndexQuery() ==> skipped attribute: {}, and filter value: {}, and operator: {}", criteria.getAttributeName(), criteria.getAttributeValue(), criteria.getOperator());
continue;
}
String name = structType.getVertexPropertyName(criteria.getAttributeName());

if (filterAttributes.contains(name)) {
Expand Down
61 changes: 59 additions & 2 deletions repository/src/test/java/org/apache/atlas/BasicTestSetup.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ public abstract class BasicTestSetup extends AtlasTestBase {
protected static final String HIVE_TABLE_TYPE = "hive_table";
protected static final String DATASET_SUBTYPE = "Asset";
protected static final String HDFS_PATH = "hdfs_path";
protected static final String COLUMN_TYPE = "hive_column";

private static final String COLUMN_TYPE = "hive_column";
private static final String HIVE_PROCESS_TYPE = "hive_process";
private static final String STORAGE_DESC_TYPE = "hive_storagedesc";
private static final String VIEW_TYPE = "hive_process";
Expand Down Expand Up @@ -289,6 +289,45 @@ public AtlasEntity.AtlasEntitiesWithExtInfo hiveTestEntities() {

entities.add(datasetSubType);

List<AtlasEntity> sampleColumns = ImmutableList.of(column("sm_time_id", "int", "time id"),
column("sm_product_id", "int", "product id"),
column("sm_customer_id", "int", "customer id"),
column("sm_sales", "double", "product id"),
column("sm_test", "int", "test 1"),
column("sm_test_limit", "int", "test limit 1"));

entities.addAll(sampleColumns);

AtlasEntity sampleTable = table("sample_table", "sample table", salesDB, sd, "Dev 1", "Managed", sampleColumns);
sampleTable.setAttribute("createTime", new Date(2018, 01, 01));

entities.add(sampleTable);

List<AtlasEntity> longTableNameColumns = ImmutableList.of(column("l_id", "int", "time id"),
column("l_product_id", "int", "product id"),
column("l_customer_id", "int", "customer id"),
column("l_sales", "double", "product id"),
column("l_test", "int", "test 1"));

entities.addAll(longTableNameColumns);

AtlasEntity longTableName = table("rltdvhrxhocivajyqojaxulwzhgzgzpkfolziacfnndzncjkthzeaeykdhhrjqdeibhdgiepwqkinvqzqxevushydtwjaabzgbfjmzvcbsoxewruhyhciyjefzsnokxvbeiiowzbhlkmujcnwilslgeswobzwwvkugyupsemqxsbdcmgrlpxmeuljvxyddvpccvcloupjorziwhogwnjvsdrwksvrbxcxjlcrcmrvvmbdmenafmvgrqzcaqbgpnhxiqbvxcbnudafsmjzvlzzzzpqmjkngbximmbjbijrqfb", "table name length 300", salesDB, sd, "Dev 2", "Managed", longTableNameColumns);
longTableName.setAttribute("createTime", new Date(2025, 05, 16));

entities.add(longTableName);

List<AtlasEntity> longTokenizeTableNameColumns = ImmutableList.of(column("l_tknz_id", "int", "time id"),
column("l_tknz_product_id", "int", "product id"),
column("l_tknz_sales", "double", "product id"),
column("l_tknz_test", "int", "test 1"));

entities.addAll(longTokenizeTableNameColumns);

AtlasEntity longTokenizeTableName = table("rrrrtokenizeeeeivajaxulwzhgzgzpkfoianndzncjkthzeaeykdhhrjqdeibhdgiepwqkinvqzqxevushydtwjaabzgbfjmzvcbsoxewruhyhciyjefzsnokxvbeiiowzbhlkmujcnwilslgeswobzwwvkugyupsemqxsbdcmgrlpxmeuljvxyddvpccvcloupjogrqzcaqbgpnhxiqbvxcbnudafsmaajzvlzzzzpqmjkngbximmbjbijrq1", "table name length 300, Tokenized", salesDB, sd, "Dev 3", "Managed", longTokenizeTableNameColumns);
longTokenizeTableName.setAttribute("createTime", new Date(2025, 05, 16));

entities.add(longTokenizeTableName);

return new AtlasEntity.AtlasEntitiesWithExtInfo(entities);
}

Expand Down Expand Up @@ -317,6 +356,24 @@ public SearchParameters.FilterCriteria getSingleFilterCondition(String attName,
return filterCriteria;
}

/**
 * Creates a filter criteria that carries a single attribute condition.
 *
 * @param attName   attribute name to set on the criteria
 * @param op        operator to set on the criteria
 * @param attrValue attribute value to set on the criteria
 * @return a new {@code FilterCriteria} populated with the given name, operator and value
 */
public SearchParameters.FilterCriteria getSingleFilterCriteria(String attName, SearchParameters.Operator op, String attrValue) {
    final SearchParameters.FilterCriteria criteria = new SearchParameters.FilterCriteria();

    criteria.setAttributeName(attName);
    criteria.setOperator(op);
    criteria.setAttributeValue(attrValue);

    return criteria;
}

/**
 * Creates search parameters for the given type, optional entity filter and result limit.
 *
 * @param typeName     type name to set on the parameters
 * @param entityFilter entity filter to attach; left unset on the parameters when {@code null}
 * @param limit        limit to set on the parameters
 * @return a new {@code SearchParameters} instance populated from the arguments
 */
public SearchParameters getSearchParameters(String typeName, SearchParameters.FilterCriteria entityFilter, int limit) {
    final SearchParameters searchParameters = new SearchParameters();
    final boolean hasEntityFilter = entityFilter != null;

    searchParameters.setTypeName(typeName);

    // Only attach a filter when the caller supplied one.
    if (hasEntityFilter) {
        searchParameters.setEntityFilters(entityFilter);
    }

    searchParameters.setLimit(limit);

    return searchParameters;
}

public void assignGlossary() {
try {
AtlasGlossary glossary = new AtlasGlossary();
Expand Down Expand Up @@ -530,7 +587,7 @@ protected AtlasEntity table(String name, String description, AtlasEntity db, Atl

AtlasEntity table = new AtlasEntity(HIVE_TABLE_TYPE);
table.setAttribute("name", name);
table.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, dbName + "." + name);
table.setAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, dbName + "." + name + "@" + clusterName);
table.setAttribute("description", description);
table.setAttribute("owner", owner);
table.setAttribute("tableType", tableType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ public void term() throws AtlasBaseException {

params.setTermName(SALES_TERM + "@" + SALES_GLOSSARY);

assertSearchProcessorWithoutMarker(params, 10);
assertSearchProcessorWithoutMarker(params, 13);
}

// TSP execute and CSP,ESP filter
Expand All @@ -130,7 +130,7 @@ public void termEntity() throws AtlasBaseException {
params.setTermName(SALES_TERM + "@" + SALES_GLOSSARY);
params.setTypeName(HIVE_TABLE_TYPE);

assertSearchProcessorWithoutMarker(params, 10);
assertSearchProcessorWithoutMarker(params, 13);
}

@Test
Expand Down Expand Up @@ -458,7 +458,7 @@ public void termMarker() throws AtlasBaseException {
params.setTermName(SALES_TERM + "@" + SALES_GLOSSARY);
params.setMarker("*");

assertSearchProcessorWithoutMarker(params, 10);
assertSearchProcessorWithoutMarker(params, 13);
}

@Test
Expand Down Expand Up @@ -667,8 +667,8 @@ public void searchWithEntityQuickSearchSortAsc() throws AtlasBaseException {

assertTrue(CollectionUtils.isNotEmpty(list));
assertEquals(list.size(), 3);
assertTrue(list.get(0).getAttribute("owner").toString().equalsIgnoreCase("Jane BI"));
assertTrue(list.get(1).getAttribute("owner").toString().equalsIgnoreCase("Joe"));
assertTrue(list.get(0).getAttribute("owner").toString().equalsIgnoreCase("Dev 1"));
assertTrue(list.get(1).getAttribute("owner").toString().equalsIgnoreCase("Dev 2"));
}

@Test
Expand All @@ -688,7 +688,7 @@ public void searchWithEntityQuickSearchSortDesc() throws AtlasBaseException {
assertTrue(CollectionUtils.isNotEmpty(list));
assertEquals(list.size(), 3);
assertTrue(list.get(0).getDisplayText().equalsIgnoreCase("time_dim"));
assertTrue(list.get(1).getDisplayText().equalsIgnoreCase("sales_fact_monthly_mv"));
assertTrue(list.get(1).getDisplayText().equalsIgnoreCase("sample_table"));
}

@Test
Expand Down
Loading