Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ public enum StacBasicField {
"summaries.organisation_vocabs"
),
Links("links", "links"),
LinksTitle("links.title", "links.title"),
LinksAiRole("links.ai:role", "links.ai:role"),
Collection("collection", "collection", "collection.keyword"),
AssetsSummary("assets", "assets"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,20 +286,16 @@ public ElasticSearchBase.SearchResult<StacCollectionModel> searchByParameters(Li
else {
should.add(CQLFields.fuzzy_title.getPropertyEqualToQuery(term));
should.add(CQLFields.fuzzy_desc.getPropertyEqualToQuery(term));
// Phrase match for acronym-synonym support (ticket #8387): when an acronym
// is expanded into its multi-word full name, match_phrase requires those
// words to appear consecutively, alongside (not replacing) fuzzy matching.
should.add(CQLFields.phrase_title.getPropertyEqualToQuery(term));
should.add(CQLFields.phrase_desc.getPropertyEqualToQuery(term));
}
should.add(CQLFields.parameter_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.organisation_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.platform_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.id.getPropertyEqualToQuery(term));
// There was a request not to use acronyms in the title and description of metadata, so these
// acronyms were moved to links; for example, the NRMN record is mentioned in the link title.
// This is a workaround for that requirement that still allows searching by NRMN.
// links_title_contains and credit_contains use a match query by default; exact match is not applied here.
// links_title_contains is weighted lower as it may contain combined title+description content.
should.add(BoolQuery.of(b -> b
.should(CQLFields.links_title_contains.getPropertyEqualToQuery(term))
.boost(0.5f) // lower boost to reduce promotion of link-title-only matches
)._toQuery());
should.add(CQLFields.credit_contains.getPropertyEqualToQuery(term));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,27 +128,30 @@ public void verifyApiCollectionsQueryOnText2() throws IOException {
collections.getBody().getCollections().get(1).getId(),
"Correct UUID - 9fdb1eee-bc28-43a9-88c5-972324784837");
}
/**
* Acronyms are discouraged in the title and description, so the NRMN record cannot be found through those fields;
* the acronym usually appears in a link title instead. This test makes sure the NRMN record is also found via its links.
* @throws IOException - IO Exception
*/

/** Searching an acronym ("NRMN") matches records that only contain its full form ("National Reef Monitoring Network"). */
@Test
public void verifyApiCollectionsQueryOnText3() throws IOException {
public void verifyAcronymSynonymSearch() throws IOException {
super.insertJsonToElasticRecordIndex(
// This is NRMN record where word NRMN not in title/desc but links
"8cdcdcad-399b-4bed-8cb2-29c486b6b124.json",
"7709f541-fc0c-4318-b5b9-9053aa474e0e.json"
"acronym_demo_only_acronym.json",
"acronym_demo_only_fullname.json",
"acronym_demo_unrelated.json"
);

// Call rest api directly and get query result
ResponseEntity<ExtendedCollections> collections = testRestTemplate.getForEntity(getBasePath() + "/collections?q=NRMN", ExtendedCollections.class);
assertEquals(1, Objects.requireNonNull(collections.getBody()).getTotal(), "Only 1 hit");
// Search the acronym -> should hit B (the full-name-only record) via synonym expansion.
ResponseEntity<ExtendedCollections> byAcronym = testRestTemplate.getForEntity(
getBasePath() + "/collections?q=NRMN", ExtendedCollections.class);
assertEquals(1,
Objects.requireNonNull(byAcronym.getBody()).getTotal(),
"Searching 'NRMN' should find the full-name-only record via synonym expansion"
);
assertEquals(
"8cdcdcad-399b-4bed-8cb2-29c486b6b124",
collections.getBody().getCollections().get(0).getId(),
"Correct UUID - 8cdcdcad-399b-4bed-8cb2-29c486b6b124");
"acdemo02-0000-0000-0000-000000000002",
byAcronym.getBody().getCollections().get(0).getId(),
"The matched record should be the full-name-only fixture (B)"
);
}

/**
* The datetime field after xxx/.. xxx/ etc. It uses CQL internally so no need to test Before After During in CQL
*/
Expand Down Expand Up @@ -565,7 +568,7 @@ public void verifyCQLPropertyScore() throws IOException {

// Lower score, but the fuzzy query now uses operator AND, so it tries to fuzzy-match all of the words 'dataset' and 'includes'
collections = testRestTemplate.getForEntity(getBasePath() + "/collections?q='dataset includes'&filter=score>=1", Collections.class);
assertEquals(3, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 1, with score 3");
assertEquals(1, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 1, with score 3");
assertEquals("bf287dfe-9ce4-4969-9c59-51c39ea4d011", Objects.requireNonNull(collections.getBody()).getCollections().get(0).getId(), "bf287dfe-9ce4-4969-9c59-51c39ea4d011");

// Increasing the score will drop two records
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException {
"Record return size correct"
);
// Total number of record should be this
assertEquals(5, collections.getBody().getTotal(), "Get total works");
assertEquals(4, collections.getBody().getTotal(), "Get total works");

// search_after gives you the values needed to fetch the next batch
assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after have three values");
Expand Down Expand Up @@ -273,7 +273,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException {
"Record return size correct"
);
// Total number of record should be this as the same search criteria applies
assertEquals(5, collections.getBody().getTotal(), "Get total works");
assertEquals(4, collections.getBody().getTotal(), "Get total works");

// search_after gives you the values needed to fetch the next batch
assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after have three values");
Expand Down Expand Up @@ -302,7 +302,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException {
"Record return size correct, returns the 3 remaining matching docs"
);
// Total number of record should be this as the same search criteria applies
assertEquals(5, collections.getBody().getTotal(), "Get total works");
assertEquals(4, collections.getBody().getTotal(), "Get total works");

// search_after gives you the values needed to fetch the next batch
assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields");
Expand Down Expand Up @@ -370,7 +370,7 @@ public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException {
"Record return size correct"
);
// Total number of record should be this
assertEquals(5, collections.getBody().getTotal(), "Get total works");
assertEquals(4, collections.getBody().getTotal(), "Get total works");

// search_after gives you the values needed to fetch the next batch
assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields");
Expand Down Expand Up @@ -414,7 +414,7 @@ public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException {
"Record return size should be 3 or 4 (bc55eff4 borderline), got: " + returnedSize);

// Total number of record should be this as the same search criteria applies
assertEquals(5, collections.getBody().getTotal(), "Get total works");
assertEquals(4, collections.getBody().getTotal(), "Get total works");

// search_after gives you the values needed to fetch the next batch
assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,18 +152,16 @@ public void searchByParametersWithDoubleQuote() {
} else {
should.add(CQLFields.fuzzy_title.getPropertyEqualToQuery(term));
should.add(CQLFields.fuzzy_desc.getPropertyEqualToQuery(term));
should.add(CQLFields.phrase_title.getPropertyEqualToQuery(term));
should.add(CQLFields.phrase_desc.getPropertyEqualToQuery(term));
}
should.add(CQLFields.parameter_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.organisation_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.platform_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.id.getPropertyEqualToQuery(term));
should.add(BoolQuery.of(b -> b
.should(CQLFields.links_title_contains.getPropertyEqualToQuery(term))
.boost(0.5f) // lower boost to reduce promotion of link-title-only matches
)._toQuery());
should.add(CQLFields.credit_contains.getPropertyEqualToQuery(term));
}
assertEquals(8, should.size(), "Exact match should produce 8 queries (title + description + other fields)");
assertEquals(7, should.size(), "Exact match should produce 7 queries (title + description + 5 other fields)");
assertTrue(should.get(0).isMatchPhrase(), "Title query should be MatchPhraseQuery");
assertTrue(should.get(1).isMatchPhrase(), "Description query should be MatchPhraseQuery");
}
Expand All @@ -182,18 +180,16 @@ public void searchByParametersWithoutDoubleQuote() {
} else {
should.add(CQLFields.fuzzy_title.getPropertyEqualToQuery(term));
should.add(CQLFields.fuzzy_desc.getPropertyEqualToQuery(term));
should.add(CQLFields.phrase_title.getPropertyEqualToQuery(term));
should.add(CQLFields.phrase_desc.getPropertyEqualToQuery(term));
}
should.add(CQLFields.parameter_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.organisation_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.platform_vocabs.getPropertyEqualToQuery(term));
should.add(CQLFields.id.getPropertyEqualToQuery(term));
should.add(BoolQuery.of(b -> b
.should(CQLFields.links_title_contains.getPropertyEqualToQuery(term))
.boost(0.5f) // lower boost to reduce promotion of link-title-only matches
)._toQuery());
should.add(CQLFields.credit_contains.getPropertyEqualToQuery(term));
}
assertEquals(8, should.size(), "Fuzzy match should produce 8 queries");
assertEquals(9, should.size(), "Fuzzy match should produce 9 queries (fuzzy + phrase title/desc + 5 other fields)");
assertTrue(should.get(0).isMatch(), "fuzzy_title should be MatchQuery");
}
}
17 changes: 17 additions & 0 deletions server/src/test/resources/databag/acronym_demo_only_acronym.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"id": "acdemo01-0000-0000-0000-000000000001",
"title": "NRMN Reef Survey Data 2024",
"description": "Reef benthic cover survey data collected at standard NRMN sites along the Australian coast.",
"extent": {
"bbox": [[110.0, -45.0, 155.0, -10.0]],
"temporal": [["2024-01-01T00:00:00Z", "2024-12-31T23:59:59Z"]]
},
"summaries": {
"score": 50,
"status": "completed",
"scope": { "code": "dataset", "name": "Demo - only acronym in title/description" },
"parameter_vocabs": [],
"platform_vocabs": [],
"organisation_vocabs": []
}
}
17 changes: 17 additions & 0 deletions server/src/test/resources/databag/acronym_demo_only_fullname.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"id": "acdemo02-0000-0000-0000-000000000002",
"title": "National Reef Monitoring Network Sub-Facility Survey",
"description": "Long-term observations of reef benthic and fish communities conducted by the National Reef Monitoring Network across temperate and tropical Australia.",
"extent": {
"bbox": [[110.0, -45.0, 155.0, -10.0]],
"temporal": [["2024-01-01T00:00:00Z", "2024-12-31T23:59:59Z"]]
},
"summaries": {
"score": 50,
"status": "completed",
"scope": { "code": "dataset", "name": "Demo - only full name in title/description" },
"parameter_vocabs": [],
"platform_vocabs": [],
"organisation_vocabs": []
}
}
17 changes: 17 additions & 0 deletions server/src/test/resources/databag/acronym_demo_unrelated.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"id": "acdemo03-0000-0000-0000-000000000003",
"title": "Ocean Temperature Observations off Tasmania",
"description": "Hourly sea surface temperature measurements collected by moored buoys.",
"extent": {
"bbox": [[143.0, -45.0, 150.0, -40.0]],
"temporal": [["2024-01-01T00:00:00Z", "2024-12-31T23:59:59Z"]]
},
"summaries": {
"score": 50,
"status": "completed",
"scope": { "code": "dataset", "name": "Demo - unrelated negative sample" },
"parameter_vocabs": [],
"platform_vocabs": [],
"organisation_vocabs": []
}
}
Loading
Loading