From 8376fae933ffc347829b380a745d24a20654af08 Mon Sep 17 00:00:00 2001 From: amber Date: Thu, 28 May 2026 09:54:43 +1000 Subject: [PATCH 1/5] add phrase query to improve acronym matching --- .../core/model/enumeration/CQLFields.java | 25 +++++++++++++++++++ .../server/core/service/ElasticSearch.java | 2 ++ 2 files changed, 27 insertions(+) diff --git a/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/CQLFields.java b/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/CQLFields.java index 6d18a300..345bf313 100644 --- a/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/CQLFields.java +++ b/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/CQLFields.java @@ -269,6 +269,31 @@ public enum CQLFields implements CQLFieldsInterface { .query(literal))._toQuery(), null ), + // Phrase match on title for acronym-synonym support. + // When an acronym is expanded into its multi-word full name by the + // search-time synonym filter, match_phrase requires those words to + // appear consecutively and in order, which avoids the loose-word + // noise that match with Operator.And alone still allows. + // Note: match_phrase does not support fuzziness, so this is added + // ALONGSIDE fuzzy_title, not as a replacement for it. + phrase_title( + null, + StacBasicField.Title.displayField, + (literal) -> MatchPhraseQuery.of(m -> m + .field(StacBasicField.Title.searchField) + .boost(2.0F) // keep title weighting consistent with fuzzy_title + .query(literal))._toQuery(), + null + ), + // Phrase match on description for acronym-synonym support (ticket #8387). 
+ phrase_desc( + null, + StacBasicField.Description.displayField, + (literal) -> MatchPhraseQuery.of(m -> m + .field(StacBasicField.Description.searchField) + .query(literal))._toQuery(), + null + ), // Contains cloud-optimized data assets_summary( StacBasicField.AssetsSummary.searchField, diff --git a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java index 5dc30e36..bac4052d 100644 --- a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java +++ b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java @@ -256,6 +256,8 @@ public ElasticSearchBase.SearchResult searchByParameters(Li for (String t : keywords) { should.add(CQLFields.fuzzy_title.getPropertyEqualToQuery(t)); should.add(CQLFields.fuzzy_desc.getPropertyEqualToQuery(t)); + should.add(CQLFields.phrase_title.getPropertyEqualToQuery(t)); + should.add(CQLFields.phrase_desc.getPropertyEqualToQuery(t)); should.add(CQLFields.parameter_vocabs.getPropertyEqualToQuery(t)); should.add(CQLFields.organisation_vocabs.getPropertyEqualToQuery(t)); should.add(CQLFields.platform_vocabs.getPropertyEqualToQuery(t)); From 4dc883cbc2945abce5f01bbc47035dd28cd1c6ba Mon Sep 17 00:00:00 2001 From: amber Date: Thu, 28 May 2026 17:06:53 +1000 Subject: [PATCH 2/5] remove links_title workaround for acronym search --- .../server/core/model/enumeration/CQLFields.java | 15 --------------- .../core/model/enumeration/StacBasicField.java | 1 - .../ogcapi/server/core/service/ElasticSearch.java | 4 ---- 3 files changed, 20 deletions(-) diff --git a/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/CQLFields.java b/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/CQLFields.java index 345bf313..26cb211a 100644 --- a/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/CQLFields.java +++ 
b/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/CQLFields.java @@ -179,21 +179,6 @@ public enum CQLFields implements CQLFieldsInterface { null, null ), - links_title_contains( - StacBasicField.LinksTitle.searchField, - StacBasicField.LinksTitle.displayField, - (literal) -> NestedQuery.of(m -> m - .path(StacBasicField.Links.searchField) - // We want the words exact so need to add space in front and end - .query(q -> q - .match(mq -> mq - .field(StacBasicField.LinksTitle.searchField) - .query(literal) - ) - ) - )._toQuery(), - null - ), links_airole_contains( StacBasicField.LinksAiRole.searchField, StacBasicField.LinksAiRole.displayField, diff --git a/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/StacBasicField.java b/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/StacBasicField.java index 8556e1bc..f6de0e17 100644 --- a/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/StacBasicField.java +++ b/server/src/main/java/au/org/aodn/ogcapi/server/core/model/enumeration/StacBasicField.java @@ -21,7 +21,6 @@ public enum StacBasicField { "summaries.organisation_vocabs" ), Links("links", "links"), - LinksTitle("links.title", "links.title"), LinksAiRole("links.ai:role", "links.ai:role"), Collection("collection", "collection", "collection.keyword"), AssetsSummary("assets", "assets"), diff --git a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java index bac4052d..5b7b51db 100644 --- a/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java +++ b/server/src/main/java/au/org/aodn/ogcapi/server/core/service/ElasticSearch.java @@ -262,10 +262,6 @@ public ElasticSearchBase.SearchResult searchByParameters(Li should.add(CQLFields.organisation_vocabs.getPropertyEqualToQuery(t)); should.add(CQLFields.platform_vocabs.getPropertyEqualToQuery(t)); 
should.add(CQLFields.id.getPropertyEqualToQuery(t)); - // A request to not using acronym in title and description in metadata, hence these - // acronym moved to links, for example NRMN record is mentioned in the link title. - // This is a work-around to the requirement but still allow use of NRMN - should.add(CQLFields.links_title_contains.getPropertyEqualToQuery(t)); should.add(CQLFields.credit_contains.getPropertyEqualToQuery(t)); } } From d6119b62294bd2788514a93726f4906124d084da Mon Sep 17 00:00:00 2001 From: amber Date: Mon, 1 Jun 2026 09:50:00 +1000 Subject: [PATCH 3/5] remove NRMN-via-link test and update affected counts --- .../ogcapi/server/common/RestApiTest.java | 25 ++----------------- .../ogcapi/server/features/RestApiTest.java | 10 ++++---- 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java b/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java index 89d97c78..649d8306 100644 --- a/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java +++ b/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java @@ -128,27 +128,6 @@ public void verifyApiCollectionsQueryOnText2() throws IOException { collections.getBody().getCollections().get(1).getId(), "Correct UUID - 9fdb1eee-bc28-43a9-88c5-972324784837"); } - /** - * Acronym is not encourage to use in title or description, so NRMN record is not found, the acronym usually - * appears in links title, this test is make sure NRMN record is found from link as well. 
- * @throws IOException - IO Exception - */ - @Test - public void verifyApiCollectionsQueryOnText3() throws IOException { - super.insertJsonToElasticRecordIndex( - // This is NRMN record where word NRMN not in title/desc but links - "8cdcdcad-399b-4bed-8cb2-29c486b6b124.json", - "7709f541-fc0c-4318-b5b9-9053aa474e0e.json" - ); - - // Call rest api directly and get query result - ResponseEntity<ExtendedCollections> collections = testRestTemplate.getForEntity(getBasePath() + "/collections?q=NRMN", ExtendedCollections.class); - assertEquals(1, Objects.requireNonNull(collections.getBody()).getTotal(), "Only 1 hit"); - assertEquals( - "8cdcdcad-399b-4bed-8cb2-29c486b6b124", - collections.getBody().getCollections().get(0).getId(), - "Correct UUID - 8cdcdcad-399b-4bed-8cb2-29c486b6b124"); - } /** * The datetime field after xxx/.. xxx/ etc. It uses CQL internally so no need to test Before After During in CQL */ @@ -565,12 +544,12 @@ public void verifyCQLPropertyScore() throws IOException { // Lower score but the fuzzy is now with operator AND, therefore it will try to match all words 'dataset' and 'includes' with fuzzy collections = testRestTemplate.getForEntity(getBasePath() + "/collections?q='dataset includes'&filter=score>=1", Collections.class); - assertEquals(3, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 1, with score 3"); + assertEquals(1, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 1, with score 3"); assertEquals("bf287dfe-9ce4-4969-9c59-51c39ea4d011", Objects.requireNonNull(collections.getBody()).getCollections().get(0).getId(), "bf287dfe-9ce4-4969-9c59-51c39ea4d011"); // Increase score will drop one record collections = testRestTemplate.getForEntity(getBasePath() + "/collections?q='dataset includes'&filter=score>=3", Collections.class); - assertEquals(2, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 3, with score 3"); + assertEquals(1,
Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 3, with score 3"); assertEquals("bf287dfe-9ce4-4969-9c59-51c39ea4d011", Objects.requireNonNull(collections.getBody()).getCollections().get(0).getId(), "bf287dfe-9ce4-4969-9c59-51c39ea4d011"); } diff --git a/server/src/test/java/au/org/aodn/ogcapi/server/features/RestApiTest.java b/server/src/test/java/au/org/aodn/ogcapi/server/features/RestApiTest.java index e1960270..c88a00e0 100644 --- a/server/src/test/java/au/org/aodn/ogcapi/server/features/RestApiTest.java +++ b/server/src/test/java/au/org/aodn/ogcapi/server/features/RestApiTest.java @@ -239,7 +239,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException { "Record return size correct" ); // Total number of record should be this - assertEquals(5, collections.getBody().getTotal(), "Get total works"); + assertEquals(4, collections.getBody().getTotal(), "Get total works"); // The search after give you the value to go to next batch assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields"); @@ -268,7 +268,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException { "Record return size correct" ); // Total number of record should be this as the same search criteria applies - assertEquals(5, collections.getBody().getTotal(), "Get total works"); + assertEquals(4, collections.getBody().getTotal(), "Get total works"); // The search after give you the value to go to next batch assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields"); @@ -297,7 +297,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException { "Record return size correct, total hit is 4, we move to the third record" ); // Total number of record should be this as the same search criteria applies - assertEquals(5, collections.getBody().getTotal(), "Get total works"); + assertEquals(4, collections.getBody().getTotal(), "Get total works"); // The search 
after give you the value to go to next batch assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields"); @@ -355,7 +355,7 @@ public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException { "Record return size correct" ); // Total number of record should be this - assertEquals(5, collections.getBody().getTotal(), "Get total works"); + assertEquals(4, collections.getBody().getTotal(), "Get total works"); // The search after give you the value to go to next batch assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields"); @@ -395,7 +395,7 @@ public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException { "Record return size correct" ); // Total number of record should be this as the same search criteria applies - assertEquals(5, collections.getBody().getTotal(), "Get total works"); + assertEquals(4, collections.getBody().getTotal(), "Get total works"); // The search after give you the value to go to next batch assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields"); From e7c84262cc4f34fea0c92438f15a5323dd9b9676 Mon Sep 17 00:00:00 2001 From: amber Date: Mon, 1 Jun 2026 09:51:00 +1000 Subject: [PATCH 4/5] verify acronym expands to full form in search --- .../ogcapi/server/common/RestApiTest.java | 24 ++++++++++++++++ .../databag/acronym_demo_only_acronym.json | 17 +++++++++++ .../databag/acronym_demo_only_fullname.json | 17 +++++++++++ .../databag/acronym_demo_unrelated.json | 17 +++++++++++ .../portal_records_index_schema.json | 28 +++++++++++++------ 5 files changed, 94 insertions(+), 9 deletions(-) create mode 100644 server/src/test/resources/databag/acronym_demo_only_acronym.json create mode 100644 server/src/test/resources/databag/acronym_demo_only_fullname.json create mode 100644 server/src/test/resources/databag/acronym_demo_unrelated.json diff --git a/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java 
b/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java index 649d8306..de50b173 100644 --- a/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java +++ b/server/src/test/java/au/org/aodn/ogcapi/server/common/RestApiTest.java @@ -128,6 +128,30 @@ public void verifyApiCollectionsQueryOnText2() throws IOException { collections.getBody().getCollections().get(1).getId(), "Correct UUID - 9fdb1eee-bc28-43a9-88c5-972324784837"); } + + /** Searching an acronym ("NRMN") matches records that only contain its full form ("National Reef Monitoring Network"); the acronym-only and unrelated fixtures are expected not to match (total is 1). */ + @Test + public void verifyAcronymSynonymSearch() throws IOException { + super.insertJsonToElasticRecordIndex( + "acronym_demo_only_acronym.json", + "acronym_demo_only_fullname.json", + "acronym_demo_unrelated.json" + ); + + // Search the acronym -> the only hit should be the full-name-only record (id acdemo02-..., fixture "B"), found via synonym expansion. + ResponseEntity<ExtendedCollections> byAcronym = testRestTemplate.getForEntity( + getBasePath() + "/collections?q=NRMN", ExtendedCollections.class); + assertEquals(1, + Objects.requireNonNull(byAcronym.getBody()).getTotal(), + "Searching 'NRMN' should find the full-name-only record via synonym expansion" + ); + assertEquals( + "acdemo02-0000-0000-0000-000000000002", + byAcronym.getBody().getCollections().get(0).getId(), + "The matched record should be the full-name-only fixture (B)" + ); + } + /** * The datetime field after xxx/.. xxx/ etc.
It uses CQL internally so no need to test Before After During in CQL */ diff --git a/server/src/test/resources/databag/acronym_demo_only_acronym.json b/server/src/test/resources/databag/acronym_demo_only_acronym.json new file mode 100644 index 00000000..c56f65ed --- /dev/null +++ b/server/src/test/resources/databag/acronym_demo_only_acronym.json @@ -0,0 +1,17 @@ +{ + "id": "acdemo01-0000-0000-0000-000000000001", + "title": "NRMN Reef Survey Data 2024", + "description": "Reef benthic cover survey data collected at standard NRMN sites along the Australian coast.", + "extent": { + "bbox": [[110.0, -45.0, 155.0, -10.0]], + "temporal": [["2024-01-01T00:00:00Z", "2024-12-31T23:59:59Z"]] + }, + "summaries": { + "score": 50, + "status": "completed", + "scope": { "code": "dataset", "name": "Demo - only acronym in title/description" }, + "parameter_vocabs": [], + "platform_vocabs": [], + "organisation_vocabs": [] + } +} diff --git a/server/src/test/resources/databag/acronym_demo_only_fullname.json b/server/src/test/resources/databag/acronym_demo_only_fullname.json new file mode 100644 index 00000000..72a54903 --- /dev/null +++ b/server/src/test/resources/databag/acronym_demo_only_fullname.json @@ -0,0 +1,17 @@ +{ + "id": "acdemo02-0000-0000-0000-000000000002", + "title": "National Reef Monitoring Network Sub-Facility Survey", + "description": "Long-term observations of reef benthic and fish communities conducted by the National Reef Monitoring Network across temperate and tropical Australia.", + "extent": { + "bbox": [[110.0, -45.0, 155.0, -10.0]], + "temporal": [["2024-01-01T00:00:00Z", "2024-12-31T23:59:59Z"]] + }, + "summaries": { + "score": 50, + "status": "completed", + "scope": { "code": "dataset", "name": "Demo - only full name in title/description" }, + "parameter_vocabs": [], + "platform_vocabs": [], + "organisation_vocabs": [] + } +} diff --git a/server/src/test/resources/databag/acronym_demo_unrelated.json 
b/server/src/test/resources/databag/acronym_demo_unrelated.json new file mode 100644 index 00000000..547534c7 --- /dev/null +++ b/server/src/test/resources/databag/acronym_demo_unrelated.json @@ -0,0 +1,17 @@ +{ + "id": "acdemo03-0000-0000-0000-000000000003", + "title": "Ocean Temperature Observations off Tasmania", + "description": "Hourly sea surface temperature measurements collected by moored buoys.", + "extent": { + "bbox": [[143.0, -45.0, 150.0, -40.0]], + "temporal": [["2024-01-01T00:00:00Z", "2024-12-31T23:59:59Z"]] + }, + "summaries": { + "score": 50, + "status": "completed", + "scope": { "code": "dataset", "name": "Demo - unrelated negative sample" }, + "parameter_vocabs": [], + "platform_vocabs": [], + "organisation_vocabs": [] + } +} diff --git a/server/src/test/resources/portal_records_index_schema.json b/server/src/test/resources/portal_records_index_schema.json index b0d9dec7..4fc991ae 100644 --- a/server/src/test/resources/portal_records_index_schema.json +++ b/server/src/test/resources/portal_records_index_schema.json @@ -5,17 +5,17 @@ "custom_analyser": { "type": "custom", "tokenizer": "standard", - "filter": [ - "lowercase", - "english_stop" - ] + "filter": ["lowercase", "english_stop"] + }, + "acronym_search_analyser": { + "type": "custom", + "tokenizer": "standard", + "filter": ["lowercase", "english_stop", "acronym_synonym_filter"] }, "shingle_analyser": { "type": "custom", "tokenizer": "standard", - "char_filter": [ - "html_strip" - ], + "char_filter": ["html_strip"], "filter": [ "lowercase", "asciifolding", @@ -36,6 +36,12 @@ "type": "stop", "stopwords": "_english_" }, + "acronym_synonym_filter": { + "type": "synonym_graph", + "synonyms": [ + "nrmn => national reef monitoring network" + ] + }, "shingle_filter": { "type": "shingle", "min_shingle_size": 2, @@ -57,7 +63,7 @@ "pattern": "\\b\\d+\\b", "replacement": "" }, - "token_limit" : { + "token_limit": { "type": "limit", "max_token_count": 350 }, @@ -94,6 +100,8 @@ }, "title": { "type": 
"text", + "analyzer": "custom_analyser", + "search_analyzer": "acronym_search_analyser", "fields": { "keyword": { "type": "keyword" @@ -150,7 +158,9 @@ } }, "description": { - "type": "text" + "type": "text", + "analyzer": "custom_analyser", + "search_analyzer": "acronym_search_analyser" }, "license": { "type": "keyword", From e2cc9556fc0b5acf1a3624833515e4884da90844 Mon Sep 17 00:00:00 2001 From: amber Date: Mon, 1 Jun 2026 10:51:37 +1000 Subject: [PATCH 5/5] fix test error --- .../ogcapi/server/service/ElasticSearchTest.java | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/server/src/test/java/au/org/aodn/ogcapi/server/service/ElasticSearchTest.java b/server/src/test/java/au/org/aodn/ogcapi/server/service/ElasticSearchTest.java index 07998e0d..c294b4a2 100644 --- a/server/src/test/java/au/org/aodn/ogcapi/server/service/ElasticSearchTest.java +++ b/server/src/test/java/au/org/aodn/ogcapi/server/service/ElasticSearchTest.java @@ -152,18 +152,16 @@ public void searchByParametersWithDoubleQuote() { } else { should.add(CQLFields.fuzzy_title.getPropertyEqualToQuery(term)); should.add(CQLFields.fuzzy_desc.getPropertyEqualToQuery(term)); + should.add(CQLFields.phrase_title.getPropertyEqualToQuery(term)); + should.add(CQLFields.phrase_desc.getPropertyEqualToQuery(term)); } should.add(CQLFields.parameter_vocabs.getPropertyEqualToQuery(term)); should.add(CQLFields.organisation_vocabs.getPropertyEqualToQuery(term)); should.add(CQLFields.platform_vocabs.getPropertyEqualToQuery(term)); should.add(CQLFields.id.getPropertyEqualToQuery(term)); - should.add(BoolQuery.of(b -> b - .should(CQLFields.links_title_contains.getPropertyEqualToQuery(term)) - .boost(0.5f) // lower boost to reduce promotion of link-title-only matches - )._toQuery()); should.add(CQLFields.credit_contains.getPropertyEqualToQuery(term)); } - assertEquals(8, should.size(), "Exact match should produce 8 queries (title + description + other fields)"); + assertEquals(7, 
should.size(), "Exact match should produce 7 queries (title + description + 5 other fields)"); assertTrue(should.get(0).isMatchPhrase(), "Title query should be MatchPhraseQuery"); assertTrue(should.get(1).isMatchPhrase(), "Description query should be MatchPhraseQuery"); } @@ -182,18 +180,16 @@ public void searchByParametersWithoutDoubleQuote() { } else { should.add(CQLFields.fuzzy_title.getPropertyEqualToQuery(term)); should.add(CQLFields.fuzzy_desc.getPropertyEqualToQuery(term)); + should.add(CQLFields.phrase_title.getPropertyEqualToQuery(term)); + should.add(CQLFields.phrase_desc.getPropertyEqualToQuery(term)); } should.add(CQLFields.parameter_vocabs.getPropertyEqualToQuery(term)); should.add(CQLFields.organisation_vocabs.getPropertyEqualToQuery(term)); should.add(CQLFields.platform_vocabs.getPropertyEqualToQuery(term)); should.add(CQLFields.id.getPropertyEqualToQuery(term)); - should.add(BoolQuery.of(b -> b - .should(CQLFields.links_title_contains.getPropertyEqualToQuery(term)) - .boost(0.5f) // lower boost to reduce promotion of link-title-only matches - )._toQuery()); should.add(CQLFields.credit_contains.getPropertyEqualToQuery(term)); } - assertEquals(8, should.size(), "Fuzzy match should produce 8 queries"); + assertEquals(9, should.size(), "Fuzzy match should produce 9 queries (fuzzy + phrase title/desc + 5 other fields)"); assertTrue(should.get(0).isMatch(), "fuzzy_title should be MatchQuery"); } }