我是ElasticSearch的新手。我正在用ES尝试一个简单的家谱项目,并希望对名字和姓氏使用同义词。我有以下的ElasticSearch索引设置,希望在其中添加两个同义词分析器,然后用这两个分析器在搜索时对不同的字段做同义词处理。我不知道如何将正确的条目添加到“设置”JSON文件中。
我的两个同义词文件名为 given_synonyms.txt 和 surname_synonyms.txt。非常希望有人能帮我找到这个JSON文件的正确语法。
我为我想做的事情添加了非常通用的伪代码,希望这样对读者更有意义。
{
"settings": {
"index": {
"number_of_shards": "128",
"number_of_replicas": "0",
"analysis": {
"filter": {
"dbl_metaphone": {
"type": "phonetic",
"encoder": "double_metaphone",
"max_code_len" : 5
}
},
"analyzer": {
"dbl_metaphone": {
"tokenizer": "standard",
"filter": "dbl_metaphone"
}
}
I ASSUME THE SYNONYM ENTRIES GO HERE FOR THE TWO SYNONYM FILES - BUT I DON'T KNOW HOW THAT SHOULD BE DONE
}
}
},
"mappings": {
"test": {
"_all": {
"enabled": false
},
"_source": {
"enabled": true
},
"properties": {
"GivenNames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone"
}
I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
}
},
"Surnames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone"
}
I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
}
},
"FatherGivenNames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone"
}
}
I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
},
"FatherSurnames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone"
}
}
I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
},
"MotherGivenNames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone"
}
}
I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
},
"MotherSurnames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone"
}
}
I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
},
"SpouseGivenNames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone"
}
}
I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
},
"SpouseSurnames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone"
}
}
I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
},
"ChildrenGivenNames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone"
}
}
I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
},
"BirthYears": {
"type": "short"
},
"BirthLocations": {
"type": "integer"
},
"DeathYears": {
"type": "short"
},
"DeathLocations": {
"type": "integer"
},
"MarriageLocations": {
"type": "integer"
},
"MarriageYears": {
"type": "integer"
},
"ResidenceLocations": {
"type": "integer"
}
}
}
}
}
发布于 2020-04-06 23:42:44
首先,让我们看看同义词过滤器以及如何引入同义词文件。在这里,https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-synonym-tokenfilter.html,您可以看到如何配置它。您提到了两个文件,因此最好定义两个过滤器,各自引用其中一个同义词文件。
{
"settings": {
"index": {
"number_of_shards": "128",
"number_of_replicas": "0",
"analysis": {
"filter": {
"dbl_metaphone": {
"type": "phonetic",
"encoder": "double_metaphone",
"max_code_len": 5
},
"given_synonyms": {
"type": "synonym",
"synonyms_path": "analysis/given_synonyms.txt"
},
"surname_synonyms": {
"type": "synonym",
"synonyms_path": "analysis/surname_synonyms.txt"
}
},
"analyzer": {
"dbl_metaphone": {
"tokenizer": "standard",
"filter": "dbl_metaphone"
},
"given_synonyms": {
"tokenizer": "whitespace",
"filter": "given_synonyms"
},
"surname_synonyms": {
"tokenizer": "whitespace",
"filter": "surname_synonyms"
}
}
}
}
}
}
请记住,当对同义词使用文件(或多个文件)时,需要确保每个elasticsearch节点都可以访问该文件。另一种方法是在“设置”部分中直接指定同义词。如果同义词的数量不是很大,或者在使elasticsearch节点可以访问文件方面存在问题,这种方法可能会更好。您可以在文档中查看定义同义词的更多方法。
{
"settings": {
"index": {
"number_of_shards": "128",
"number_of_replicas": "0",
"analysis": {
"filter": {
"dbl_metaphone": {
"type": "phonetic",
"encoder": "double_metaphone",
"max_code_len": 5
},
"given_synonyms": {
"type": "synonym",
"synonyms": [ ... YOUR SYNONYMS HERE ...]
},
"surname_synonyms": {
"type": "synonym",
"synonyms": [ ... YOUR SYNONYMS HERE ...]
}
},
"analyzer": {
"dbl_metaphone": {
"tokenizer": "standard",
"filter": "dbl_metaphone"
},
"given_synonyms": {
"tokenizer": "standard",
"filter": "given_synonyms"
},
"surname_synonyms": {
"tokenizer": "standard",
"filter": "surname_synonyms"
}
}
}
}
}
}
要得到最终的解决方案,您可以这样配置:
{
"settings": {
"index": {
"number_of_shards": "128",
"number_of_replicas": "0",
"analysis": {
"filter": {
"dbl_metaphone": {
"type": "phonetic",
"encoder": "double_metaphone",
"max_code_len": 5
},
"given_synonyms": {
"type": "synonym",
"synonyms_path": "analysis/given_synonyms.txt"
},
"surname_synonyms": {
"type": "synonym",
"synonyms_path": "analysis/surname_synonyms.txt"
}
},
"analyzer": {
"dbl_metaphone": {
"tokenizer": "standard",
"filter": "dbl_metaphone"
},
"dbl_metaphone_given_synonym": {
"tokenizer": "standard",
"filter": [
"given_synonyms",
"dbl_metaphone"
]
},
"dbl_metaphone_surname_synonym": {
"tokenizer": "standard",
"filter": [
"surname_synonyms",
"dbl_metaphone"
]
}
}
}
}
}
}
这里一共有三个分析器。其中两个组合了两个过滤器(第一个过滤器的输出是第二个过滤器的输入,所以顺序很重要)。在elasticsearch中,您可以指定某个字段在索引时使用一个分析器进行分析,而在搜索时由另一个分析器分析输入的查询内容。所以您可以这样做(示例摘自 https://www.elastic.co/guide/en/elasticsearch/reference/current/search-analyzer.html):
{
"mappings": {
"properties": {
"text": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "standard"
}
}
}
}
所以您的设置/映射会变成这样:
{
"settings": {
"index": {
"number_of_shards": "128",
"number_of_replicas": "0",
"analysis": {
"filter": {
"dbl_metaphone": {
"type": "phonetic",
"encoder": "double_metaphone",
"max_code_len": 5
},
"given_synonyms": {
"type": "synonym",
"synonyms_path": "analysis/given_synonyms.txt"
},
"surname_synonyms": {
"type": "synonym",
"synonyms_path": "analysis/surname_synonyms.txt"
}
},
"analyzer": {
"dbl_metaphone": {
"tokenizer": "standard",
"filter": "dbl_metaphone"
},
"dbl_metaphone_given_synonym": {
"tokenizer": "standard",
"filter": [
"given_synonyms",
"dbl_metaphone"
]
},
"dbl_metaphone_surname_synonym": {
"tokenizer": "standard",
"filter": [
"surname_synonyms",
"dbl_metaphone"
]
}
}
}
}
},
"mappings": {
"test": {
"_all": {
"enabled": false
},
"_source": {
"enabled": true
},
"properties": {
"GivenNames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone",
"search_analyzer": "dbl_metaphone_given_synonym"
}
}
},
"Surnames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone",
"search_analyzer": "dbl_metaphone_surname_synonym"
}
}
},
"FatherGivenNames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone",
"search_analyzer": "dbl_metaphone_given_synonym"
}
}
},
"FatherSurnames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone",
"search_analyzer": "dbl_metaphone_surname_synonym"
}
}
},
"MotherGivenNames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone",
"search_analyzer": "dbl_metaphone_given_synonym"
}
}
},
"MotherSurnames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone",
"search_analyzer": "dbl_metaphone_surname_synonym"
}
}
},
"SpouseGivenNames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone",
"search_analyzer": "dbl_metaphone_given_synonym"
}
}
},
"SpouseSurnames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone",
"search_analyzer": "dbl_metaphone_surname_synonym"
}
}
},
"ChildrenGivenNames": {
"type": "keyword",
"index_options": "freqs",
"store": "false",
"similarity": "boolean",
"norms": "false",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "dbl_metaphone",
"search_analyzer": "dbl_metaphone_given_synonym"
}
}
},
"BirthYears": {
"type": "short"
},
"BirthLocations": {
"type": "integer"
},
"DeathYears": {
"type": "short"
},
"DeathLocations": {
"type": "integer"
},
"MarriageLocations": {
"type": "integer"
},
"MarriageYears": {
"type": "integer"
},
"ResidenceLocations": {
"type": "integer"
}
}
}
}
}
https://stackoverflow.com/questions/61067409
复制相似问题