首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >ElasticSearch索引中的同义词用法/语法

ElasticSearch索引中的同义词用法/语法
EN

Stack Overflow用户
提问于 2020-04-06 19:22:56
回答 1查看 142关注 0票数 1

我是ElasticSearch的新手。我正在ES中尝试一个简单的家谱项目,并希望对名字和姓氏使用同义词。我有以下的ElasticSearch索引设置,希望在其中添加两个同义词分析器,然后在搜索时用它们分别对不同的字段做同义词处理。我不知道如何将正确的条目添加到这个“设置”JSON文件中。

我的两个同义词文件分别名为given_synonyms.txt和surname_synonyms.txt。我非常希望有人能帮我找到这个JSON文件的正确语法。

我为我想做的事情添加了非常通用的伪代码(pseudo code),希望这样对读者更容易理解。

代码语言:javascript
复制
    {
      "settings": {
        "index": {
          "number_of_shards": "128",
          "number_of_replicas": "0",
          "analysis": {
            "filter": {
              "dbl_metaphone": {
                "type":    "phonetic",
                "encoder": "double_metaphone",
                "max_code_len" : 5
              }
            },
            "analyzer": {
              "dbl_metaphone": {
                "tokenizer": "standard",
                "filter":    "dbl_metaphone"
              }
            }
    I ASSUME THE SYNONYM ENTRIES GO HERE FOR THE TWO SYNONYM FILES - BUT I DON'T KNOW HOW THAT SHOULD BE DONE
          }
        }
      },
      "mappings": {
        "test": {
          "_all": {
            "enabled": false
          },
          "_source": {
            "enabled": true
          },
          "properties": {
            "GivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
                I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
              }
            },
            "Surnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
                I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
              }
            },
            "FatherGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "FatherSurnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "MotherGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "MotherSurnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "SpouseGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "SpouseSurnames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE SURNAME_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "ChildrenGivenNames": {
              "type": "keyword",
              "index_options": "freqs",
              "store": "false",
              "similarity": "boolean",
              "norms": "false",
              "fields": {
                "phonetic": {
                  "type": "text",
                  "analyzer": "dbl_metaphone"
                }
              }
              I WANT TO USE THE GIVEN_SYNONYMS SEARCH ANALYZER HERE IN ADDITION TO THE DBL_METAPHONE
            },
            "BirthYears": {
              "type": "short"
            },
            "BirthLocations": {
              "type": "integer"
            },
            "DeathYears": {
              "type": "short"
            },
            "DeathLocations": {
              "type": "integer"
            },
            "MarriageLocations": {
              "type": "integer"
            },
            "MarriageYears": {
              "type": "integer"
            },
            "ResidenceLocations": {
              "type": "integer"
            }
          }
        }
      }
    }
EN

回答 1

Stack Overflow用户

发布于 2020-04-06 23:42:44

首先,让我们看看同义词分析器以及如何引入同义词文件。您可以在 https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-synonym-tokenfilter.html 查看如何配置它。您提到了两个文件。您最好定义一个包含或同义词的文件。

代码语言:javascript
复制
{
  "settings": {
    "index": {
      "number_of_shards": "128",
      "number_of_replicas": "0",
      "analysis": {
        "filter": {
          "dbl_metaphone": {
            "type": "phonetic",
            "encoder": "double_metaphone",
            "max_code_len": 5
          },
          "given_synonyms": {
            "type": "synonym",
            "synonyms_path": "analysis/given_synonyms.txt"
          },
          "surname_synonyms": {
            "type": "synonym",
            "synonyms_path": "analysis/surname_synonyms.txt"
          }
        },
        "analyzer": {
          "dbl_metaphone": {
            "tokenizer": "standard",
            "filter": "dbl_metaphone"
          },
          "given_synonyms": {
            "tokenizer": "whitespace",
            "filter": "given_synonyms"
          },
          "surname_synonyms": {
            "tokenizer": "whitespace",
            "filter": "surname_synonyms"
          }
        }
      }
    }
  }
}

请记住,当使用一个(或多个)文件来提供同义词时,需要确保每个elasticsearch节点都可以访问该文件。另一种方法是直接在“设置”部分中指定同义词。如果同义词的数量不是很大,或者难以让各个elasticsearch节点都访问到文件,这种方式可能会更好。您可以在文档中查看定义同义词的更多方法。

代码语言:javascript
复制
{
  "settings": {
    "index": {
      "number_of_shards": "128",
      "number_of_replicas": "0",
      "analysis": {
        "filter": {
          "dbl_metaphone": {
            "type": "phonetic",
            "encoder": "double_metaphone",
            "max_code_len": 5
          },
          "given_synonyms": {
            "type": "synonym",
            "synonyms": [ ... YOUR SYNONYMS HERE ...]
          },
          "surname_synonyms": {
            "type": "synonym",
            "synonyms": [ ... YOUR SYNONYMS HERE ...]
          }
        },
        "analyzer": {
          "dbl_metaphone": {
            "tokenizer": "standard",
            "filter": "dbl_metaphone"
          },
          "given_synonyms": {
            "tokenizer": "standard",
            "filter": "given_synonyms"
          },
          "surname_synonyms": {
            "tokenizer": "standard",
            "filter": "surname_synonyms"
          }
        }
      }
    }
  }
}

要得到最终的解决方案,您可以采用类似下面的配置

代码语言:javascript
复制
{
  "settings": {
    "index": {
      "number_of_shards": "128",
      "number_of_replicas": "0",
      "analysis": {
        "filter": {
          "dbl_metaphone": {
            "type": "phonetic",
            "encoder": "double_metaphone",
            "max_code_len": 5
          },
          "given_synonyms": {
            "type": "synonym",
            "synonyms_path": "analysis/given_synonyms.txt"
          },
          "surname_synonyms": {
            "type": "synonym",
            "synonyms_path": "analysis/surname_synonyms.txt"
          }
        },
        "analyzer": {
          "dbl_metaphone": {
            "tokenizer": "standard",
            "filter": "dbl_metaphone"
          },
          "dbl_metaphone_given_synonym": {
            "tokenizer": "standard",
            "filter": [
              "given_synonyms",
              "dbl_metaphone"
            ]
          },
          "dbl_metaphone_surname_synonym": {
            "tokenizer": "standard",
            "filter": [
              "surname_synonyms",
              "dbl_metaphone"
            ]
          }
        }
      }
    }
  }
}

在这里,一共有三个分析器。其中两个各自组合了两个过滤器(第一个过滤器的输出是第二个过滤器的输入,所以顺序很重要)。在elasticsearch中,您可以指定一个字段在索引时使用一个分析器进行分析,而在搜索时,输入的查询内容由另一个分析器进行分析。所以你可以这样做(示例来自 https://www.elastic.co/guide/en/elasticsearch/reference/current/search-analyzer.html)

代码语言:javascript
复制
{
  "mappings": {
    "properties": {
      "text": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "standard"
      }
    }
  }
}

所以你的设置/映射会变成这样

代码语言:javascript
复制
{
  "settings": {
    "index": {
      "number_of_shards": "128",
      "number_of_replicas": "0",
      "analysis": {
        "filter": {
          "dbl_metaphone": {
            "type": "phonetic",
            "encoder": "double_metaphone",
            "max_code_len": 5
          },
          "given_synonyms": {
            "type": "synonym",
            "synonyms_path": "analysis/given_synonyms.txt"
          },
          "surname_synonyms": {
            "type": "synonym",
            "synonyms_path": "analysis/surname_synonyms.txt"
          }
        },
        "analyzer": {
          "dbl_metaphone": {
            "tokenizer": "standard",
            "filter": "dbl_metaphone"
          },
          "dbl_metaphone_given_synonym": {
            "tokenizer": "standard",
            "filter": [
              "given_synonyms",
              "dbl_metaphone"
            ]
          },
          "dbl_metaphone_surname_synonym": {
            "tokenizer": "standard",
            "filter": [
              "surname_synonyms",
              "dbl_metaphone"
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "test": {
      "_all": {
        "enabled": false
      },
      "_source": {
        "enabled": true
      },
      "properties": {
        "GivenNames": {
          "type": "keyword",
          "index_options": "freqs",
          "store": "false",
          "similarity": "boolean",
          "norms": "false",
          "fields": {
            "phonetic": {
              "type": "text",
              "analyzer": "dbl_metaphone",
              "search_analyzer": "dbl_metaphone_given_synonym"
            }
          }
        },
        "Surnames": {
          "type": "keyword",
          "index_options": "freqs",
          "store": "false",
          "similarity": "boolean",
          "norms": "false",
          "fields": {
            "phonetic": {
              "type": "text",
              "analyzer": "dbl_metaphone",
              "search_analyzer": "dbl_metaphone_surname_synonym"
            }
          }
        },
        "FatherGivenNames": {
          "type": "keyword",
          "index_options": "freqs",
          "store": "false",
          "similarity": "boolean",
          "norms": "false",
          "fields": {
            "phonetic": {
              "type": "text",
              "analyzer": "dbl_metaphone",
              "search_analyzer": "dbl_metaphone_given_synonym"
            }
          }
        },
        "FatherSurnames": {
          "type": "keyword",
          "index_options": "freqs",
          "store": "false",
          "similarity": "boolean",
          "norms": "false",
          "fields": {
            "phonetic": {
              "type": "text",
              "analyzer": "dbl_metaphone",
              "search_analyzer": "dbl_metaphone_surname_synonym"
            }
          }
        },
        "MotherGivenNames": {
          "type": "keyword",
          "index_options": "freqs",
          "store": "false",
          "similarity": "boolean",
          "norms": "false",
          "fields": {
            "phonetic": {
              "type": "text",
              "analyzer": "dbl_metaphone",
              "search_analyzer": "dbl_metaphone_given_synonym"
            }
          }
        },
        "MotherSurnames": {
          "type": "keyword",
          "index_options": "freqs",
          "store": "false",
          "similarity": "boolean",
          "norms": "false",
          "fields": {
            "phonetic": {
              "type": "text",
              "analyzer": "dbl_metaphone",
              "search_analyzer": "dbl_metaphone_surname_synonym"
            }
          }
        },
        "SpouseGivenNames": {
          "type": "keyword",
          "index_options": "freqs",
          "store": "false",
          "similarity": "boolean",
          "norms": "false",
          "fields": {
            "phonetic": {
              "type": "text",
              "analyzer": "dbl_metaphone",
              "search_analyzer": "dbl_metaphone_given_synonym"
            }
          }
        },
        "SpouseSurnames": {
          "type": "keyword",
          "index_options": "freqs",
          "store": "false",
          "similarity": "boolean",
          "norms": "false",
          "fields": {
            "phonetic": {
              "type": "text",
              "analyzer": "dbl_metaphone",
              "search_analyzer": "dbl_metaphone_surname_synonym"
            }
          }
        },
        "ChildrenGivenNames": {
          "type": "keyword",
          "index_options": "freqs",
          "store": "false",
          "similarity": "boolean",
          "norms": "false",
          "fields": {
            "phonetic": {
              "type": "text",
              "analyzer": "dbl_metaphone",
              "search_analyzer": "dbl_metaphone_given_synonym"
            }
          }
        },
        "BirthYears": {
          "type": "short"
        },
        "BirthLocations": {
          "type": "integer"
        },
        "DeathYears": {
          "type": "short"
        },
        "DeathLocations": {
          "type": "integer"
        },
        "MarriageLocations": {
          "type": "integer"
        },
        "MarriageYears": {
          "type": "integer"
        },
        "ResidenceLocations": {
          "type": "integer"
        }
      }
    }
  }
}
票数 1
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/61067409

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档