ES-模糊查询

模糊查询

1 wildcard

准备数据

POST demolike/_bulk
{"index": {"_id": "1"}
}
{"text": "草莓熊是个大坏蛋"
}
{"index": {"_id": "2"}
}
{"text": "wolf 也是一个坏蛋"
}
{"index": {"_id": "3"}
}
{"text": "我们一起去看小姐姐"
}
{"index": {"_id": "4"}
}
{"text": "真相只有一个"
}

使用案例

GET demolike/_search  
{"query": {"wildcard": {"text.keyword": {"value": "*坏蛋*"}}}
}GET demolike/_search  
{"query": {"wildcard": {"text.keyword": {"value": "*个*"}}}
}

在这里插入图片描述

正则

GET demolike/_search
{"query":{"regexp": {"text": "[\\s\\S]*是[\\s\\S]*"}}
}

在这里插入图片描述
-fuzzy(更适合用于生产环境)
拥有纠错的能力

POST demolikefu/_bulk
{"index":{"_id":"1"}}
{"text":"hello cat"}
{"index":{"_id":"2"}}
{"text":"hello fdsaf"}
{"index":{"_id":"3"}}
{"text":"hello cfasat"}
GET demolikefu/_search
{"query": {"fuzzy": {"text": {"value": "act", #fuzzy 会进行纠错"fuzziness": 1, #编辑距离 也就是可以进行多少次操作变成正确的字符 act -> cat c和a 交换就可已变成cat 编辑距离为1"transpositions": true #es 里面有两种算法 老算法:认为ac都移动了   新算法:交换只算移动了一次 false 是老算法 true是新算法}}}
}

在这里插入图片描述

前缀搜索

GET demolikefu/_search
{"query":{"match_phrase_prefix": {"text": "zhangsan and l" #会搜索出 zhangsan and list}}
}
#这个也是分词的 会搜索分词后的
GET demolike/_search
{"query":{"prefix": {"text": {"value": "是" #如果要搜整个句子 用 text.keyword}}}
}

ngram
性能会比 fuzzy 好，但是ngram会浪费空间，如果是要追求极致的性能一般使用ngram

PUT my_index
{"settings": {"analysis": {"filter": {"2_3_ngram": {"type": "ngram","min_gram": 2, #最小"max_gram": 3  #最大  比如she经过这个作用 sh he she 等}},"analyzer": {"my_ngram": {"type": "custom","filter": "2_3_ngram", #这个是在分词的基础上对每个单词进行分词"tokenizer": "standard" #这个是分词的 比如 hello world 分为 hello和world}}}},"mappings": { #建立索引的时候一般就默认 流量特别大的时候更合适用这个自定义的方式创建索引"properties": {"text": {"type": "text","analyzer": "my_ngram", #存储的时候怎么切分"search_analyzer": "standard" #查询语句怎么切分}}}
}

edge_ngram

put my_index
{"settings":{"analysis":{"filter":{"2_3_ngram":{"type":"ngram","min_gram":2,"max_gram":3}},"analyzer":{"my_ngram":{"type":"custom","filter":"2_3_ngram","tokenizer":"standard"}}}},"mappings":{"properties":{"text":{"type":"text","analyzer":"my_ngram","search_analyzer":"standard"}}}
}

从左向右切分，比ngram 切分的数量更少。

suggest

POST product_suggest/_bulk
{"index":{"_id" : 1}}
{"text":"你是一个小笨蛋"}
{"index":{"_id" : 2}}
{"text":"疯狂学习中"}
{"index":{"_id" : 3}}
{"text":"来呀摆烂躺平呀"}
{"index":{"_id" : 4}}
{"text":"我真的好想成为优秀的工程师"}PUT product_suggest{"mappings":{"properties":{"text":{"type":"text","analyzer":"ik_smart","fields":{"suggest":{"type":"completion", #补全"analyzer":"ik_smart"}}},"content":{"type":"text","analyzer":"ik_smart"}}}}#推荐补全
GET product_suggest/_search
{"suggest":{"my_suggest":{"prefix":"我", #suggest 中prefix是性能最好的"completion":{"field":"text.suggest"}} }
}