目录
5.5 bool 查询 : 合并多个过滤条件查询结果的布尔逻辑
5.7 wildcard : 使用标准的shell通配符查询
注意: 版本要与es 环境保持一致
pip install elasticsearch==7.8.0
- # 使用python操作ElasticSearch
- from elasticsearch import Elasticsearch, helpers
- # Connect to Elasticsearch
- # NOTE(review): `request_timeout` is passed to the constructor here; with the
- # 7.x client pinned above, the constructor-level keyword is presumably
- # `timeout` — confirm against the installed client version.
- es = Elasticsearch(hosts="http://192.168.21.103:9200", request_timeout=3600)
说明 :指定mapping可以为对应字段构建索引,便于检索
def create_index(es, index_name):
    """Create ``index_name`` with a fixed settings/mappings body if it is absent.

    Prints a status message for each outcome (created, failed, already
    exists) and returns nothing.

    :param es: Elasticsearch client exposing ``indices.exists`` and
        ``indices.create``.
    :param index_name: Name of the index to create.
    """
    # Complete index definition: shard/replica settings plus field mappings.
    index_body = {
        "settings": {
            "index": {
                "number_of_shards": 1,
                "number_of_replicas": 1,
            },
        },
        "mappings": {
            "properties": {
                "id": {"type": "keyword"},
                "url": {"type": "keyword"},
                "summary": {"type": "text"},
                "author": {
                    "properties": {
                        "value": {"type": "text"},
                    },
                },
                "title": {"type": "text"},
                "periodical": {
                    "properties": {
                        "name": {"type": "text"},
                    },
                },
                "doi": {"type": "keyword"},
                # NOTE(review): `citations` and `year` are stored as keyword;
                # range queries on keyword fields presumably compare strings
                # rather than numbers — confirm a numeric type is not intended.
                "citations": {"type": "keyword"},
                "year": {"type": "keyword"},
            },
        },
    }

    # API parameters are passed by keyword: newer client releases no longer
    # accept the index name positionally, while keywords work on both.
    if es.indices.exists(index=index_name):
        print("索引已存在无需重复创建!")
        return

    # ignore=400: if the index already exists, the error is ignored and the
    # index is not created again.
    result = es.indices.create(index=index_name, body=index_body, ignore=400)
    if result.get("acknowledged"):
        print("索引创建成功")
    else:
        print(f"索引创建失败:{result}")
单条插入:
def insert_data(es, index_name, data):
    """Index a single document and return the client's response.

    When ``data`` carries a truthy ``"id"`` value it is used as the document
    id; otherwise no id is sent and one is generated automatically. A
    success message is printed on both paths.

    :param es: Elasticsearch client exposing ``index``.
    :param index_name: Target index.
    :param data: Document body (a dict); its optional ``"id"`` key selects
        the document id.
    :return: Whatever ``es.index`` returns.
    """
    request = {"index": index_name, "body": data}
    doc_id = data.get("id")
    if doc_id:
        # Explicit id supplied by the caller.
        request["id"] = doc_id
    response = es.index(**request)
    print("插入成功")
    return response
批量插入
- from elasticsearch import helpers
-
def _flush_bulk_actions(es, index_name, actions):
    """Send the pending bulk actions, report the batch size, then empty the list."""
    helpers.bulk(es, actions)
    print(f"数据批量插入成功,数据大小:{len(actions)}, 索引:{index_name}")
    actions.clear()


def bulk_list(es, index_name, data_list, batch_size=100):
    """Bulk-index documents into ``index_name`` in batches.

    Each document becomes one bulk action. A truthy ``"id"`` value in the
    document is used as its ``_id``; documents without one carry no ``_id``
    in their action. The deprecated ``_type`` key is not sent.

    :param es: Elasticsearch client handed to ``helpers.bulk``.
    :param index_name: Target index.
    :param data_list: Iterable of document dicts.
    :param batch_size: Number of actions sent per ``helpers.bulk`` call
        (default 100, the value that used to be hard-coded).
    :raises ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    actions = []
    for data in data_list:
        action = {"_index": index_name, "_source": data}
        doc_id = data.get("id")
        if doc_id:
            action["_id"] = doc_id
        actions.append(action)
        if len(actions) >= batch_size:
            _flush_bulk_actions(es, index_name, actions)

    # Send whatever is left over after the last full batch.
    if actions:
        _flush_bulk_actions(es, index_name, actions)
def delete_index(es, index_name):
    """Delete ``index_name`` if it exists and print the outcome.

    :param es: Elasticsearch client exposing ``indices.exists`` and
        ``indices.delete``.
    :param index_name: Name of the index to delete.
    """
    # API parameters are passed by keyword: newer client releases no longer
    # accept the index name positionally, while keywords work on both.
    if not es.indices.exists(index=index_name):
        print("索引不存在")
        return
    es.indices.delete(index=index_name)
    print("索引删除成功")
match_all : 查询到所有文档,默认返回一页
match :使用关键词match,默认根据_score降序排列
multi_match : 同时搜索多个字段
match_phrase : 短语查询
- # match_all: query with an empty match_all clause.
- body = {
- "query": {
- "match_all": {
- }
- }
- }
-
- # match: full-text query against the `title` field.
- body = {
- "query": {
- "match": {
- "title": "Compliance, identification, and internalization three processes of attitude change"
- }
- }
- }
- # multi_match: builds on match by searching several fields for the same query text
- body = {
- "query": {
- "multi_match": {
- "query": "comprehensive",
- "fields": ["title", "summary"]
- }
- }
- }
- # match_phrase: phrase matching
- body = {
- "query": {
- "match_phrase": {
- "title": "modern marketing"
- }
- }
- }
-
- # Each assignment above replaces `body`, so only the last one assigned is sent.
- es.search(index=index_name, body=body)
term : 过滤–term主要用于精确匹配某些值,比如数字、日期、布尔值,或未经分词的字符串(keyword 类型,即旧版本中的 not_analyzed 字段)
terms : 允许指定多个匹配条件
- # term: exact match of `year` against a single value.
- body = {
- "query": {
- "term": {
- "year": 1958
- }
- }
- }
-
- # terms: exact match of `year` against any value in the list.
- body = {
- "query": {
- "terms": {
- "year": [1958, 2010]
- }
- }
- }
- # `body` was reassigned above, so only the terms query is sent.
- es.search(index=index_name, body=body)
- # Limit how many hits are returned
- # NOTE(review): `from` is the offset of the first hit rather than a page
- # number, so 1 presumably skips the first hit — confirm 0 is not intended.
- body = {
- "query": {
- "match_all": {
- }
- },
- "from": 1, # offset of the first hit to return
- "size": 2 # maximum number of hits to return
- }
- es.search(index=index_name, body=body)
按照指定范围查询数据:
gt : 大于
gte: 大于等于
lt : 小于
lte : 小于等于
- # range: documents whose `year` is greater than 2018.
- # NOTE(review): create_index maps `year` as keyword, so this comparison is
- # presumably done on strings rather than numbers — confirm the mapping.
- body = {
- "query": {
- "range": {
- "year": {
- "gt": 2018
- }
- }
- }
- }
- es.search(index=index_name, body=body)
must :: 多个查询条件的完全匹配,相当于 and。
must_not :: 多个查询条件的相反匹配,相当于 not。
should :: 至少有一个查询条件匹配, 相当于 or。
- # bool/must with two term clauses: both `year` and `doi` must match.
- body = {
- "query": {
- "bool": {
- "must": [
- {"term": {"year": 1958}},
- {"term": {"doi": "10.5694/j.1326-5377.1958.tb67127.x"}}
- ]
- }
- }
- }
-
- # bool/must combining a term clause with a range clause.
- # NOTE(review): create_index maps `citations` as keyword, so `gt: 3000` is
- # presumably a string comparison — confirm a numeric mapping is not needed.
- body = {
- "query": {
- "bool": {
- "must": [
- {"term": {"year": 1958}},
- {"range": {"citations": {"gt": 3000}}}
- ]
- }
- }
- }
-
- # bool with must + must_not: `year` must match and field `name` must be absent.
- # NOTE(review): the mapping in create_index declares no top-level `name`
- # field (only `periodical.name`) — confirm the intended field.
- body = {
- "query": {
- "bool": {
- "must": {
- "term": {"year": 1958}
- },
- "must_not": {
- "exists": {
- "field": "name"
- }
- }
- }
- }
- }
- # `body` was reassigned above, so only the last bool query is sent.
- es.search(index=index_name, body=body)
- # Documents that have a `year` field
- body = {
- "query": {
- "exists": {
- "field": "year"
- }
- }
- }
- # Documents that do not have a `year` field
- body = {
- "query": {
- "bool": {
- "must_not": {
- "exists": {
- "field": "year"
- }
- }
-
- }
- }
- }
- # wildcard query: shell-style wildcard pattern
- # NOTE(review): `title` is mapped as text in create_index; indexed terms are
- # presumably lowercased by the default analyzer, so verify that a
- # capitalised pattern such as "*Structure*" actually matches.
- body = {
- "query": {
- "wildcard": {
- "title": "*Structure*"
- }
- }
- }
-
- # regexp query: regular-expression pattern on `year`
- body = {
- "query": {
- "regexp": {
- "year": "20.*"
- }
- }
- }
- # prefix query: values of `id` that start with the given characters
- body = {
- "query": {
- "prefix": {
- "id": "f1803ea131a96817d14290077"
- }
- }
- }
按照id删除
# Delete a single document by its id.
es.delete(index=index_name, id='f1803ea131a96817d142900777cc1c73b41ee6c4')
删除符合条件的所有数据
- # Delete every document that matches the query (here: a match on `year`)
- body = {
- "query": {
- "match": {
- "year": 1958
- }
- }
- }
- es.delete_by_query(index=index_name, body=body)
lang: 指定脚本语言,painless是内置的脚本语言
script: 代表脚本内容,ctx 代表es上下文,_source代表当前的文档。
- # Change a field value; the field is added automatically if it does not exist
- doc_body = {
- "doc": {
- "citations": 2532
- }
- }
- # Add a field via script
- doc_body = {
- 'script': "ctx._source.source = 'kgPlat'"
- }
-
- # Remove a field via script
- doc_body = {
- 'script': "ctx._source.remove('source')"
- }
-
-
- # Each assignment above replaces `doc_body`, so only the last one is sent.
- # NOTE(review): `id` shadows the Python builtin of the same name.
- id = "727f736f07d9b0fd5ad95208079a09ee506e99e2"
- es.update(index=index_name, id=id, body=doc_body)
-
-
- # update_by_query: update every document that matches the query; the query
- # part is written the same way as for the delete and search examples above
- query = {
- "query": {
- "match": {
- "year": 1991
- }
- },
- "script": {
- # Assigns both fields from the values supplied in `params`.
- "source": "ctx._source.citations = params.citations;ctx._source.citations2 = params.citations2",
- "lang": "painless",
- "params": {
- "citations": 0,
- "citations2": 0
- },
- }
- }
- es.update_by_query(index=index_name, body=query)