地图上的空白区域是ES渲染的问题,放大后可以看到数据几乎没有丢失
1. ES索引建立
PUT /town_fence
{
  "settings": {
    "number_of_shards": 1
  },
  "mappings": {
    "properties": {
      "province": { "type": "keyword" },
      "city": { "type": "keyword" },
      "district": { "type": "keyword" },
      "town": { "type": "keyword" },
      "fence": { "type": "geo_shape" }
    }
  }
}
2. Python文件解析入ES
# Parse town (township) fence polygons from the "@"-separated CSV file and
# index them into the Elasticsearch index "town_fence".
import hashlib
import sys

import pandas as pd

SOURCE_CSV = "../data/town.csv"
ERROR_CSV = "../data/town_errors.csv"
ES_HOST = "http://127.0.0.1:9200"
INDEX_NAME = "town_fence"


def init_es_client(es_host):
    """Return an Elasticsearch client for ``es_host`` with certificate checks off."""
    # Imported here so the parsing helpers below stay importable (and
    # testable) on machines without the elasticsearch package.
    from elasticsearch import Elasticsearch

    es = Elasticsearch(hosts=[es_host], verify_certs=False)
    return es


def parse_ring(polygon_text):
    """Convert ``"lng,lat;lng,lat;..."`` into a closed ring of ``[lng, lat]`` pairs.

    Coordinates are rounded to 6 decimal places. If the last point differs
    from the first one, a copy of the first point is appended so the ring is
    closed.

    Raises:
        ValueError: if a pair is missing a value or is not numeric.
    """
    ring = []
    for pair in polygon_text.split(";"):
        lng, lat = pair.split(",")[:2]
        ring.append([round(float(lng), 6), round(float(lat), 6)])
    # Compare the whole point: two points may share a longitude and still
    # differ in latitude, which would leave the ring open.
    if ring[0] != ring[-1]:
        ring.append(list(ring[0]))
    return ring


def make_doc_id(province, city, district, town, part_index=0):
    """Return a document id that is identical across runs for the same town.

    The built-in ``hash()`` is salted per process for strings, so it cannot be
    used for ids that must overwrite the same document on a re-import.
    Parts after the first one get a ``_<part_index>`` suffix.

    Raises:
        TypeError: if any of the four name parts is not a string.
    """
    key = "|".join([province, city, district, town])
    digest = hashlib.sha256(key.encode("utf-8")).hexdigest()
    return f"{digest}_{part_index}" if part_index > 0 else digest


def iter_actions(row):
    """Yield one bulk ``index`` action per polygon found in ``row["polyline"]``.

    A fence may consist of several separate polygons joined by ``"|"``; each
    one is indexed as its own document because putting them into a single
    Polygon is rejected as self-intersecting. Actions are produced lazily, so
    polygons before a malformed one are still yielded.
    """
    base = {
        "province": row["province"],
        "city": row["city"],
        "district": row["region"],
        "town": row["town"],
    }
    for part_index, polygon_text in enumerate(row["polyline"].split("|")):
        fence = {"type": "Polygon", "coordinates": [parse_ring(polygon_text)]}
        yield {
            "_op_type": "index",
            "_index": INDEX_NAME,
            "_id": make_doc_id(
                base["province"], base["city"], base["district"], base["town"],
                part_index,
            ),
            "_source": dict(base, fence=fence),
        }


def main():
    """Index every fence from SOURCE_CSV and write the failed rows to ERROR_CSV."""
    from elasticsearch import helpers

    town_fence_df = pd.read_csv(SOURCE_CSV, sep='@', encoding='UTF-8')
    # Start with every row marked OK; otherwise the column would not exist
    # when no row fails and the final filter would raise KeyError.
    town_fence_df['flag'] = True

    es_client = init_es_client(ES_HOST)
    count = 0
    try:
        for index, item in town_fence_df.iterrows():
            try:
                for action in iter_actions(item):
                    # One document per request so a rejected shape only
                    # fails its own row.
                    try:
                        helpers.bulk(es_client, [action])
                    except Exception as exc:  # best effort: record and go on
                        print(f"row {index}: indexing failed: {exc}", file=sys.stderr)
                        town_fence_df.loc[index, 'flag'] = False
                    else:
                        count += 1
                        print(count)
            except (ValueError, TypeError, AttributeError, IndexError) as exc:
                # Malformed or missing polyline / name values in this row.
                print(f"row {index}: cannot parse fence: {exc}", file=sys.stderr)
                town_fence_df.loc[index, 'flag'] = False
    finally:
        es_client.close()

    town_fence_df[~town_fence_df['flag']].to_csv(
        ERROR_CSV, sep='@', encoding='UTF-8', index=False
    )


if __name__ == "__main__":
    main()
3. 数据查询
欢迎关注公众号 算法小生