Elasticsearch数据导入

来源:互联网 发布:胡门网络胡建伟 编辑:程序博客网 时间:2024/06/05 07:57
import linecache
from pyes import ES

# Import settings. Comments use '#': a trailing '//' is the floor-division
# operator in Python, not a comment marker.
ES_SERVER = "47.92.71.18:9200"  # Elasticsearch server address (host:port)
ES_INDEX = "cellinfo"  # target index (the "database instance")
ES_TYPE = "cell"  # document type (the "table name")
CELL_FILE = "cellinfo_v2_19(19).txt"  # input file whose records are inserted
BULK_SIZE = 1000  # send one bulk request per 1000 documents


def create_document(line):
    """Build the document dict for one tab-separated record line.

    Args:
        line: One record as text; surrounding newline characters are
            removed before parsing. Fields are read by position, in the
            order: mcc, mnc, lac, ci, lat, lon, acc, date, validity, addr,
            province, city, district, township.

    Returns:
        A dict keyed by the field names above. Latitude and longitude are
        converted to floats rounded to 8 decimal places and nested under
        ``"location"``; every other value is kept as the raw string.

    Raises:
        IndexError: If the line has fewer than 14 tab-separated fields
            (this includes an empty line).
        ValueError: If the latitude or longitude field is not a number.
    """
    # Drop the newline notation before splitting into columns.
    fields = line.strip('\n').split('\t')
    return {
        "mcc": fields[0],
        "mnc": fields[1],
        "lac": fields[2],
        "ci": fields[3],
        "location": {
            "lat": round(float(fields[4]), 8),
            "lon": round(float(fields[5]), 8),
        },
        "acc": fields[6],
        "date": fields[7],
        "validity": fields[8],
        "addr": fields[9],
        "province": fields[10],
        "city": fields[11],
        "district": fields[12],
        "township": fields[13],
    }


def main():
    """Bulk-index every record of CELL_FILE into ES_INDEX / ES_TYPE.

    The file is streamed line by line, so it is never held in memory as a
    whole and there is no upper limit on the number of lines. Each
    non-blank line is converted with ``create_document`` and queued on the
    connection with ``bulk=True``. A progress line is printed every
    BULK_SIZE input lines. After the last line the pending bulk buffer is
    flushed with ``force_bulk()`` and "end!" is printed.

    On failure the last progress checkpoint is printed ("error at N"),
    followed by the failing line number and the exception. The exception
    is not re-raised, so the script still exits normally after an error.
    """
    # Local import so this function carries its own dependency; io.open
    # accepts the encoding argument on both Python 2 and Python 3.
    import io

    es_conn = ES(ES_SERVER, timeout=20.0, bulk_size=BULK_SIZE)
    error_index = 0  # last progress checkpoint reached before a failure
    line_no = 0
    try:
        with io.open(CELL_FILE, encoding="utf-8") as cell_file:
            for line_no, current_line in enumerate(cell_file, 1):
                # A blank line carries no record; skip it instead of
                # treating it as the end of the input.
                if not current_line.strip():
                    continue
                es_conn.index(create_document(current_line),
                              ES_INDEX, ES_TYPE, bulk=True)
                if line_no % BULK_SIZE == 0:
                    print('%d' % line_no)
                    error_index = line_no
        # Send whatever is still buffered after the final partial batch.
        es_conn.force_bulk()
        print("end!")
    except Exception as exc:
        # 'Exception' rather than a bare 'except' so that Ctrl-C and
        # SystemExit still stop the script.
        print("error at %d" % error_index)
        print("line %d: %r" % (line_no, exc))

if __name__ == '__main__':
main()
原创粉丝点击