python 替换数据库中文本中特殊字符

来源:互联网 发布:python 2.7.11.tgz 编辑:程序博客网 时间:2024/05/01 10:53

遇见这个一个需求:几百万的数据,需要替换掉不同种类的敏感语句,需要在测试库中进行替换,成功后再替换生产库。

难点1:考虑即适用开发环境,又适用生产环境,考虑使用python(简洁,方便)

难点2:数据量太大;解决办法:查询时使用elasticsearch

难点3:替换种类多;解决办法:进行一定的封装

写的不算细致,可以替换部分规则的敏感语句。代码显示没有格式,需要自行 Reformat Code

具体代码如下:

# -*- coding: UTF-8 -*-

from elasticsearch import Elasticsearch

import pymysql

import re

import time

# Elasticsearch查找query匹配的结果

def getElasticsearchList(query_string=''):

es = Elasticsearch(

[

'http://192.168.1.166:9200',

]

)

res = es.search(index="test-questions", body={"query": {

"bool": {"must": [{"query_string": {"default_field": "test-question.content", "query": query_string}}],

"must_not": [],

"should": []}}, "from": 0, "size": 2})

print("Got %d Hits:" % res['hits']['total'])

# 查找匹配的字符 ,并以字典形式{id:content}返回

dict = {}

for hit in res['hits']['hits']:

dict[hit["_source"]['id']] = hit["_source"]['content']

return dict

#替换符合规则的字符

def deleteString(dict):

result = {}

for key in dict:

reg = r'alt=".*!"' #替换'alt'标签

strre = re.compile(reg)

str = re.sub(strre, ' ', dict[key])

result[key] = str

return result

#更新操作

conn = pymysql.connect(host='192.168.1.225', user='admin', passwd='neptune@admin', db='teachresource', port=3306,

charset='utf8')

cur = conn.cursor()

resultList = deleteString(dict)

for key in resultList:

cmd = "update sh_testquestion_bak set updateTime=%s,content =%s where id=%s"

cur.execute(

cmd, (int(time.time()) * 1000, resultList[key], key))

conn.commit()

conn.close()

 

0 0