实习第一周——UpYunUprSync——Sqlite版本
来源:互联网 发布:淘宝自动发卡 编辑:程序博客网 时间:2024/06/01 10:26
先用Sqlite做一个版本,Sqlite是一个关系型的嵌入式数据库,可以支持复杂的数据库逻辑,但是,相对的效率也不是很nice,而这里要用到的数据逻辑并不复杂,因此可以使用KV数据库,改进版本是使用了Redis 这一NoSql数据库,但Redis需要在使用机上开启监听端口,因此也不是很适合,最后改用了LevelDB,效率较Sqlite提高了20倍左右。
__author__ = 'glcsnz123'# -*- coding: utf-8 -*-import time, os, datetimeimport thread, sys, signalimport sqlite3import logging, atexitimport upyunimport Queueclass EmptyLogger: def error(self, st): pass def debug(self, st): pass def info(self, st): pass def warning(self, st): pass#------------LOG CONFIG-----------------LOGGER = EmptyLogger()__LOGLEVEL = logging.INFOLOGFILE = "/tmp/UpYunSync.log"#--------------------------------------def InitLogger(): global LOGGER, LOGFILE, __LOGLEVEL if __LOGLEVEL == logging.CRITICAL or not os.access(os.path.dirname(LOGFILE), os.W_OK): LOGGER = EmptyLogger() return LOGGER = logging.getLogger() hdlr = logging.FileHandler(LOGFILE) formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') hdlr.setFormatter(formatter) LOGGER.addHandler(hdlr) LOGGER.setLevel(__LOGLEVEL)def SetLoggerLevel(loglevel=""):#DEBUG < INFO < WARNING < ERROR < OFF global __LOGLEVEL if loglevel.upper() == "INFO": LOGLEVEL = logging.INFO elif loglevel.upper() == "WARNING": LOGLEVEL = logging.WARNING elif loglevel.upper() == "ERROR": LOGLEVEL = logging.ERROR elif loglevel.upper() == "DEBUG": LOGLEVEL = logging.DEBUG elif loglevel.upper() == "OFF": LOGLEVEL = logging.CRITICAL else: LOGLEVEL = logging.NOTSETdef getLastModifyTime(pathname):#获取最后一次修改时间 if os.path.isfile(pathname) or os.path.isdir(pathname): #如果存在该文件的话 return (datetime.datetime.fromtimestamp(os.path.getmtime(pathname))).strftime("%F %X") return (datetime.datetime.fromtimestamp(0)).strftime("%F %X")class UpYunUprSync: def __init__(self, BUCKETNAME='uprsyncx', USERNAME='admin', PASSWORD='adminadmin', SRC_DIR="/home/glcsnz123/Django-1.5.4", DST_DIR="/zlssasj", DBFILENAME=".UpYunSqlite.db"): if SRC_DIR.endswith(os.sep): SRC_DIR = SRC_DIR[0:-1:1] if DST_DIR.endswith(os.sep): DST_DIR = DST_DIR[0:-1:1] self.__BUCKETNAME = BUCKETNAME self.__USERNAME = USERNAME self.__PASSWORD = PASSWORD self.__SRC_DIR = SRC_DIR self.__DST_DIR = DST_DIR self.__HEADERS = {"x-gmkerl-rotate": "180"} self.__DBFILENAME = 
DBFILENAME self.__WORKER_LIMIT = 10 self.__ErrorFileList = [] self.__mkdirList = [] # 初始化sqlites self.__SQLites = UpYunSqlite(self.__SRC_DIR, self.__DBFILENAME) # daemon 配置信息 self.pidfile = "/tmp/uprsync.pid" self.stdout = "/dev/null" self.stderr = "/dev/null" def setThreadNumLimit(self, T_Limit): self.__WORKER_LIMIT = max(1, T_Limit) def __getFileList(self, fpath): start = datetime.datetime.now() if fpath.endswith(os.sep): pathStack = [fpath[0:-1:1]] else: pathStack = [fpath] self.__DFS_FINISHED = False self.__JobFileList = Queue.Queue(100000) #用来存放需要上传的文件的路径 self.__JobPathList = Queue.Queue() #用来存放需要创建的目录 dirList = [] #初始化目录 try: tmpList = os.listdir(fpath) except OSError, e: print "[ERROR]Permission Denied!\n " + fpath, "\n" LOGGER.error("[ERROR]Permission Denied!\n " + fpath + "\n\n") self.__DFS_FINISHED = True return if self.__DST_DIR != "": self.__CilentUp.mkdir(self.__DST_DIR) currentDir = os.sep.join(pathStack) SqlPathList = self.__SQLites.getPathFromSQL(currentDir) SqlFileList = self.__SQLites.getFileFromSQL(currentDir) for filename in tmpList: fullname = os.sep.join(pathStack) + os.sep + filename if os.path.isdir(fullname): if SqlPathList.has_key(filename) == False: self.__JobPathList.put(fullname) dirList.append(filename) elif os.path.islink(fullname): print "[WARNING]file:", fullname, "is a symbol link file\n" LOGGER.warning(fullname.join(["[WARNING]file: ", " is a symbol link file\n\n"])) elif filename == self.__DBFILENAME:#数据库文件,不做任何处理! 
continue else: if SqlFileList.has_key(filename) == False: res = 1 elif SqlFileList[filename] == getLastModifyTime(fullname): print "[DEBUG]file:", fullname, " is not modified\n" LOGGER.debug(fullname.join(["[DEBUG]file: ", " is not modified\n\n"])) continue else: res = 0 self.__JobFileList.put((fullname, res), block=True) while dirList.__len__() > 0: if dirList[-1] == "": pathStack.pop() dirList.pop() continue try: tmpList = os.listdir(os.sep.join(pathStack) + os.sep + dirList[-1]) except: print "[ERROR]Permission Denied!\n" + os.sep.join(pathStack) + os.sep + dirList[-1] + "\n" LOGGER.error("[ERROR]Permission Denied!\n" + os.sep.join(pathStack) + os.sep + dirList[-1] + "\n\n") continue pathStack.append(dirList[-1]) dirList.pop() dirList.append("") currentDir = os.sep.join(pathStack) SqlPathList = self.__SQLites.getPathFromSQL(currentDir) SqlFileList = self.__SQLites.getFileFromSQL(currentDir) for filename in tmpList: fullname = os.sep.join([currentDir, filename]) if os.path.isdir(fullname): if SqlPathList.has_key(filename) == False: self.__JobPathList.put(fullname) dirList.append(filename) elif os.path.islink(fullname): print "[WARNING]file:", fullname, " is a symbol link file!\n" LOGGER.warning(fullname.join(["[WARNING]file: ", " is a symbol link file\n\n"])) else: if SqlFileList.has_key(filename) == False: res = 1 elif SqlFileList[filename] == getLastModifyTime(fullname): print "[DEBUG]file:", fullname, " is not modified\n" LOGGER.debug(fullname.join(["[DEBUG]file: ", " is not modified\n\n"])) continue else: res = 0 self.__JobFileList.put((fullname, res), block=True) #此处代表已经完成了对目录的遍历工作,标记 self.__DFS_FINISHED = True print "[INFO] Finish the dfs after", (datetime.datetime.now() - start).seconds, "s\n" LOGGER.info("[INFO] Finish the dfs after " + (datetime.datetime.now() - start).seconds.__str__() + " s\n\n") def __FileSync(self, fpath): try: self.__CilentUp.put("".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]), open(fpath).read()) print 
"".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " oked\n" LOGGER.debug("".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " oked\n\n") return True except upyun.UpYunClientException as ce: self.__ErrorFileList.append(fpath) print "".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " failed!\n" print "Except an UpYunClientException ..." print "Error Message: " + ce.msg + "\n" LOGGER.error("\n".join(["".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " failed!\n", "Except an UpYunClientException ...", "Error Message: " + ce.msg + "\n\n"])) return False except upyun.UpYunServiceException as se: self.__ErrorFileList.append(fpath) print "".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " failed\n" print "Except an UpYunServiceException ..." print "HTTP Status Code: " + str(se.status) print "Error Message: " + se.msg + "\n" LOGGER.error("\n".join(["".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " failed\n", "Except an UpYunServiceException ...\nHTTP Status Code: " + str(se.status), "Error Message: " + se.msg + "\n\n"])) if se.err: print se.err LOGGER.error(se.err + "\n\n") return False def __Worker(self,pid): try: waiting = 1 while True: try: if self.__DFS_FINISHED == False: self.__mkdirList[pid] = self.__JobPathList.get(block=True, timeout=5) else: self.__mkdirList[pid] = self.__JobPathList.get(block=False) fpath = self.__mkdirList[pid] while True: flag = 0 for i in range(self.__WORKER_LIMIT): if i != pid and self.__mkdirList[i] != "" and fpath.startswith(self.__mkdirList[i]): flag = 1 break if flag == 0: break else: time.sleep(2) self.__CilentUp.mkdir("".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]])) self.__mkdirList[pid] = "" self.__SQLites.insertPathToSQL(fpath) continue except Queue.Empty, efr: pass except Exception,ex: pass try: fpath = self.__JobFileList.get(block=True, timeout=waiting) except Exception, e: if self.__DFS_FINISHED: return else: waiting = min(waiting * 2, 30) continue 
waiting = max(waiting/2, 1) if os.access(fpath[0],os.R_OK) == False: self.__ErrorFileList.append(fpath[0]) print fpath[0].join(["[ERROR] "," Permission Denied!Need Read access\n\n"]) LOGGER.error(fpath[0].join(["[ERROR] "," Permission Denied!Need Read access\n\n"])) continue res = self.__FileSync(fpath[0]) #if sync success, update the datebase. if res == True: if fpath[1] == 1: self.__SQLites.insertFileToSQL(fpath[0]) elif fpath[1] == 0: self.__SQLites.updateFileToSQL(fpath[0]) else: print "==============ERROR===================" except Exception, e: print "[WARNING] a thread died.", e, "\n" LOGGER.warning("[WARNING] a thread died." + e.__str__() + "\n\n") finally: self.__WORKER_NOW -= 1 self.__mkdirList[pid]="" def __InitLogIn(self): self.__CilentUp = upyun.UpYun(self.__BUCKETNAME, self.__USERNAME, self.__PASSWORD, timeout=30, endpoint=upyun.ED_AUTO) def runMultiThreadSync(self): start = datetime.datetime.now() self.__WORKER_NOW = self.__WORKER_LIMIT self.__mkdirList = [""] * self.__WORKER_LIMIT self.__InitLogIn() thread.start_new_thread(self.__getFileList,(self.__SRC_DIR,)) time.sleep(3) for i in range(self.__WORKER_LIMIT): thread.start_new_thread(self.__Worker, (i,)) while self.__WORKER_NOW > 0: while self.__WORKER_NOW < self.__WORKER_LIMIT and (self.__JobPathList.qsize()>0 or self.__JobFileList.qsize()>0): thread.start_new_thread(self.__Worker, ()) self.__WORKER_NOW += 1 self.__mkdirList.append("") print "[INFO] Create a new Thread! \n" LOGGER.info("[INFO] Create a new Thread! 
\n\n") # time.sleep(20) for i in range(4): print self.__JobFileList.qsize() , self.__JobPathList.qsize() LOGGER.debug("[INFO]" + str(self.__JobFileList.qsize()) + " files are found and waiting for sync.") time.sleep(5) self.RollBack() print "[INFO]Finish uprsync after " + (datetime.datetime.now() - start).seconds.__str__() + " s\n" LOGGER.info("[INFO]Finish uprsync after " + (datetime.datetime.now() - start).seconds.__str__() + " s\n\n") def RollBack(self): for fullname in self.__ErrorFileList: print fullname.join(["[WARNING] ", " is rolling back!"]) LOGGER.warning(fullname.join(["[WARNING] ", " is rolling back!"])) pathname = os.path.dirname(fullname)[self.__SRC_DIR.__len__():] while os.path.basename(pathname) != "": self.__SQLites.rollBackPathToSQL(pathname) pathname = os.path.dirname(pathname) #Daemon ------------------------------------------ def __daemonize(self): try: pid = os.fork() if pid > 0: sys.exit(0) except OSError, ose: sys.stderr.write("[Daemon ERROR]fork #1 failed: %d (%s)\n" % (ose.errno, ose.strerror)) LOGGER.error("[Daemon ERROR]fork #1 failed: %d (%s)\n\n" % (ose.errno, ose.strerror)) sys.exit(1) #脱离终端 os.setsid() #修改当前工作目录 os.chdir("/") #重设文件创建权限 os.umask(0) #第二次fork,禁止进程重新打开控制终端 try: pid = os.fork() if pid > 0: sys.exit(0) except OSError, e: sys.stderr.write("[Daemon ERROR]fork #2 failed: %d (%s)\n" % (e.errno, e.strerror)) LOGGER.error("[Daemon ERROR]fork #2 failed: %d (%s)\n\n" % (e.errno, e.strerror)) sys.exit(1) sys.stdout.flush() sys.stderr.flush() so = file(self.stdout, 'a+') se = file(self.stderr, 'a+', 0) #重定向标准输出/错误 os.dup2(so.fileno(), sys.stdout.fileno()) os.dup2(se.fileno(), sys.stderr.fileno()) #注册程序退出时的函数,即删掉pid文件 atexit.register(self.__delpid) pid = str(os.getpid()) file(self.pidfile, 'w+').write("%s\n" % pid) def __delpid(self): os.remove(self.pidfile) def start(self): """ Start the daemon """ print "[Deamon DEBUG]start in Daemon\n" LOGGER.debug("[Daemon DEBUG]start in Daemon\n\n") # Check for a pidfile to see if the daemon 
already runs try: pf = file(self.pidfile, 'r') pid = int(pf.read().strip()) pf.close() except IOError: pid = None if pid: message = "pidfile %s already exist. Daemon already running?\n\n" sys.stderr.write(message % self.pidfile) LOGGER.error("[Daemon ERROR]pidfile %s already exist. Daemon already running?\n\n" % self.pidfile) sys.exit(1) # Start the daemon self.__daemonize() self.__run() def stop(self): """ Stop the daemon """ # Get the pid from the pidfile print "[Deamon DEBUG]stop in Daemon\n" LOGGER.debug("[Daemon DEBUG]stop in Daemon\n\n") try: pf = file(self.pidfile, 'r') pid = int(pf.read().strip()) pf.close() except IOError: pid = None if not pid: message = "pidfile %s does not exist. Daemon not running?\n" sys.stderr.write(message % self.pidfile) LOGGER.error("[Daemon ERROR]pidfile %s does not exist. Daemon not running?\n\n" % self.pidfile) return # not an error in a restart # Try killing the daemon process try: while True: os.kill(pid, signal.SIGTERM) time.sleep(0.1) except OSError, err: err = str(err) if err.find("No such process") > 0: if os.path.exists(self.pidfile): os.remove(self.pidfile) else: print str(err) sys.exit(1) def restart(self): """ Restart the daemon """ self.stop() self.start() def __run(self): self.runMultiThreadSync()#---------------------------- DB ----------------------------------------class UpYunSqlite: """ 元数据的操作 """ def __init__(self, SRC_DIR="/home/glcsnz123", DBFILENAME="example.db"): self.SRC_DIR = SRC_DIR self.DBFILENAME = DBFILENAME self.__InitConnect() self.lockSql = thread.allocate_lock() def __InitDBFile(self): if not os.access(self.SRC_DIR, os.W_OK): print "[ERROR]No write access in current directory" LOGGER.error("[ERROR]No write access in current directory") sys.exit("403") conn = sqlite3.connect(os.sep.join([self.SRC_DIR, self.DBFILENAME])) cur = conn.cursor() cur.execute("CREATE TABLE FileModify(id INTEGER PRIMARY KEY AUTOINCREMENT, filename VARCHAR(256),\ pathname VARCHAR(256),last_modify DATE)") cur.execute("CREATE 
TABLE PathModify(id INTEGER PRIMARY KEY AUTOINCREMENT, pathname VARCHAR(256),\ fatherpath VARCHAR(256),last_modify DATE)") cur.close() conn.close() def __InitConnect(self): if os.path.isfile(os.sep.join([self.SRC_DIR, self.DBFILENAME])) == False: self.__InitDBFile() self.CONN = sqlite3.connect(os.sep.join([self.SRC_DIR, self.DBFILENAME]), check_same_thread=False) self.CUR = self.CONN.cursor() def getPathFromSQL(self, fapath): query = "select pathname,last_modify from PathModify where fatherpath='%s'" % ( self.__rpQuota(fapath[self.SRC_DIR.__len__():])) try: self.lockSql.acquire() self.CUR.execute(query) res = self.CUR.fetchall() finally: self.lockSql.release() resObj = {} for i in range(res.__len__()): resObj[res[i][0]] = res[i][1] return resObj def getFileFromSQL(self, fapath): query = "select filename,last_modify from FileModify where pathname = '%s'" % ( self.__rpQuota(fapath[self.SRC_DIR.__len__():])) try: self.lockSql.acquire() self.CUR.execute(query) res = self.CUR.fetchall() finally: self.lockSql.release() resObj = {} for i in range(res.__len__()): resObj[res[i][0]] = res[i][1] return resObj def updateFileToSQL(self, fpath): query = r"update FileModify SET last_modify = '%s' where filename = '%s' AND pathname = '%s'" % ( getLastModifyTime(fpath), self.__rpQuota(os.path.basename(fpath)), self.__rpQuota(os.path.dirname(fpath)[self.SRC_DIR.__len__():])) try: self.lockSql.acquire() self.CUR.execute(query) self.CONN.commit() except sqlite3.OperationalError, soe: print "[Error] syntax error!\nError Query: " + query, "\nError Path: " + fpath + '\n' LOGGER.error("[Error] syntax error!\nError Query: " + query + "\nError Path: " + fpath + '\n\n') finally: self.lockSql.release() def insertFileToSQL(self, fpath): query = r"insert into FileModify(filename,pathname,last_modify) values('%s','%s','%s')" % ( self.__rpQuota(os.path.basename(fpath)), self.__rpQuota(os.path.dirname(fpath)[self.SRC_DIR.__len__():]), getLastModifyTime(fpath)) try: self.lockSql.acquire() 
self.CUR.execute(query) self.CONN.commit() except sqlite3.OperationalError, soe: print "[Error] syntax error!\nError Query: " + query, "\nError Path: " + fpath + '\n' LOGGER.error("[Error] syntax error!\nError Query: " + query + "\nError Path: " + fpath + '\n\n') finally: self.lockSql.release() def updatePathToSQL(self, pathname): query = r"update PathModify SET last_modify = '%s' where pathname = '%s' and fatherpath='%s'" % ( getLastModifyTime(pathname), self.__rpQuota(os.path.basename(pathname)), self.__rpQuota(os.path.dirname(pathname)[self.SRC_DIR.__len__():])) try: self.lockSql.acquire() self.CUR.execute(query) self.CONN.commit() except sqlite3.OperationalError, soe: print "[Error] syntax error!\n Error Query: " + query, "\nError Path: " + pathname + "\n" LOGGER.error("[Error] syntax error!\n Error Query: " + query + "\nError Path: " + pathname + "\n\n") finally: self.lockSql.release() def rollBackPathToSQL(self, pathname): query = r"update PathModify SET last_modify = '%s' where pathname = '%s' and fatherpath='%s'" % ( getLastModifyTime(""), self.__rpQuota(os.path.basename(pathname)), self.__rpQuota(os.path.dirname(pathname))) try: self.lockSql.acquire() self.CUR.execute(query) self.CONN.commit() except sqlite3.OperationalError, soe: print "[Error] syntax error!\n Error Query: " + query, "\nError Path: " + pathname + "\n" LOGGER.error("[Error] syntax error!\n Error Query: " + query + "\nError Path: " + pathname + "\n\n") finally: self.lockSql.release() def insertPathToSQL(self, pathname): query = r"insert into PathModify(pathname,fatherpath,last_modify) values('%s','%s','%s')" % ( self.__rpQuota(os.path.basename(pathname)), self.__rpQuota(os.path.dirname(pathname)[self.SRC_DIR.__len__():]), getLastModifyTime(pathname)) try: self.lockSql.acquire() self.CUR.execute(query) self.CONN.commit() except sqlite3.OperationalError, soe: print "[Error] syntax error!\n Error Query: " + query, "\nError Path: " + pathname + "\n" LOGGER.error("[Error] syntax error!\n Error 
Query: " + query + "\nError Path: " + pathname + "\n\n") finally: self.lockSql.release() def __rpQuota(self, st): return st.replace("'", r"//")if __name__ == "__main__": InitLogger() ups = UpYunUprSync() ups.setThreadNumLimit(20) ups.runMultiThreadSync()################################ 问题主要出在了数据库的询问上,所以必须减少数据库的访问# getFileList函数主要的花费是在mkdir和数据库操作上。,数据库操作占了50%的花费# mkdir大概花了25%的时间代价#
ReadMe.txt基本函数接口初始化日志: InitLogger() 默认日志等级为INFO 设置日志等级 SetLoggerLevel(log_level) 参数log_level 为日志的等级 可选日志等级有(不区分大小写): "DEBUG" "INFO" "WARNING" "ERROR" "OFF" 其中OFF表示关闭日志功能,另外,如果未初始化日志,日志功能默认是关闭的。 日志的存放地址默认为/tmp/UpYunSync.log。同时,也可通过以下方法来设定日志文件地址: UpYunrSync.LOGFILE = LOG_FILE 其中参数LOG_FILE 为日志文件的地址初始化UpYunrSync import UpYunrSync ups = UpYunrSync.UpYunUprSync(BUCKETNAME, USERNAME, PASSWORD, SRC_DIR,DST_DIR, DBFILENAME) 其中参数 bucket 为空间名称,username 和 password 分别为授权操作员帐号和密码,SRC_DIR和DST_DIR分别为需要同步的本地目录和服务器目录,必选。 参数DBFILENAME为存储本地文件元数据的sqlite文件,默认值为.UpYunSqlite.db设置线程开启个数 ups.setThreadNumLimit(Thread_Num) 参数Thread_Num为上传文件的线程个数,线程个数并不是越多越好,应当根据所要上传的目录中文件个数以及大小来确定。同步目录 终端直接同步文件形式 ups.runMultiThreadSync() 执行文件同步的操作 后台守护进程同步文件形式 ups.start() ups.stop() ups.restart() 三个方法分别是启动进程、停止进程和重启进程。 后台进程模式下,分别设置程序标准输出、错误输出和PID文件位置的方法如下: ups.stdout = Stdout_File ups.stderr = Stderr_File ups.pidfile = Pid_File 其中,参数Stdout_File 和 Stderr_File 分别是标准输出和错误输出的文件地址,默认值为/dev/null。Pid_File为进程的PID文件位置,默认值为/tmp/uprsync.pid
- 实习第一周——UpYunUprSync——Sqlite版本
- 第一周——《一周问题集》
- 第八天——实习
- Android笔记—第一周
- 第一周—*写名字
- 第一周—长方形面积
- 第一周实习记
- 实习第一周
- HSGF实习第一周
- PRT实习第一周
- 实习的第一周
- 实习第一周
- 实习第一周小记
- ICT实习第一周
- 第一周 实习总结
- 公司实习第一周
- 实习第一周
- 实习第一周记录
- MVC4 的ViewBeg和ViewData的简单实用方法
- 视频帧率和分辨率对QoE的影响
- github帐号的朋友,有30元任务等你来拿
- python startswith and endswith
- CMD更改IP设置
- 实习第一周——UpYunUprSync——Sqlite版本
- sql server 2008远程连接配置
- base64编码是怎么工作的?
- php 数组的小函数之想
- WINNT & WIN2K下实现进程的完全隐藏——宿主程序
- 如何让程序自动管理线程
- Lua学习笔记(九)
- Ural 1043
- android4.0 中设置网络出错com.android.settings.WirelessSettings没有在AndroidManifest.xml中声明