Internship Week 1: UpYunUprSync (SQLite version)


The first version uses SQLite. SQLite is an embedded relational database that can handle fairly complex database logic, but its performance is comparatively modest, and the data logic needed here is simple, so a key-value (KV) store is good enough. An improved version used Redis, a NoSQL database, but Redis requires a listening port to be opened on the machine it runs on, which makes it a poor fit here. The final version switched to LevelDB, which is roughly 20 times faster than the SQLite version.
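To make the KV argument concrete before the full SQLite implementation below, here is a minimal sketch (not the code from this post, and not the later LevelDB version either) of how the same "what was this file's last modification time?" lookup can be served by a KV store. It assumes the third-party plyvel binding for LevelDB; the key layout (relative path as key, last-modify string as value) is only an illustration.

# a minimal sketch, assuming the "plyvel" LevelDB binding; key layout is hypothetical
import plyvel

class KVMeta(object):
    def __init__(self, dbpath=".UpYunLevelDB"):
        # one LevelDB directory replaces the two SQLite tables
        self.db = plyvel.DB(dbpath, create_if_missing=True)

    def get_mtime(self, relpath):
        # a single point lookup replaces "select last_modify ... where pathname = ..."
        return self.db.get(relpath)

    def set_mtime(self, relpath, mtime):
        # insert and update collapse into one put()
        self.db.put(relpath, mtime)

Because every query in the sync tool is "look up one path" or "write one path", the relational machinery of SQLite buys nothing here, which is why the KV rewrite pays off.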

# -*- coding: utf-8 -*-
__author__ = 'glcsnz123'

import time, os, datetime
import thread, sys, signal
import sqlite3
import logging, atexit
import upyun
import Queue


class EmptyLogger:
    def error(self, st):
        pass

    def debug(self, st):
        pass

    def info(self, st):
        pass

    def warning(self, st):
        pass

# ------------LOG CONFIG-----------------
LOGGER = EmptyLogger()
__LOGLEVEL = logging.INFO
LOGFILE = "/tmp/UpYunSync.log"
# ----------------------------------------


def InitLogger():
    global LOGGER, LOGFILE, __LOGLEVEL
    if __LOGLEVEL == logging.CRITICAL or not os.access(os.path.dirname(LOGFILE), os.W_OK):
        LOGGER = EmptyLogger()
        return
    LOGGER = logging.getLogger()
    hdlr = logging.FileHandler(LOGFILE)
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    LOGGER.addHandler(hdlr)
    LOGGER.setLevel(__LOGLEVEL)


def SetLoggerLevel(loglevel=""):  # DEBUG < INFO < WARNING < ERROR < OFF
    global __LOGLEVEL
    if loglevel.upper() == "INFO":
        __LOGLEVEL = logging.INFO
    elif loglevel.upper() == "WARNING":
        __LOGLEVEL = logging.WARNING
    elif loglevel.upper() == "ERROR":
        __LOGLEVEL = logging.ERROR
    elif loglevel.upper() == "DEBUG":
        __LOGLEVEL = logging.DEBUG
    elif loglevel.upper() == "OFF":
        __LOGLEVEL = logging.CRITICAL
    else:
        __LOGLEVEL = logging.NOTSET


def getLastModifyTime(pathname):  # last modification time of a path
    if os.path.isfile(pathname) or os.path.isdir(pathname):  # only if the file or directory exists
        return (datetime.datetime.fromtimestamp(os.path.getmtime(pathname))).strftime("%F %X")
    return (datetime.datetime.fromtimestamp(0)).strftime("%F %X")


class UpYunUprSync:
    def __init__(self, BUCKETNAME='uprsyncx', USERNAME='admin', PASSWORD='adminadmin',
                 SRC_DIR="/home/glcsnz123/Django-1.5.4",
                 DST_DIR="/zlssasj", DBFILENAME=".UpYunSqlite.db"):
        if SRC_DIR.endswith(os.sep):
            SRC_DIR = SRC_DIR[0:-1:1]
        if DST_DIR.endswith(os.sep):
            DST_DIR = DST_DIR[0:-1:1]
        self.__BUCKETNAME = BUCKETNAME
        self.__USERNAME = USERNAME
        self.__PASSWORD = PASSWORD
        self.__SRC_DIR = SRC_DIR
        self.__DST_DIR = DST_DIR
        self.__HEADERS = {"x-gmkerl-rotate": "180"}
        self.__DBFILENAME = DBFILENAME
        self.__WORKER_LIMIT = 10
        self.__ErrorFileList = []
        self.__mkdirList = []
        # initialise the SQLite metadata store
        self.__SQLites = UpYunSqlite(self.__SRC_DIR, self.__DBFILENAME)
        # daemon configuration
        self.pidfile = "/tmp/uprsync.pid"
        self.stdout = "/dev/null"
        self.stderr = "/dev/null"

    def setThreadNumLimit(self, T_Limit):
        self.__WORKER_LIMIT = max(1, T_Limit)

    def __getFileList(self, fpath):
        start = datetime.datetime.now()
        if fpath.endswith(os.sep):
            pathStack = [fpath[0:-1:1]]
        else:
            pathStack = [fpath]
        self.__DFS_FINISHED = False
        self.__JobFileList = Queue.Queue(100000)  # paths of files waiting to be uploaded
        self.__JobPathList = Queue.Queue()  # directories waiting to be created on the remote side
        dirList = []
        # initialise with the top-level directory listing
        try:
            tmpList = os.listdir(fpath)
        except OSError, e:
            print "[ERROR]Permission Denied!\n " + fpath, "\n"
            LOGGER.error("[ERROR]Permission Denied!\n " + fpath + "\n\n")
            self.__DFS_FINISHED = True
            return
        if self.__DST_DIR != "":
            self.__CilentUp.mkdir(self.__DST_DIR)
        currentDir = os.sep.join(pathStack)
        SqlPathList = self.__SQLites.getPathFromSQL(currentDir)
        SqlFileList = self.__SQLites.getFileFromSQL(currentDir)
        for filename in tmpList:
            fullname = os.sep.join(pathStack) + os.sep + filename
            if os.path.isdir(fullname):
                if SqlPathList.has_key(filename) == False:
                    self.__JobPathList.put(fullname)
                dirList.append(filename)
            elif os.path.islink(fullname):
                print "[WARNING]file:", fullname, "is a symbol link file\n"
                LOGGER.warning(fullname.join(["[WARNING]file: ", " is a symbol link file\n\n"]))
            elif filename == self.__DBFILENAME:  # the metadata database file itself is never uploaded
                continue
            else:
                if SqlFileList.has_key(filename) == False:
                    res = 1
                elif SqlFileList[filename] == getLastModifyTime(fullname):
                    print "[DEBUG]file:", fullname, " is not modified\n"
                    LOGGER.debug(fullname.join(["[DEBUG]file: ", " is not modified\n\n"]))
                    continue
                else:
                    res = 0
                self.__JobFileList.put((fullname, res), block=True)
        while dirList.__len__() > 0:
            if dirList[-1] == "":
                pathStack.pop()
                dirList.pop()
                continue
            try:
                tmpList = os.listdir(os.sep.join(pathStack) + os.sep + dirList[-1])
            except:
                print "[ERROR]Permission Denied!\n" + os.sep.join(pathStack) + os.sep + dirList[-1] + "\n"
                LOGGER.error("[ERROR]Permission Denied!\n" + os.sep.join(pathStack) + os.sep + dirList[-1] + "\n\n")
                dirList.pop()  # drop the unreadable directory so the loop does not retry it forever
                continue
            pathStack.append(dirList[-1])
            dirList.pop()
            dirList.append("")
            currentDir = os.sep.join(pathStack)
            SqlPathList = self.__SQLites.getPathFromSQL(currentDir)
            SqlFileList = self.__SQLites.getFileFromSQL(currentDir)
            for filename in tmpList:
                fullname = os.sep.join([currentDir, filename])
                if os.path.isdir(fullname):
                    if SqlPathList.has_key(filename) == False:
                        self.__JobPathList.put(fullname)
                    dirList.append(filename)
                elif os.path.islink(fullname):
                    print "[WARNING]file:", fullname, " is a symbol link file!\n"
                    LOGGER.warning(fullname.join(["[WARNING]file: ", " is a symbol link file\n\n"]))
                else:
                    if SqlFileList.has_key(filename) == False:
                        res = 1
                    elif SqlFileList[filename] == getLastModifyTime(fullname):
                        print "[DEBUG]file:", fullname, " is not modified\n"
                        LOGGER.debug(fullname.join(["[DEBUG]file: ", " is not modified\n\n"]))
                        continue
                    else:
                        res = 0
                    self.__JobFileList.put((fullname, res), block=True)
        # the directory traversal is finished; mark it so the workers can drain the queues and exit
        self.__DFS_FINISHED = True
        print "[INFO] Finish the dfs after", (datetime.datetime.now() - start).seconds, "s\n"
        LOGGER.info("[INFO] Finish the dfs after " + (datetime.datetime.now() - start).seconds.__str__() + " s\n\n")

    def __FileSync(self, fpath):
        try:
            self.__CilentUp.put("".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]), open(fpath).read())
            print "".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " oked\n"
            LOGGER.debug("".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " oked\n\n")
            return True
        except upyun.UpYunClientException as ce:
            self.__ErrorFileList.append(fpath)
            print "".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " failed!\n"
            print "Except an UpYunClientException ..."
            print "Error Message: " + ce.msg + "\n"
            LOGGER.error("\n".join(["".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " failed!\n",
                                    "Except an UpYunClientException ...", "Error Message: " + ce.msg + "\n\n"]))
            return False
        except upyun.UpYunServiceException as se:
            self.__ErrorFileList.append(fpath)
            print "".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " failed\n"
            print "Except an UpYunServiceException ..."
            print "HTTP Status Code: " + str(se.status)
            print "Error Message:    " + se.msg + "\n"
            LOGGER.error("\n".join(["".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]) + " failed\n",
                                    "Except an UpYunServiceException ...\nHTTP Status Code: " + str(se.status),
                                    "Error Message:    " + se.msg + "\n\n"]))
            if se.err:
                print se.err
                LOGGER.error(se.err + "\n\n")
            return False

    def __Worker(self, pid):
        try:
            waiting = 1
            while True:
                # first drain the directory-creation queue
                try:
                    if self.__DFS_FINISHED == False:
                        self.__mkdirList[pid] = self.__JobPathList.get(block=True, timeout=5)
                    else:
                        self.__mkdirList[pid] = self.__JobPathList.get(block=False)
                    fpath = self.__mkdirList[pid]
                    # wait until no other worker is busy creating an ancestor of this directory
                    while True:
                        flag = 0
                        for i in range(self.__WORKER_LIMIT):
                            if i != pid and self.__mkdirList[i] != "" and fpath.startswith(self.__mkdirList[i]):
                                flag = 1
                                break
                        if flag == 0:
                            break
                        else:
                            time.sleep(2)
                    self.__CilentUp.mkdir("".join([self.__DST_DIR, fpath[self.__SRC_DIR.__len__():]]))
                    self.__mkdirList[pid] = ""
                    self.__SQLites.insertPathToSQL(fpath)
                    continue
                except Queue.Empty, efr:
                    pass
                except Exception, ex:
                    pass
                # then take a file from the upload queue
                try:
                    fpath = self.__JobFileList.get(block=True, timeout=waiting)
                except Exception, e:
                    if self.__DFS_FINISHED:
                        return
                    else:
                        waiting = min(waiting * 2, 30)
                        continue
                waiting = max(waiting / 2, 1)
                if os.access(fpath[0], os.R_OK) == False:
                    self.__ErrorFileList.append(fpath[0])
                    print fpath[0].join(["[ERROR] ", " Permission Denied!Need Read access\n\n"])
                    LOGGER.error(fpath[0].join(["[ERROR] ", " Permission Denied!Need Read access\n\n"]))
                    continue
                res = self.__FileSync(fpath[0])
                # if the sync succeeded, update the database
                if res == True:
                    if fpath[1] == 1:
                        self.__SQLites.insertFileToSQL(fpath[0])
                    elif fpath[1] == 0:
                        self.__SQLites.updateFileToSQL(fpath[0])
                    else:
                        print "==============ERROR==================="
        except Exception, e:
            print "[WARNING] a thread died.", e, "\n"
            LOGGER.warning("[WARNING] a thread died." + e.__str__() + "\n\n")
        finally:
            self.__WORKER_NOW -= 1
            self.__mkdirList[pid] = ""

    def __InitLogIn(self):
        self.__CilentUp = upyun.UpYun(self.__BUCKETNAME, self.__USERNAME, self.__PASSWORD, timeout=30,
                                      endpoint=upyun.ED_AUTO)

    def runMultiThreadSync(self):
        start = datetime.datetime.now()
        self.__WORKER_NOW = self.__WORKER_LIMIT
        self.__mkdirList = [""] * self.__WORKER_LIMIT
        self.__InitLogIn()
        thread.start_new_thread(self.__getFileList, (self.__SRC_DIR,))
        time.sleep(3)
        for i in range(self.__WORKER_LIMIT):
            thread.start_new_thread(self.__Worker, (i,))
        while self.__WORKER_NOW > 0:
            while self.__WORKER_NOW < self.__WORKER_LIMIT and (self.__JobPathList.qsize() > 0 or self.__JobFileList.qsize() > 0):
                # give the replacement worker its own slot in __mkdirList before starting it
                self.__mkdirList.append("")
                thread.start_new_thread(self.__Worker, (len(self.__mkdirList) - 1,))
                self.__WORKER_NOW += 1
                print "[INFO] Create a new Thread! \n"
                LOGGER.info("[INFO] Create a new Thread! \n\n")
                # time.sleep(20)
            for i in range(4):
                print self.__JobFileList.qsize(), self.__JobPathList.qsize()
                LOGGER.debug("[INFO]" + str(self.__JobFileList.qsize()) + " files are found and waiting for sync.")
                time.sleep(5)
        self.RollBack()
        print "[INFO]Finish uprsync after " + (datetime.datetime.now() - start).seconds.__str__() + " s\n"
        LOGGER.info("[INFO]Finish uprsync after " + (datetime.datetime.now() - start).seconds.__str__() + " s\n\n")

    def RollBack(self):
        for fullname in self.__ErrorFileList:
            print fullname.join(["[WARNING] ", " is rolling back!"])
            LOGGER.warning(fullname.join(["[WARNING] ", " is rolling back!"]))
            pathname = os.path.dirname(fullname)[self.__SRC_DIR.__len__():]
            while os.path.basename(pathname) != "":
                self.__SQLites.rollBackPathToSQL(pathname)
                pathname = os.path.dirname(pathname)

    # Daemon ------------------------------------------
    def __daemonize(self):
        try:
            pid = os.fork()
            if pid > 0:
                sys.exit(0)
        except OSError, ose:
            sys.stderr.write("[Daemon ERROR]fork #1 failed: %d (%s)\n" % (ose.errno, ose.strerror))
            LOGGER.error("[Daemon ERROR]fork #1 failed: %d (%s)\n\n" % (ose.errno, ose.strerror))
            sys.exit(1)
        # detach from the controlling terminal
        os.setsid()
        # change the working directory
        os.chdir("/")
        # reset the file-creation mask
        os.umask(0)
        # fork a second time so the process cannot reacquire a controlling terminal
        try:
            pid = os.fork()
            if pid > 0:
                sys.exit(0)
        except OSError, e:
            sys.stderr.write("[Daemon ERROR]fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
            LOGGER.error("[Daemon ERROR]fork #2 failed: %d (%s)\n\n" % (e.errno, e.strerror))
            sys.exit(1)
        sys.stdout.flush()
        sys.stderr.flush()
        so = file(self.stdout, 'a+')
        se = file(self.stderr, 'a+', 0)
        # redirect stdout / stderr
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())
        # register an exit hook that removes the pid file
        atexit.register(self.__delpid)
        pid = str(os.getpid())
        file(self.pidfile, 'w+').write("%s\n" % pid)

    def __delpid(self):
        os.remove(self.pidfile)

    def start(self):
        """
        Start the daemon
        """
        print "[Daemon DEBUG]start in Daemon\n"
        LOGGER.debug("[Daemon DEBUG]start in Daemon\n\n")
        # Check for a pidfile to see if the daemon already runs
        try:
            pf = file(self.pidfile, 'r')
            pid = int(pf.read().strip())
            pf.close()
        except IOError:
            pid = None
        if pid:
            message = "pidfile %s already exist. Daemon already running?\n\n"
            sys.stderr.write(message % self.pidfile)
            LOGGER.error("[Daemon ERROR]pidfile %s already exist. Daemon already running?\n\n" % self.pidfile)
            sys.exit(1)
        # Start the daemon
        self.__daemonize()
        self.__run()

    def stop(self):
        """
        Stop the daemon
        """
        # Get the pid from the pidfile
        print "[Daemon DEBUG]stop in Daemon\n"
        LOGGER.debug("[Daemon DEBUG]stop in Daemon\n\n")
        try:
            pf = file(self.pidfile, 'r')
            pid = int(pf.read().strip())
            pf.close()
        except IOError:
            pid = None
        if not pid:
            message = "pidfile %s does not exist. Daemon not running?\n"
            sys.stderr.write(message % self.pidfile)
            LOGGER.error("[Daemon ERROR]pidfile %s does not exist. Daemon not running?\n\n" % self.pidfile)
            return  # not an error in a restart
        # Try killing the daemon process
        try:
            while True:
                os.kill(pid, signal.SIGTERM)
                time.sleep(0.1)
        except OSError, err:
            err = str(err)
            if err.find("No such process") > 0:
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                print str(err)
                sys.exit(1)

    def restart(self):
        """
        Restart the daemon
        """
        self.stop()
        self.start()

    def __run(self):
        self.runMultiThreadSync()


# ---------------------------- DB ----------------------------------------
class UpYunSqlite:
    """
    Operations on the local file metadata.
    """

    def __init__(self, SRC_DIR="/home/glcsnz123", DBFILENAME="example.db"):
        self.SRC_DIR = SRC_DIR
        self.DBFILENAME = DBFILENAME
        self.__InitConnect()
        self.lockSql = thread.allocate_lock()

    def __InitDBFile(self):
        if not os.access(self.SRC_DIR, os.W_OK):
            print "[ERROR]No write access in current directory"
            LOGGER.error("[ERROR]No write access in current directory")
            sys.exit("403")
        conn = sqlite3.connect(os.sep.join([self.SRC_DIR, self.DBFILENAME]))
        cur = conn.cursor()
        cur.execute("CREATE TABLE FileModify(id INTEGER PRIMARY KEY AUTOINCREMENT, filename VARCHAR(256),\
                    pathname VARCHAR(256),last_modify DATE)")
        cur.execute("CREATE TABLE PathModify(id INTEGER PRIMARY KEY AUTOINCREMENT, pathname VARCHAR(256),\
                    fatherpath VARCHAR(256),last_modify DATE)")
        cur.close()
        conn.close()

    def __InitConnect(self):
        if os.path.isfile(os.sep.join([self.SRC_DIR, self.DBFILENAME])) == False:
            self.__InitDBFile()
        self.CONN = sqlite3.connect(os.sep.join([self.SRC_DIR, self.DBFILENAME]), check_same_thread=False)
        self.CUR = self.CONN.cursor()

    def getPathFromSQL(self, fapath):
        query = "select pathname,last_modify from PathModify where fatherpath='%s'" % (
            self.__rpQuota(fapath[self.SRC_DIR.__len__():]))
        try:
            self.lockSql.acquire()
            self.CUR.execute(query)
            res = self.CUR.fetchall()
        finally:
            self.lockSql.release()
        resObj = {}
        for i in range(res.__len__()):
            resObj[res[i][0]] = res[i][1]
        return resObj

    def getFileFromSQL(self, fapath):
        query = "select filename,last_modify from FileModify where pathname = '%s'" % (
            self.__rpQuota(fapath[self.SRC_DIR.__len__():]))
        try:
            self.lockSql.acquire()
            self.CUR.execute(query)
            res = self.CUR.fetchall()
        finally:
            self.lockSql.release()
        resObj = {}
        for i in range(res.__len__()):
            resObj[res[i][0]] = res[i][1]
        return resObj

    def updateFileToSQL(self, fpath):
        query = r"update FileModify SET last_modify = '%s' where filename = '%s' AND pathname = '%s'" % (
            getLastModifyTime(fpath), self.__rpQuota(os.path.basename(fpath)),
            self.__rpQuota(os.path.dirname(fpath)[self.SRC_DIR.__len__():]))
        try:
            self.lockSql.acquire()
            self.CUR.execute(query)
            self.CONN.commit()
        except sqlite3.OperationalError, soe:
            print "[Error] syntax error!\nError Query: " + query, "\nError Path: " + fpath + '\n'
            LOGGER.error("[Error] syntax error!\nError Query: " + query + "\nError Path: " + fpath + '\n\n')
        finally:
            self.lockSql.release()

    def insertFileToSQL(self, fpath):
        query = r"insert into FileModify(filename,pathname,last_modify) values('%s','%s','%s')" % (
            self.__rpQuota(os.path.basename(fpath)),
            self.__rpQuota(os.path.dirname(fpath)[self.SRC_DIR.__len__():]),
            getLastModifyTime(fpath))
        try:
            self.lockSql.acquire()
            self.CUR.execute(query)
            self.CONN.commit()
        except sqlite3.OperationalError, soe:
            print "[Error] syntax error!\nError Query: " + query, "\nError Path: " + fpath + '\n'
            LOGGER.error("[Error] syntax error!\nError Query: " + query + "\nError Path: " + fpath + '\n\n')
        finally:
            self.lockSql.release()

    def updatePathToSQL(self, pathname):
        query = r"update PathModify SET last_modify = '%s' where pathname = '%s' and fatherpath='%s'" % (
            getLastModifyTime(pathname),
            self.__rpQuota(os.path.basename(pathname)),
            self.__rpQuota(os.path.dirname(pathname)[self.SRC_DIR.__len__():]))
        try:
            self.lockSql.acquire()
            self.CUR.execute(query)
            self.CONN.commit()
        except sqlite3.OperationalError, soe:
            print "[Error] syntax error!\n Error Query: " + query, "\nError Path: " + pathname + "\n"
            LOGGER.error("[Error] syntax error!\n Error Query: " + query + "\nError Path: " + pathname + "\n\n")
        finally:
            self.lockSql.release()

    def rollBackPathToSQL(self, pathname):
        query = r"update PathModify SET last_modify = '%s' where pathname = '%s' and fatherpath='%s'" % (
            getLastModifyTime(""),
            self.__rpQuota(os.path.basename(pathname)), self.__rpQuota(os.path.dirname(pathname)))
        try:
            self.lockSql.acquire()
            self.CUR.execute(query)
            self.CONN.commit()
        except sqlite3.OperationalError, soe:
            print "[Error] syntax error!\n Error Query: " + query, "\nError Path: " + pathname + "\n"
            LOGGER.error("[Error] syntax error!\n Error Query: " + query + "\nError Path: " + pathname + "\n\n")
        finally:
            self.lockSql.release()

    def insertPathToSQL(self, pathname):
        query = r"insert into PathModify(pathname,fatherpath,last_modify) values('%s','%s','%s')" % (
            self.__rpQuota(os.path.basename(pathname)),
            self.__rpQuota(os.path.dirname(pathname)[self.SRC_DIR.__len__():]),
            getLastModifyTime(pathname))
        try:
            self.lockSql.acquire()
            self.CUR.execute(query)
            self.CONN.commit()
        except sqlite3.OperationalError, soe:
            print "[Error] syntax error!\n Error Query: " + query, "\nError Path: " + pathname + "\n"
            LOGGER.error("[Error] syntax error!\n Error Query: " + query + "\nError Path: " + pathname + "\n\n")
        finally:
            self.lockSql.release()

    def __rpQuota(self, st):
        # crude escaping: single quotes would otherwise break the hand-built SQL strings above
        return st.replace("'", r"//")


if __name__ == "__main__":
    InitLogger()
    ups = UpYunUprSync()
    ups.setThreadNumLimit(20)
    ups.runMultiThreadSync()

################################
# The main cost turned out to be the database queries, so database access has to be reduced.
# Most of getFileList's time goes to mkdir and database operations: the database accounts
# for roughly 50% of the cost, and mkdir for roughly 25%.
################################
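Given the note above that database access dominates the cost, one way to trim it, sketched below, is to buffer the metadata writes and flush them in a single transaction with executemany() instead of one execute()+commit() per file. This class is hypothetical (it is not part of UpYunSqlite above); the parameterised queries it uses would also remove the need for the manual quote escaping done by __rpQuota().

# a minimal sketch of batched metadata writes; BatchedMeta is a hypothetical helper
import sqlite3

class BatchedMeta(object):
    def __init__(self, dbfile, batch_size=200):
        self.conn = sqlite3.connect(dbfile, check_same_thread=False)
        self.batch_size = batch_size
        self.pending = []  # list of (filename, pathname, last_modify) tuples

    def add_file(self, filename, pathname, last_modify):
        self.pending.append((filename, pathname, last_modify))
        if len(self.pending) >= self.batch_size:
            self.flush()

    def flush(self):
        if not self.pending:
            return
        self.conn.executemany(
            "insert into FileModify(filename, pathname, last_modify) values (?, ?, ?)",
            self.pending)
        self.conn.commit()  # one commit per batch instead of one per file
        self.pending = []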

ReadMe.txt - basic function interface

Initialise logging:
    InitLogger()
    The default log level is INFO.

Set the log level:
    SetLoggerLevel(log_level)
    The parameter log_level is the logging level. The available levels are (case-insensitive):
        "DEBUG"
        "INFO"
        "WARNING"
        "ERROR"
        "OFF"
    "OFF" disables logging. Note that logging is also disabled by default if InitLogger() has not been called.
    The log file defaults to /tmp/UpYunSync.log. It can be changed with:
        UpYunrSync.LOGFILE = LOG_FILE
    where LOG_FILE is the path of the log file.

Initialise UpYunrSync:
    import UpYunrSync
    ups = UpYunrSync.UpYunUprSync(BUCKETNAME, USERNAME, PASSWORD, SRC_DIR, DST_DIR, DBFILENAME)
    BUCKETNAME is the bucket (space) name, USERNAME and PASSWORD are the authorised operator account and password, and SRC_DIR and DST_DIR are the local directory to synchronise and the destination directory on the server; these are required.
    DBFILENAME is the SQLite file that stores the local file metadata; it defaults to .UpYunSqlite.db.

Set the number of worker threads:
    ups.setThreadNumLimit(Thread_Num)
    Thread_Num is the number of upload threads. More threads are not always better; choose the value according to the number and size of the files in the directory being uploaded.

Synchronise a directory (see the usage example after this list):
    Synchronising directly from a terminal:
        ups.runMultiThreadSync()
        Runs the file synchronisation once.
    Synchronising as a background daemon:
        ups.start()
        ups.stop()
        ups.restart()
        These three methods start, stop and restart the daemon, respectively.
    In daemon mode, the standard output, standard error and PID file locations can be set as follows:
        ups.stdout = Stdout_File
        ups.stderr = Stderr_File
        ups.pidfile = Pid_File
    Stdout_File and Stderr_File are the file paths for standard output and standard error, defaulting to /dev/null. Pid_File is the location of the daemon's PID file, defaulting to /tmp/uprsync.pid.
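A short usage example assembled from the interface documented above. It assumes the code in this post is saved as UpYunrSync.py (the module name used in the ReadMe); the bucket name, credentials and directories below are placeholders.

# usage sketch; bucket, credentials and paths are placeholders
import UpYunrSync

UpYunrSync.LOGFILE = "/tmp/UpYunSync.log"   # optional: change the log file location
UpYunrSync.SetLoggerLevel("DEBUG")          # set the level before InitLogger() so it takes effect
UpYunrSync.InitLogger()

ups = UpYunrSync.UpYunUprSync("mybucket", "operator", "password",
                              "/home/user/site", "/backup", ".UpYunSqlite.db")
ups.setThreadNumLimit(10)

# one-shot sync from a terminal
ups.runMultiThreadSync()

# ... or run it as a background daemon instead:
# ups.pidfile = "/tmp/uprsync.pid"
# ups.start()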




