kinetic-swift: kinetic object server

location: kinetic-swift-master/kinetic_swift/obj/server.py

import os
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'cpp'
import logging
from contextlib import contextmanager
from collections import deque
from uuid import uuid4
from eventlet import sleep, Timeout, spawn_n
import re
import time

import msgpack
from swift.obj import diskfile, server

from kinetic_swift.client import KineticSwiftClient

from kinetic.common import Synchronization


DEFAULT_DEPTH = 2


SYNC_OPTION_MAP = {
    'default': Synchronization.INVALID_SYNCHRONIZATION,
    'writethrough': Synchronization.WRITETHROUGH,
    'writeback': Synchronization.WRITEBACK,
    'flush': Synchronization.FLUSH,
}
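
# Per the Kinetic protocol, WRITETHROUGH acks a PUT only after the data is
# persisted, WRITEBACK acks once it reaches the drive's cache, and FLUSH
# additionally persists any previously cached writeback operations.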

# key=chunks.380b815b3317cc72f32a76dfb8227659.3c6c6ae6-0198-4fa2-9ea1-56b3ab4ef41c.00000000000000000000000000000001
def chunk_key(hashpath, nounce, index=None):
    if index is None:
        # for use with getKeyRange
        key = 'chunks.%s.%s/' % (hashpath, nounce)
    else:
        key = 'chunks.%s.%s.%0.32d' % (hashpath, nounce, index)
    return key
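
# Note the trailing '/' in the range form: '/' (0x2f) sorts just after '.'
# (0x2e), so 'chunks.<hash>.<nounce>/' is lexicographically greater than every
# 'chunks.<hash>.<nounce>.<index>' key and works as an exclusive range end.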

# key = objects.06acc17e32ea7122cf0c0f9b6b127f61/
def object_key(policy_index, hashpath, timestamp='',
               extension='.data', nounce=''):
    storage_policy = diskfile.get_data_dir(policy_index)
    if timestamp:
        return '%s.%s.%s%s.%s' % (storage_policy, hashpath, timestamp,
                                  extension, nounce)
    else:
        # for use with getPrevious
        return '%s.%s/' % (storage_policy, hashpath)
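
# Head keys embed Swift's fixed-width internal timestamp, so versions of the
# same object sort chronologically; the prefix form 'objects.<hash>/' sorts
# after all of them, which is what lets getPrevious find the newest version.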


def async_key(policy_index, hashpath, timestamp):
    async_policy = diskfile.get_async_dir(policy_index)
    return '%s.%s.%s' % (async_policy, hashpath, timestamp)


def get_nounce(key):
    return key.rsplit('.', 1)[-1]


class DiskFileManager(diskfile.DiskFileManager):

    def __init__(self, conf, logger):
        super(DiskFileManager, self).__init__(conf, logger)
        self.connect_timeout = int(conf.get('connect_timeout', 3))
        self.response_timeout = int(conf.get('response_timeout', 30))
        self.connect_retry = int(conf.get('connect_retry', 3))
        self.write_depth = int(conf.get('write_depth', DEFAULT_DEPTH))
        self.read_depth = int(conf.get('read_depth', DEFAULT_DEPTH))
        self.delete_depth = int(conf.get('delete_depth', DEFAULT_DEPTH))
        raw_sync_option = conf.get('synchronization', 'writeback').lower()
        try:
            self.synchronization = SYNC_OPTION_MAP[raw_sync_option]
        except KeyError:
            raise ValueError('Invalid synchronization option, choices are %r' %
                             SYNC_OPTION_MAP.keys())
        self.conn_pool = {}
        self.unlink_wait = \
            server.config_true_value(conf.get('unlink_wait', 'false'))

    def get_diskfile(self, device, partition, account, container, obj,
                     policy_idx=0, **kwargs):
        host, port = device.split(':')
        return DiskFile(self, host, port, self.threadpools[device],
                        partition, account, container, obj,
                        policy_idx=policy_idx, unlink_wait=self.unlink_wait,
                        **kwargs)

    def get_diskfile_from_audit_location(self, device, head_key):
        host, port = device.split(':')
        policy_match = re.match(r'objects([-]?[0-9]?)\.', head_key)
        policy_string = policy_match.group(1)
        if not policy_string:
            policy_index = 0
        else:
            policy_index = abs(int(policy_string))
        datadir = head_key.split('.', 3)[1]
        return DiskFile(self, host, port, self.threadpools[device], None,
                        policy_idx=policy_index, _datadir=datadir,
                        unlink_wait=self.unlink_wait)
    # kinetic-swift object server: persist a failed container update for this
    # object on the drive (an "async pending") so it can be retried later
    def pickle_async_update(self, device, account, container, obj, data,
                            timestamp, policy_idx):
        host, port = device.split(':')
        hashpath = diskfile.hash_path(account, container, obj)
        # the async key is derived from (policy_idx, hashpath, timestamp)
        key = async_key(policy_idx, hashpath, timestamp)
        blob = msgpack.packb(data)
        resp = self.get_connection(host, port).put(key, blob)
        resp.wait()
        self.logger.increment('async_pendings')

    def _new_connection(self, host, port, **kwargs):
        kwargs.setdefault('connect_timeout', self.connect_timeout)
        kwargs.setdefault('response_timeout', self.response_timeout)
        for i in range(1, self.connect_retry + 1):
            try:
                return KineticSwiftClient(self.logger, host, int(port),
                                          **kwargs)
            except Timeout:
                self.logger.warning('Drive %s:%s connect timeout #%d (%ds)' % (
                    host, port, i, self.connect_timeout))
            except Exception:
                self.logger.exception('Drive %s:%s connection error #%d' % (
                    host, port, i))
            if i < self.connect_retry:
                sleep(1)
        msg = 'Unable to connect to drive %s:%s after %s attempts' % (
            host, port, i)
        self.logger.error(msg)
        raise diskfile.DiskFileDeviceUnavailable()

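    # Connections are pooled per (host, port); a connection that has faulted
    # is closed and transparently replaced on the next lookup.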
    def get_connection(self, host, port, **kwargs):
        key = (host, port)
        conn = None
        try:
            conn = self.conn_pool[key]
        except KeyError:
            pass
        if conn and conn.faulted:
            conn.close()
            conn = None
        if not conn:
            conn = self.conn_pool[key] = self._new_connection(
                host, port, **kwargs)
        return conn


class DiskFileReader(diskfile.DiskFileReader):

    def __init__(self, diskfile):
        self.diskfile = diskfile
        self._suppress_file_closing = False

    def app_iter_range(self, start, stop):
        r = 0
        if start or start == 0:
            # divmod(a, b) returns the tuple (a // b, a % b); here it maps the
            # starting byte offset to a chunk index and an offset within that
            # chunk
            q, r = divmod(start, self.diskfile.disk_chunk_size)
            self.diskfile.chunk_id = q
        if stop is not None:
            length = stop - start
        else:
            length = None
        try:
            for chunk in self:
                if length is not None:
                    length -= len(chunk) - r
                    if length < 0:
                        # Chop off the extra:
                        yield chunk[r:length]
                        break
                yield chunk[r:]
                r = 0
        finally:
            if not self._suppress_file_closing:
                self.close()
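
    # Worked example: with disk_chunk_size = 65536 and start = 70000,
    # divmod(70000, 65536) == (1, 4464), so iteration starts at chunk 1 and
    # the first 4464 bytes of that chunk are skipped via chunk[r:].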

    def __iter__(self):
        return iter(self.diskfile)

    def close(self):
        return self.diskfile.close()


class DiskFile(diskfile.DiskFile):

    def __init__(self, mgr, host, port, *args, **kwargs):
        self.unlink_wait = kwargs.pop('unlink_wait', False)
        device_path = ''
        self.disk_chunk_size = kwargs.pop('disk_chunk_size',
                                          mgr.disk_chunk_size)
        self.policy_index = kwargs.get('policy_idx', 0)
        # this is normally setup in DiskFileWriter, but we do it here
        self._extension = '.data'
        # this is to neuter the context manager close in GET
        self._took_reader = False
        super(DiskFile, self).__init__(mgr, device_path, *args, **kwargs)
        self.hashpath = os.path.basename(self._datadir.rstrip('/'))
        self._buffer = bytearray()
        self._nounce = None
        self.chunk_id = 0
        self.upload_size = 0
        self.last_sync = 0
        # configurables
        self.write_depth = self._mgr.write_depth
        self.read_depth = self._mgr.read_depth
        self.delete_depth = self._mgr.delete_depth
        self.synchronization = self._mgr.synchronization
        self.conn = None
        self.conn = mgr.get_connection(host, port)
        self.logger = mgr.logger

    def object_key(self, *args, **kwargs):
        return object_key(self.policy_index, self.hashpath, *args, **kwargs)

    def _read(self):
        key = self.object_key()
        entry = self.conn.getPrevious(key).wait()
        if not entry or not entry.key.startswith(key[:-1]):
            self._metadata = {}  # mark object as "open"
            return
        self.data_file = '.ts.' not in entry.key
        blob = entry.value
        self._nounce = get_nounce(entry.key)
        self._metadata = msgpack.unpackb(blob)
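
    # getPrevious(key) returns the entry with the greatest key strictly less
    # than 'objects.<hash>/', i.e. the newest head key for this hash; the
    # startswith() check guards against running into a different object, and
    # '.ts.' in the key marks a tombstone rather than a data file.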

    def open(self, **kwargs):
        self._read()
        if not self._metadata:
            raise diskfile.DiskFileNotExist()
        if self._metadata.get('deleted', False):
            raise diskfile.DiskFileDeleted(metadata=self._metadata)
        return self

    def reader(self, *args, **kwargs):
        self._took_reader = True
        return DiskFileReader(self)

    def close(self, **kwargs):
        self._metadata = None

    def __exit__(self, t, v, tb):
        if not self._took_reader:
            self.close()

    def keys(self):
        return [chunk_key(self.hashpath, self._nounce, i + 1) for i in
                range(self.chunk_id,
                      int(self._metadata['X-Kinetic-Chunk-Count']))]

    def __iter__(self):
        if not self._metadata:
            return
        # collections.deque is a double-ended queue with O(1) appends and pops
        # at either end, used here as a FIFO of in-flight GET responses
        pending = deque()
        for key in self.keys():
            while len(pending) >= self.read_depth:
                entry = pending.popleft().wait()
                yield str(entry.value) if entry else ''
            pending.append(self.conn.get(key))
        for resp in pending:
            entry = resp.wait()
            yield str(entry.value) if entry else ''
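
    # Reads are pipelined: up to read_depth GETs stay in flight, and chunks
    # are yielded in key order as the oldest outstanding response completes.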

    @contextmanager
    def create(self, size=None):
        # uuid4() generates a random UUID, used as the nounce that ties this
        # object's head key to its chunk keys
        self._nounce = str(uuid4())
        self._chunk_id = 0
        try:
            self._pending_write = deque()
            yield self
        finally:
            self.close()
    # kinetic write path: write() -> _sync_buffer() -> _submit_write()
    def write(self, chunk):
        self._buffer.extend(chunk)
        self.upload_size += len(chunk)

        diff = self.upload_size - self.last_sync
        if diff >= self.disk_chunk_size:
            self._sync_buffer()
            self.last_sync = self.upload_size
        return self.upload_size

    def _submit_write(self, key, blob, final=True):
        if len(self._pending_write) >= self.write_depth:
            self._pending_write.popleft().wait()
        if self.synchronization == Synchronization.FLUSH and not final:
            synchronization = Synchronization.WRITEBACK
        else:
            synchronization = self.synchronization
        pending_resp = self.conn.put(key, blob, force=True,
                                     synchronization=synchronization)
        self._pending_write.append(pending_resp)
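
    # Writes are pipelined the same way, up to write_depth in flight. With
    # synchronization=flush, intermediate chunk PUTs are downgraded to
    # WRITEBACK and only the final head-key PUT carries FLUSH, which also
    # persists everything queued before it.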

    def _sync_buffer(self):
        if self._buffer:
            # write out the chunk buffer!
            self._chunk_id += 1
            # chunk_key is the key under which the actual chunk data is stored
            key = chunk_key(self.hashpath, self._nounce, self._chunk_id)
            self._submit_write(key, self._buffer[:self.disk_chunk_size],
                               final=False)
        self._buffer = self._buffer[self.disk_chunk_size:]
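
    # _sync_buffer() always advances the buffer by disk_chunk_size, so put()
    # can drain a final partial chunk by calling it in a loop.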

    def _wait_write(self):
        for resp in self._pending_write:
            resp.wait()

    def delete(self, timestamp):
        timestamp = diskfile.Timestamp(timestamp).internal

        with self.create() as deleter:
            deleter._extension = '.ts'
            deleter.put({'X-Timestamp': timestamp})

    def put(self, metadata):
        if self._extension == '.ts':
            metadata['deleted'] = True
        self._sync_buffer()
        while self._buffer:
            self._sync_buffer()
        # chunk ids start at 1, so the final value of _chunk_id equals the
        # number of chunks written
        # example of the metadata blob stored under the head key:
        # {'X-Kinetic-Chunk-Nounce': 'd2499503-0e66-4ef4-b4ef-a9d040c41088',
        #  'Content-Length': '8131', 'name': '/AUTH_test/foobar/.viminfo',
        #  'Content-Type': 'application/octet-stream',
        #  'X-Kinetic-Chunk-Count': 1, 'ETag': '',
        #  'X-Timestamp': '1422329553.08974',
        #  'X-Object-Meta-Mtime': '1422329035.563941'}
        metadata['X-Kinetic-Chunk-Count'] = self._chunk_id
        metadata['X-Kinetic-Chunk-Nounce'] = self._nounce
        metadata['name'] = self._name
        self._metadata = metadata
        blob = msgpack.packb(self._metadata)
        timestamp = diskfile.Timestamp(metadata['X-Timestamp'])
        key = self.object_key(timestamp.internal, self._extension,
                              self._nounce)
        self._submit_write(key, blob, final=True)
        self._wait_write()
        if self.unlink_wait:
            self._unlink_old(timestamp)
        else:
            spawn_n(self._unlink_old, timestamp)
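
    # The head key is submitted last; _wait_write() blocks until every
    # pending PUT is acknowledged before older versions are unlinked.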

    def _unlink_old(self, req_timestamp):
        # start_key = 'objects.06acc17e32ea7122cf0c0f9b6b127f61'
        start_key = self.object_key()[:-1]
        # end_key = '%s.%s.%s%s.%s' % (storage_policy, hashpath, timestamp,
        #                              extension, nounce)
        end_key = self.object_key(timestamp=req_timestamp.internal)
        resp = self.conn.getKeyRange(start_key, end_key, endKeyInclusive=False)
        head_keys = resp.wait()
        pending = deque()
        for head_key in head_keys:
            nounce = get_nounce(head_key)

            def key_gen():
                # start_key = 'chunks.%s.%s.%0.32d' % (hashpath, nounce, 0)
                # end_key = 'chunks.%s.%s/' % (hashpath, nounce)
                start_key = chunk_key(self.hashpath, nounce, 0)
                end_key = chunk_key(self.hashpath, nounce)
                resp = self.conn.getKeyRange(start_key, end_key,
                                             endKeyInclusive=False)
                chunk_keys = resp.wait()
                for key in chunk_keys:
                    yield key
                yield head_key

            for key in key_gen():
                while len(pending) >= self.delete_depth:
                    found = pending.popleft().wait()
                    if not found:
                        break
                pending.append(self.conn.delete(key, force=True))

        for resp in pending:
            resp.wait()
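
    # Cleanup walks every head key older than the one just written, deletes
    # its chunks through a pipeline bounded by delete_depth, then deletes the
    # stale head key itself.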

    def quarantine(self):
        timestamp = diskfile.Timestamp(self._metadata['X-Timestamp'])
        head_key = self.object_key(timestamp.internal, self._extension,
                                   self._nounce)
        keys = [head_key] + [
            chunk_key(self.hashpath, self._nounce, i + 1) for i in
            range(int(self._metadata['X-Kinetic-Chunk-Count']))]
        quarantine_prefix = 'quarantine.%s.' % diskfile.Timestamp(
            time.time()).internal
        for key in keys:
            resp = self.conn.rename(key, quarantine_prefix + key)
            resp.wait()

    def get_data_file_size(self):
        return self._metadata['Content-Length']


class ObjectController(server.ObjectController):

    def setup(self, conf):
        super(ObjectController, self).setup(conf)
        self._diskfile_mgr = DiskFileManager(conf, self.logger)
        kinetic_logger = logging.getLogger('kinetic')
        for handler in self.logger.logger.handlers:
            kinetic_logger.addHandler(handler)


def app_factory(global_conf, **local_conf):
    conf = global_conf.copy()
    conf.update(local_conf)
    return ObjectController(conf)
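
The whole key scheme leans on lexicographic ordering. Below is a minimal,
self-contained sketch (plain Python, no kinetic or swift dependency; the
hash, nounce, and timestamps are made-up example values) that reproduces the
key formats above and checks the two ordering properties the server relies on:

def demo_key_ordering():
    hashpath = '06acc17e32ea7122cf0c0f9b6b127f61'
    nounce = '3c6c6ae6-0198-4fa2-9ea1-56b3ab4ef41c'

    # chunk keys vs. the range-end form: '/' (0x2f) sorts after '.' (0x2e),
    # so the range end is greater than every indexed chunk key
    chunk_keys = ['chunks.%s.%s.%0.32d' % (hashpath, nounce, i)
                  for i in (1, 2, 3)]
    range_end = 'chunks.%s.%s/' % (hashpath, nounce)
    assert all(key < range_end for key in chunk_keys)

    # head keys embed a fixed-width internal timestamp, so they sort
    # chronologically, and the prefix form sorts after all of them;
    # getPrevious('objects.<hash>/') therefore lands on the newest version
    head_keys = ['objects.%s.%s.data.%s' % (hashpath, ts, nounce)
                 for ts in ('1422329553.08974', '1422329600.00000')]
    prefix = 'objects.%s/' % hashpath
    assert sorted(head_keys) == head_keys
    assert max(head_keys) < prefix
    print('key ordering holds')

demo_key_ordering()

If the assertions pass, the assumptions behind getPrevious (newest version
lookup) and getKeyRange (chunk enumeration and stale-version cleanup) hold.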

 
