Parameter server anatomy (1)

来源:互联网 发布:云协作软件 编辑:程序博客网 时间:2024/05/16 01:01

anatomy的目的,是为了把Parameter server从一个framework,改造为一个platform。

独立的PS server集群,包括PS scheduler(需要增加scheduler HA的实现,并且scheduler只管理和调度server,不再管理和调度worker)。

独立的PS worker集群,基于Spark实现。把PS-lite framework中的worker相关的代码,剥离出来,封装成Java class(通过JNI),供实现某种算法(比如FTRL)的Spark app使用。

下面代码的分析,目的是正确地剥离scheduler、server、worker的代码。


ps.h wraps class Postoffice.
base.h defines kScheduler, kServerGroup, kWorkerGroup as bitmask values.


ps::IsScheduler()
    Postoffice::Postoffice() (singleton)
    Postoffice::is_scheduler()


ps::IsServer()
    Postoffice::Postoffice() (singleton)
    Postoffice::is_server()


ps::IsWorker()
    Postoffice::Postoffice() (singleton)
    Postoffice::is_worker()


ps::Start()
    Postoffice::Postoffice() (singleton)
    Postoffice::Start()


Postoffice::Postoffice()
    Van::Create("zmq")
        new ZMQVan() (NOOP)
    some env vars
    Postoffice::num_workers_
    Postoffice::num_servers_
    Postoffice::is_worker_
    Postoffice::is_server_
    Postoffice::is_scheduler_


Postoffice::Start()
    log
    Postoffice::node_ids_ (role (bitmask) <--> node IDs)
    ZMQVan::Start()
        ZMQVan::context_ = zmq_ctx_new()
        zmq_ctx_set()
        // must call it before calling Send
        // it initializes all connections to other nodes and starts the receiving thread, which keeps receiving messages
        Van::Start()
            Van::scheduler_
            Van::is_scheduler_
            Van::my_node_
                Node::SCHEDULER
                    Van::my_node_ = Van::scheduler_
                Node::SERVER or Node::WORKER
                    hostname (IP), port, role, id
            Van::Bind() (pure virtual)
                ZMQVan::Bind()
                    zmq_socket()
                    zmq_bind()
            Van::Connect(Van::scheduler_) (pure virtual)
                ZMQVan::Connect()
                    ZMQVan::senders_
                    zmq_socket()
                    zmq_setsockopt()
                    zmq_connect()
            Van::receiver_thread_ = std::unique_ptr<std::thread>(new std::thread(&Van::Receiving, this))
                ZMQVan::RecvMsg()
                    zmq_msg_init()
                    zmq_msg_recv()
                    zmq_msg_close()
                    zmq_msg_more()
                Van::resender_
                operations on all kinds of nodes: the scheduler, servers, and workers (very important)
                    Connect()
                    Send()
            // let the scheduler know myself
            Van::Send()
                ZMQVan::SendMsg()
                Van::resender_
            Van::resender_
            Van::heartbeat_thread_ = std::unique_ptr<std::thread>(new std::thread(&Van::Heartbeat, this))
    Postoffice::start_time_
    Barrier()
        Van::Send()
            ZMQVan::SendMsg()


ZMQVan::SendMsg()
    senders_.find() (for connected socket)
    PackMeta()
    zmq_msg_init_data()
    zmq_msg_send()
    zmq_msg_close()


// All nodes should call this function before exiting.
ps::Finalize()
    Barrier()
    ZMQVan::Stop()
        Van::Stop()
        zmq_setsockopt()
        zmq_close()
        zmq_ctx_destroy()
    exit_callback_()


SimpleApp::SimpleApp()
    SimpleApp::SimpleApp() (default constructor)
    obj_ = new Customer()
        Postoffice::Postoffice() (singleton)
        Postoffice::AddCustomer()
            Postoffice::customers_
        Customer::recv_thread_ = std::unique_ptr<std::thread>(new std::thread(&Customer::Receiving, this))
            Customer::recv_queue_.WaitAndPop()
            Customer::recv_handle_()
            Customer::tracker_
        SimpleApp::Process()
            SimpleApp::request_handle_()
            SimpleApp::response_handle_()


SimpleApp::Request()
    Customer::NewRequest()
        Customer::tracker_
    Van::Send()


SimpleApp::Wait()
    Customer::WaitRequest()
        Customer::tracker_


SimpleApp::Response()
    Van::Send()


KVServer::KVServer()
    SimpleApp::SimpleApp() (default constructor)
    obj_ = new Customer()
        Postoffice::Postoffice() (singleton)
        Postoffice::AddCustomer()
            Postoffice::customers_
        Customer::recv_thread_ = std::unique_ptr<std::thread>(new std::thread(&Customer::Receiving, this))
            Customer::recv_queue_.WaitAndPop()
            Customer::recv_handle_()
            Customer::tracker_
        KVServer<Val>::Process()
            SimpleApp::Process()
            KVServer::request_handle_()


KVWorker::KVWorker()
    SimpleApp::SimpleApp() (default constructor)
    KVWorker::slicer_
    obj_ = new Customer()
        Postoffice::Postoffice() (singleton)
        Postoffice::AddCustomer()
            Postoffice::customers_
        Customer::recv_thread_ = std::unique_ptr<std::thread>(new std::thread(&Customer::Receiving, this))
            Customer::recv_queue_.WaitAndPop()
            Customer::recv_handle_()
            Customer::tracker_
        KVWorker<Val>::Process()
            SimpleApp::Process()
            KVWorker::recv_kvs_
            Customer::NumResponse()
                Customer::tracker_
            KVWorker::RunCallback()
                KVWorker::callbacks_


// Pushes a list of key-value pairs to all server nodes.
KVWorker::Push()
    KVWorker::ZPush()
        Customer::NewRequest()
        KVWorker::AddCallback()
        KVWorker::Send()
            KVWorker::slicer_()
            Customer::AddResponse()
                Customer::tracker_
            KVWorker::RunCallback()
            Van::Send()


KVWorker::Wait()
    Customer::WaitRequest()


KVWorker::Pull()
    KVWorker::Pull_()
        Customer::NewRequest()
        KVWorker::AddCallback()
            KVWorker::recv_kvs_
            ps::FindRange()
            all kinds of operations and checks on data
            cb()
        KVWorker::Send()

0 0