MongoDB Source Code Analysis (20): mongos Sharding Configuration


        This article describes the flow of the shard-setup commands. Before analyzing them, let's look at the role each collection in the config database on the config server plays.

version: stores the current config server version information; it changes as MongoDB is upgraded.

settings: stores sharding system settings, such as the chunk size and the balancer configuration.

shards: stores the configuration of each shard, including every mongod's shard id (e.g. shard0000) and its address.

databases: stores per-database sharding information for the sharded system: whether the database is sharded, and the id of its primary shard.

collections: stores the sharded collections.

locks: distributed lock state; state=0 means unlocked, 1 means the lock is being acquired, 2 means the lock is held.

lockpings: ping information from the servers in the distributed system, maintained for the distributed lock; each server's last ping time is kept so that other servers can take over the lock of a server that has timed out.

chunks: stores the chunk information of sharded collections: each chunk's data range, the collection name, and the id of the shard holding it.

mongos: ping information for each mongos process, used to confirm that the mongos is reachable.

tags: appears to serve a purpose similar to tags in a replica set, ensuring that certain chunks live only on certain classes of servers.
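For orientation, these collections can be inspected directly through mongos; a quick look (the documents shown are illustrative, and exact fields vary by version):

    mongos> use config
    switched to db config
    mongos> db.shards.find()
    { "_id" : "shard0000", "host" : "127.0.0.1:27040" }
    mongos> db.databases.find()
    { "_id" : "fool", "partitioned" : true, "primary" : "shard0000" }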

        In a sharded system, every mongod server is a shard; when no name is specified, an internal name such as shard0000, shard0001, ... is generated automatically. Each database, when it first joins the sharded system, is assigned a primary shard, which records the server the database originally lives on. A newly added database with no specified location is initially placed on the server that currently holds the least data in the cluster.

        A sharded collection is managed by a ChunkManager. The collection is partitioned in units of chunks; each chunk is 64 MB by default (changeable by command, see the sketch below), and once a chunk reaches the limit it splits. The ChunkManager records the chunks and the key range each one covers; subsequent data modifications first consult the ChunkManager, which tells mongos which server to route the request to.
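Since the chunk size lives in the settings collection described above, it can be changed with a plain write through mongos; a minimal sketch (the value is in MB):

    mongos> use config
    mongos> db.settings.save({ _id : "chunksize", value : 128 })  // raise the split threshold to 128 MB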

Now to the code. First, adding a server to the sharded cluster with db.runCommand({addshard: "127.0.0.1:27040"}), which executes the addshard command.

    bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
        if ( !ClientBasic::getCurrent()->getAuthenticationInfo()->isAuthorized("admin") ) { // only admin may run this command
            errmsg = "unauthorized. Need admin authentication to add a shard ";
            return false;
        }
        // get replica set component hosts
        ConnectionString servers = ConnectionString::parse( cmdObj.firstElement().valuestrsafe() , errmsg );
        // using localhost in server names implies every other process must use localhost addresses too
        vector<HostAndPort> serverAddrs = servers.getServers(); // addresses of the server(s) being added
        for ( size_t i = 0 ; i < serverAddrs.size() ; i++ ) {
            if ( serverAddrs[i].isLocalHost() != grid.allowLocalHost() ) {
                return false;
            }
            // it's fine if mongods of a set all use default port
            if ( ! serverAddrs[i].hasPort() ) { // no port given, fall back to the default shard port
                serverAddrs[i].setPort( CmdLine::ShardServerPort );
            }
        }
        // name is optional; addShard will provide one if needed
        string name = "";
        if ( cmdObj["name"].type() == String ) { // shard name; if unset, one like shard0000 is generated automatically
            name = cmdObj["name"].valuestrsafe();
        }
        // maxSize is the space usage cap in a shard in MBs
        long long maxSize = 0;
        if ( cmdObj[ ShardFields::maxSize.name() ].isNumber() ) {
            maxSize = cmdObj[ ShardFields::maxSize.name() ].numberLong();
        }
        if ( ! grid.addShard( &name , servers , maxSize , errmsg ) ) { // the actual add
            return false;
        }
        result << "shardAdded" << name;
        return true;
    }
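As the code above shows, name and maxSize are optional fields of the command; a usage sketch (address, name, and size are illustrative):

    mongos> db.runCommand({ addshard : "127.0.0.1:27040", name : "shard0", maxSize : 1024 })
    { "shardAdded" : "shard0", "ok" : 1 }

maxSize caps the shard's space usage in MB; with no name, a generated one such as shard0000 is used.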
run->addShard. A large portion of the sanity-check code has been elided.

    bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
        // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
        string nameInternal;
        ReplicaSetMonitorPtr rsMonitor;
        // Check whether the host (or set) exists and run several sanity checks on this request.
        // There are two set of sanity checks: making sure adding this particular shard is consistent
        // with the replica set state (if it exists) and making sure this shards databases can be
        // brought into the grid without conflict.
        vector<string> dbNames;
        {
            scoped_ptr<ScopedDbConnection> newShardConnPtr( // connect to the server that is to become a shard
                    ScopedDbConnection::getInternalScopedDbConnection( servers.toString() ) );
            ScopedDbConnection& newShardConn = *newShardConnPtr;
            BSONObj resIsMongos;
            BSONObj resIsMaster;
            ok = newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster );
            // if the shard has only one host, make sure it is not part of a replica set
            string setName = resIsMaster["setName"].str();
            string commandSetName = servers.getSetName();
            // if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of
            // the set. It is fine if not all members of the set are present in 'servers'.
            bool foundAll = true;
            string offendingHost;
            // shard name defaults to the name of the replica set
            if ( name->empty() && ! setName.empty() )
                *name = setName;
            // In order to be accepted as a new shard, that mongod must not have any database name that exists already
            // in any other shards. If that test passes, the new shard's databases are going to be entered as
            // non-sharded db's whose primary is the newly added shard.
            BSONObj resListDB; // list all databases on the new server; they will be registered with the grid
            ok = newShardConn->runCommand( "admin" , BSON( "listDatabases" << 1 ) , resListDB );
            BSONObjIterator i( resListDB["databases"].Obj() );
            while ( i.more() ) {
                BSONObj dbEntry = i.next().Obj();
                const string& dbName = dbEntry["name"].String();
                if ( _isSpecialLocalDB( dbName ) ) {
                    // 'local', 'admin', and 'config' are system DBs and should be excluded here
                    continue;
                }
                else {
                    dbNames.push_back( dbName );
                }
            }
            if ( newShardConn->type() == ConnectionString::SET )
                rsMonitor = ReplicaSetMonitor::get( setName );
            newShardConn.done();
        }
        // check that none of the existing shard candidate's db's exist elsewhere
        // (each DBConfig fetched here must be null, otherwise the add fails)
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , false ); // fetch the database's config
        }
        // if a name for a shard wasn't provided, pick one.
        if ( name->empty() && ! _getNewShardName( name ) ) { // generate a new shard id for the new server
            errMsg = "error generating new shard name";
            return false;
        }
        // build the ConfigDB shard document
        BSONObjBuilder b;
        b.append( "_id" , *name );
        b.append( "host" , rsMonitor ? rsMonitor->getServerAddress() : servers.toString() );
        if ( maxSize > 0 ) {
            b.append( ShardFields::maxSize.name() , maxSize );
        }
        BSONObj shardDoc = b.obj();
        {
            scoped_ptr<ScopedDbConnection> conn( ScopedDbConnection::getInternalScopedDbConnection(
                    configServer.getPrimary().getConnString() ) );
            // check whether the set of hosts (or single host) is not an already a known shard
            BSONObj old = conn->get()->findOne( ShardNS::shard ,
                                                BSON( "host" << servers.toString() ) );
            conn->get()->insert( ShardNS::shard , shardDoc ); // save the config into the shards collection on the config server
            conn->done();
        }
        Shard::reloadShardInfo(); // a new shard was configured, reload the shard info
        // add all databases of the new shard
        for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
            DBConfigPtr config = getDBConfig( *it , true , *name ); // build config info for each newly added database; 'true' means create
        }
        return true;
    }
Continuing with Shard::reloadShardInfo.

    void Shard::reloadShardInfo() { staticShardInfo.reload(); }

    void reload() {
        list<BSONObj> all;
        {
            scoped_ptr<ScopedDbConnection> conn( // fetch every config document from the shards collection
                    ScopedDbConnection::getInternalScopedDbConnection(
                            configServer.getPrimary().getConnString() ) );
            auto_ptr<DBClientCursor> c = conn->get()->query( ShardNS::shard , Query() );
            while ( c->more() ) {
                all.push_back( c->next().getOwned() );
            }
            conn->done();
        }
        scoped_lock lk( _mutex );
        // We use the _lookup table for all shards and for the primary config DB. The config DB info,
        // however, does not come from the ShardNS::shard. So when cleaning the _lookup table we leave
        // the config state intact. The rationale is that this way we could drop shards that
        // were removed without reinitializing the config DB information.
        ShardMap::iterator i = _lookup.find( "config" ); // the config entry is kept unchanged
        if ( i != _lookup.end() ) {
            ShardPtr config = i->second;
            _lookup.clear();
            _lookup[ "config" ] = config;
        }
        else {
            _lookup.clear();
        }
        _rsLookup.clear(); // rebuild the shard table from the documents in the shards collection
        for ( list<BSONObj>::iterator i=all.begin(); i!=all.end(); ++i ) {
            BSONObj o = *i;
            string name = o["_id"].String();
            string host = o["host"].String();
            long long maxSize = 0;
            BSONElement maxSizeElem = o[ ShardFields::maxSize.name() ];
            if ( ! maxSizeElem.eoo() )
                maxSize = maxSizeElem.numberLong();
            bool isDraining = false;
            BSONElement isDrainingElem = o[ ShardFields::draining.name() ];
            if ( ! isDrainingElem.eoo() )
                isDraining = isDrainingElem.Bool();
            ShardPtr s( new Shard( name , host , maxSize , isDraining ) ); // build the Shard structure
            if ( o["tags"].type() == Array ) { // attach tag info
                vector<BSONElement> v = o["tags"].Array();
                for ( unsigned j=0; j<v.size(); j++ ) {
                    s->addTag( v[j].String() );
                }
            }
            _lookup[name] = s;
            _installHost( host , s );
        }
    }
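The fields parsed in the loop above map one-to-one onto a config.shards document; an illustrative example (values are made up):

    mongos> db.getSiblingDB("config").shards.findOne()
    {
        "_id" : "shard0000",
        "host" : "127.0.0.1:27040",
        "maxSize" : NumberLong(1024),   // optional, in MB
        "draining" : false,             // optional, set while the shard is being removed
        "tags" : [ "NYC" ]              // optional
    }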
run->addShard->getDBConfig

    DBConfigPtr Grid::getDBConfig( string database , bool create , const string& shardNameHint ) {
        string::size_type i = database.find( "." );
        if ( i != string::npos )
            database = database.substr( 0 , i );
        if ( database == "config" ) // the config database never needs to be created
            return configServerPtr;
        scoped_lock l( _lock );
        DBConfigPtr& dbConfig = _databases[database]; // not in _databases yet: build a new DBConfig
        if( ! dbConfig ){
            dbConfig.reset(new DBConfig( database ));
            // Protect initial load from connectivity errors
            bool loaded = false;
            loaded = dbConfig->load(); // load this database's config from the databases collection, then
                                       // load its collections' config from the collections collection
            if( ! loaded ){ // on first contact the database is not yet stored in the databases collection, so loaded=false
                if( create ){
                    try{
                        Shard primary;
                        if ( database == "admin" ) {
                            primary = configServer.getPrimary();
                        }
                        else if ( shardNameHint.empty() ) {
                            primary = Shard::pick(); // no shard given: pick the server with the least data as primary
                        }
                        else {
                            // use the shard name if provided
                            Shard shard;
                            shard.reset( shardNameHint ); // resolve the shard named by shardNameHint
                            primary = shard;
                        }
                        if ( primary.ok() ) { // record the primary shard's name; internally the name is resolved to a
                                              // shard, and the DBConfig is persisted to the databases and collections collections
                            dbConfig->setPrimary( primary.getName() ); // saves 'cc' to configDB
                        }
                    } // catch ( DBException& e ) elided by the author
                }
                else {
                    dbConfig.reset();
                }
            }
        }
        return dbConfig;
    }
run->addShard->getDBConfig->DBConfig::load
    bool DBConfig::load() {
        scoped_lock lk( _lock );
        return _load();
    }

    bool DBConfig::_load() {
        scoped_ptr<ScopedDbConnection> conn( ScopedDbConnection::getInternalScopedDbConnection(
                configServer.modelServer(), 30.0 ) );
        BSONObj o = conn->get()->findOne( ShardNS::database , BSON( "_id" << _name ) );
        unserialize( o ); // set the database's primary server and whether sharding is enabled
        BSONObjBuilder b;
        b.appendRegex( "_id" , (string)"^" + pcrecpp::RE::QuoteMeta( _name ) + "\\." );
        int numCollsErased = 0;
        int numCollsSharded = 0;
        auto_ptr<DBClientCursor> cursor = conn->get()->query( ShardNS::collection, b.obj() );
        while ( cursor->more() ) { // load the collection info belonging to this database
            BSONObj o = cursor->next();
            string collName = o["_id"].String();
            if( o["dropped"].trueValue() ){
                _collections.erase( collName );
                numCollsErased++;
            }
            else{
                _collections[ collName ] = CollectionInfo( o );
                if( _collections[ collName ].isSharded() ) numCollsSharded++;
            }
        }
        conn->done();
        return true;
    }
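_load() selects this database's collection documents with the regex ^<dbname>\. on _id; a config.collections entry looks roughly like this (values illustrative):

    mongos> db.getSiblingDB("config").collections.findOne()
    {
        "_id" : "fool.coll",
        "lastmod" : ISODate("2012-01-01T00:00:00Z"),
        "dropped" : false,
        "key" : { "_id" : 1 },
        "unique" : false
    }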
Now the saving of the DBConfig: run->addShard->getDBConfig->DBConfig::setPrimary

    void DBConfig::setPrimary( string s ) {
        scoped_lock lk( _lock );
        _primary.reset( s );
        _save();
    }

    void DBConfig::_save( bool db, bool coll ) {
        scoped_ptr<ScopedDbConnection> conn( ScopedDbConnection::getInternalScopedDbConnection(
                configServer.modelServer(), 30.0 ) );
        if( db ){
            BSONObj n;
            {
                BSONObjBuilder b;
                serialize(b); // serialize the database config
                n = b.obj();
            }
            // upsert the document into the databases collection
            conn->get()->update( ShardNS::database , BSON( "_id" << _name ) , n , true );
        }
        if( coll ){ // save collection info if present; on the first load there is none, so this path is skipped
            for ( Collections::iterator i=_collections.begin(); i!=_collections.end(); ++i ) {
                if ( ! i->second.isDirty() )
                    continue;
                i->second.save( i->first , conn->get() );
            }
        }
        conn->done();
    }
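What serialize() plus the upsert above leaves behind in config.databases is one small document per database; roughly (values illustrative):

    mongos> db.getSiblingDB("config").databases.findOne({ _id : "fool" })
    { "_id" : "fool", "partitioned" : false, "primary" : "shard0000" }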
This completes the addShard flow. Next, enablesharding: db.runCommand({enablesharding: "fool"}). The command is implemented in mongo\s\commands_admin.cpp.

    bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
        string dbname = cmdObj.firstElement().valuestrsafe();
        if ( dbname.size() == 0 ) { // various sanity checks
            errmsg = "no db";
            return false;
        }
        if ( dbname == "admin" ) {
            errmsg = "can't shard the admin db";
            return false;
        }
        if ( dbname == "local" ) {
            errmsg = "can't shard the local db";
            return false;
        }
        DBConfigPtr config = grid.getDBConfig( dbname ); // fetch the database's config, analyzed above
        if ( config->isShardingEnabled() ) {
            errmsg = "already enabled";
            return false;
        }
        if ( ! okForConfigChanges( errmsg ) ) // the config may only change while all config servers are up
            return false;
        config->enableSharding(); // the actual switch
        return true;
    }
    void DBConfig::enableSharding( bool save ) { // save defaults to true
        if ( _shardingEnabled )
            return;
        scoped_lock lk( _lock ); // set _shardingEnabled = true and persist the config to config.databases
        _shardingEnabled = true;
        if( save ) _save();
    }
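After enableSharding() persists, the same config.databases document shows partitioned flipped to true (output illustrative):

    mongos> db.runCommand({ enablesharding : "fool" })
    { "ok" : 1 }
    mongos> db.getSiblingDB("config").databases.findOne({ _id : "fool" })
    { "_id" : "fool", "partitioned" : true, "primary" : "shard0000" }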
Next, sharding a collection: db.runCommand({shardcollection: "fool.coll", key: {_id: 1}}). The source is in mongo\s\commands_admin.cpp; it contains so much error-handling code that most of it had to be cut, so the checks it performs are summarized here:

1. The collection name must exist.

2. The collection's database must have sharding enabled.

3. The collection must not already be sharded.

4. Each field in the shard key pattern must map to the number 1.

5. All config servers must be reachable.

6. System collections cannot be sharded.

7. Capped collections cannot be sharded.

The shard-key requirements from 10gen's comments (a usage sketch follows the list):

1. Every unique index, except those that start with _id, must contain the shard key as a prefix.

2. If the collection is non-empty, at least one index prefixed by the shard key must exist, and that index may not be sparse, may not contain null values, and may not be a multikey index (the multikey restriction may be lifted later).

3. If the shard key is declared unique, a unique index exactly equal to the shard key must exist; it must match exactly, not merely as a prefix.
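A usage sketch of the command, with and without the optional unique flag (names are illustrative):

    mongos> db.runCommand({ shardcollection : "fool.coll", key : { _id : 1 } })
    { "collectionsharded" : "fool.coll", "ok" : 1 }
    mongos> // with unique, a unique index exactly matching the key must already exist
    mongos> // (or the collection must be empty so the index can be created)
    mongos> db.runCommand({ shardcollection : "fool.coll2", key : { uid : 1 }, unique : true })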

    bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
        DBConfigPtr config = grid.getDBConfig( ns );
        BSONObj proposedKey = cmdObj.getObjectField( "key" );
        // the rest of the checks require a connection to the primary db
        scoped_ptr<ScopedDbConnection> conn(
                ScopedDbConnection::getScopedDbConnection(
                                config->getPrimary().getConnString() ) );
        // check that collection is not capped
        BSONObj res = conn->get()->findOne( config->getName() + ".system.namespaces",
                                            BSON( "name" << ns ) );
        // The proposed shard key must be validated against the set of existing indexes.
        // In particular, we must ensure the following constraints
        //
        // 1. All existing unique indexes, except those which start with the _id index,
        //    must contain the proposed key as a prefix (uniqueness of the _id index is
        //    ensured by the _id generation process or guaranteed by the user).
        //
        // 2. If the collection is not empty, there must exist at least one index that
        //    is "useful" for the proposed key.  A "useful" index is defined as follows
        //    Useful Index:
        //         i. contains proposedKey as a prefix
        //         ii. is not sparse
        //         iii. contains no null values
        //         iv. is not multikey (maybe lift this restriction later)
        //
        // 3. If the proposed shard key is specified as unique, there must exist a useful,
        //    unique index exactly equal to the proposedKey (not just a prefix).
        //
        // After validating these constraint:
        //
        // 4. If there is no useful index, and the collection is non-empty, we
        //    must fail.
        //
        // 5. If the collection is empty, and it's still possible to create an index
        //    on the proposed key, we go ahead and do so.
        string indexNS = config->getName() + ".system.indexes";
        // 1.  Verify consistency with existing unique indexes
        BSONObj uniqueQuery = BSON( "ns" << ns << "unique" << true );
        auto_ptr<DBClientCursor> uniqueQueryResult =
                        conn->get()->query( indexNS , uniqueQuery );
        while ( uniqueQueryResult->more() ) {
            BSONObj idx = uniqueQueryResult->next();
            BSONObj currentKey = idx["key"].embeddedObject();
            bool isCurrentID = str::equals( currentKey.firstElementFieldName() , "_id" );
            if( ! isCurrentID && ! proposedKey.isPrefixOf( currentKey) ) {
                errmsg = str::stream() << "can't shard collection '" << ns << "' "
                                       << "with unique index on " << currentKey << " "
                                       << "and proposed shard key " << proposedKey << ". "
                                       << "Uniqueness can't be maintained unless "
                                       << "shard key is a prefix";
                conn->done();
                return false;
            }
        }
        // 2. Check for a useful index
        bool hasUsefulIndexForKey = false;
        BSONObj allQuery = BSON( "ns" << ns );
        auto_ptr<DBClientCursor> allQueryResult =
                        conn->get()->query( indexNS , allQuery );
        BSONArrayBuilder allIndexes;
        while ( allQueryResult->more() ) {
            BSONObj idx = allQueryResult->next();
            allIndexes.append( idx );
            BSONObj currentKey = idx["key"].embeddedObject();
            // Check 2.i. and 2.ii.
            if ( ! idx["sparse"].trueValue() && proposedKey.isPrefixOf( currentKey ) ) {
                hasUsefulIndexForKey = true;
            }
        }
        // 3. If proposed key is required to be unique, additionally check for exact match.
        bool careAboutUnique = cmdObj["unique"].trueValue();
        if ( hasUsefulIndexForKey && careAboutUnique ) {
            BSONObj eqQuery = BSON( "ns" << ns << "key" << proposedKey );
            BSONObj eqQueryResult = conn->get()->findOne( indexNS, eqQuery );
            if ( eqQueryResult.isEmpty() ) {
                hasUsefulIndexForKey = false;  // if no exact match, index not useful,
                                               // but still possible to create one later
            }
            else {
                bool isExplicitlyUnique = eqQueryResult["unique"].trueValue();
                BSONObj currKey = eqQueryResult["key"].embeddedObject();
                bool isCurrentID = str::equals( currKey.firstElementFieldName() , "_id" );
                if ( ! isExplicitlyUnique && ! isCurrentID ) {
                    errmsg = str::stream() << "can't shard collection " << ns << ", "
                                           << proposedKey << " index not unique, "
                                           << "and unique index explicitly specified";
                    conn->done();
                    return false;
                }
            }
        }
        if ( hasUsefulIndexForKey ) {
            // Check 2.iii and 2.iv. Make sure no null entries in the sharding index
            // and that there is a useful, non-multikey index available
            BSONObjBuilder cmd;
            cmd.append( "checkShardingIndex" , ns );
            cmd.append( "keyPattern" , proposedKey );
            BSONObj cmdObj = cmd.obj();
            if ( ! conn->get()->runCommand( "admin" , cmdObj , res ) ) {
                errmsg = res["errmsg"].str();
                conn->done();
                return false;
            }
        }
        // 4. if no useful index, and collection is non-empty, fail
        else if ( conn->get()->count( ns ) != 0 ) {
            errmsg = str::stream() << "please create an index that starts with the "
                                   << "shard key before sharding.";
            result.append( "proposedKey" , proposedKey );
            result.appendArray( "curIndexes" , allIndexes.done() );
            conn->done();
            return false;
        }
        // 5. If no useful index exists, and collection empty, create one on proposedKey.
        //    Only need to call ensureIndex on primary shard, since indexes get copied to
        //    receiving shard whenever a migrate occurs.
        else { // the collection is empty: build the index
            // call ensureIndex with cache=false, see SERVER-1691
            bool ensureSuccess = conn->get()->ensureIndex( ns ,
                                                           proposedKey ,
                                                           careAboutUnique ,
                                                           "" ,
                                                           false );
        }
        conn->done();
        // the real shardCollection work
        config->shardCollection( ns , proposedKey , careAboutUnique );
        return true;
    }

run->shardCollection

    ChunkManagerPtr DBConfig::shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ,
                                           vector<BSONObj>* initPoints, vector<Shard>* initShards ) {
        ChunkManagerPtr manager; // each sharded collection is managed by a ChunkManager, which locates every chunk
        {                        // and decides which shard each piece of data goes to
            scoped_lock lk( _lock );
            CollectionInfo& ci = _collections[ns];
            ChunkManager* cm = new ChunkManager( ns, fieldsAndOrder, unique );
            cm->createFirstChunks( configServer.getPrimary().getConnString(), // the first chunk creation may already split,
                                   getPrimary(), initPoints, initShards );    // and saves the collection (and split) info to the chunks collection
            ci.shard( cm ); // read everything back from the chunks collection to initialize the ChunkManager and the chunk ranges
            _save();        // analyzed above: persist the changed config to the databases and collections collections
            // Save the initial chunk manager for later, no need to reload if we're in this lock
            manager = ci.getCM();
        }
        // Tell the primary mongod to refresh it's data
        // TODO:  Think the real fix here is for mongos to just assume all collections sharded, when we get there
        for( int i = 0; i < 4; i++ ){
            { // try/catch elided by the author; on error the loop retries after sleepsecs
                ShardConnection conn( getPrimary(), ns );
                conn.setVersion(); // notify the shard that the version changed and the config must be reloaded
                conn.done();
                break;
            }
            sleepsecs( i );
        }
        return manager;
    }
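createFirstChunks() leaves one document per chunk in config.chunks; an illustrative first chunk covering the whole key range (values made up):

    mongos> db.getSiblingDB("config").chunks.findOne()
    {
        "_id" : "fool.coll-_id_MinKey",
        "ns" : "fool.coll",
        "min" : { "_id" : { "$minKey" : 1 } },
        "max" : { "_id" : { "$maxKey" : 1 } },
        "shard" : "shard0000",
        "lastmod" : Timestamp(1000, 0)
    }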
run->shardCollection->setVersion

    bool setVersion() {
        _finishInit();
        return _setVersion;
    }

    void ShardConnection::_finishInit() {
        if ( _finishedInit )
            return;
        _finishedInit = true;
        if ( _ns.size() && versionManager.isVersionableCB( _conn ) ) {
            // Make sure we specified a manager for the correct namespace
            if( _manager ) verify( _manager->getns() == _ns );
            _setVersion = versionManager.checkShardVersionCB( this , false , 1 );
        }
        else {
            // Make sure we didn't specify a manager for an empty namespace
            _setVersion = false;
        }
    }

    bool VersionManager::checkShardVersionCB( ShardConnection* conn_in , bool authoritative , int tryNumber ) {
        return checkShardVersion( conn_in->get(), conn_in->getNS(), conn_in->getManager(), authoritative, tryNumber );
    }

run->shardCollection->setVersion->_finishInit->checkShardVersion

    bool checkShardVersion( DBClientBase * conn_in , const string& ns , ChunkManagerPtr refManager, bool authoritative , int tryNumber ) {
        // Initialize the writeback listener: data sent to, say, shard0 may have been moved to another
        // shard by balancing, in which case it must be collected and re-sent to the right place.
        WriteBackListener::init( *conn_in );
        DBConfigPtr conf = grid.getDBConfig( ns );
        DBClientBase* conn = getVersionable( conn_in );
        unsigned long long officialSequenceNumber = 0;
        ChunkManagerPtr manager;
        const bool isSharded = conf->isSharded( ns );
        if ( isSharded ) {
            manager = conf->getChunkManagerIfExists( ns , authoritative );
            // It's possible the chunk manager was reset since we checked whether sharded was true,
            // so must check this here.
            if( manager ) officialSequenceNumber = manager->getSequenceNumber();
        }
        // Check this manager against the reference manager
        if( isSharded && manager ){
            Shard shard = Shard::make( conn->getServerAddress() );
            if( refManager && ! refManager->compatibleWith( manager, shard ) ){
                // version mismatch (e.g. a split or balancing happened): throw
                throw SendStaleConfigException( ns, str::stream() << "manager (" << manager->getVersion( shard ).toString()  << " : " << manager->getSequenceNumber() << ") "
                                                                  << "not compatible with reference manager (" << refManager->getVersion( shard ).toString()  << " : " << refManager->getSequenceNumber() << ") "
                                                                  << "on shard " << shard.getName() << " (" << shard.getAddress().toString() << ")",
                                                refManager->getVersion( shard ), manager->getVersion( shard ) );
            }
        }
        else if( refManager ){
            Shard shard = Shard::make( conn->getServerAddress() );
            string msg( str::stream() << "not sharded ("
                        << ( (manager.get() == 0) ? string( "<none>" ) :
                                str::stream() << manager->getSequenceNumber() )
                        << ") but has reference manager ("
                        << refManager->getSequenceNumber() << ") "
                        << "on conn " << conn->getServerAddress() << " ("
                        << conn_in->getServerAddress() << ")" );
            throw SendStaleConfigException( ns, msg,
                    refManager->getVersion( shard ), ShardChunkVersion( 0, OID() ));
        }
        // has the ChunkManager been reloaded since the last time we updated the connection-level version?
        // (ie., last time we issued the setShardVersions below)
        unsigned long long sequenceNumber = connectionShardStatus.getSequence(conn,ns);
        if ( sequenceNumber == officialSequenceNumber ) {
            return false;
        }
        ShardChunkVersion version = ShardChunkVersion( 0, OID() );
        if ( isSharded && manager ) {
            version = manager->getVersion( Shard::make( conn->getServerAddress() ) );
        }
        BSONObj result;
        if ( setShardVersion( *conn , ns , version , authoritative , result ) ) { // tell the remote shard the version changed
            // success!
            connectionShardStatus.setSequence( conn , ns , officialSequenceNumber );
            return true;
        }
        // from here on, for various reasons, checkShardVersion must be retried to notify the remote end of the new version
        if ( result["need_authoritative"].trueValue() )
            massert( 10428 ,  "need_authoritative set but in authoritative mode already" , ! authoritative );
        if ( ! authoritative ) {
            checkShardVersion( conn , ns , refManager, 1 , tryNumber + 1 );
            return true;
        }
        if ( result["reloadConfig"].trueValue() ) {
            if( result["version"].timestampTime() == 0 ){
                conf->reload();
            }
            else {
                // reload config
                conf->getChunkManager( ns , true );
            }
        }
        const int maxNumTries = 7;
        if ( tryNumber < maxNumTries ) {
            sleepmillis( 10 * tryNumber );
            checkShardVersion( conn , ns , refManager, true , tryNumber + 1 );
            return true;
        }
        massert( 10429 , errmsg , 0 );
        return true;
    }

run->shardCollection->setVersion->_finishInit->checkShardVersion->setShardVersion

    bool setShardVersion( DBClientBase & conn , const string& ns , ShardChunkVersion version , bool authoritative , BSONObj& result ) {
        // Send a command to the remote mongod telling it that the shard version changed,
        // so that it reloads its configuration.
        BSONObjBuilder cmdBuilder;
        cmdBuilder.append( "setShardVersion" , ns.c_str() );
        cmdBuilder.append( "configdb" , configServer.modelServer() );
        version.addToBSON( cmdBuilder );
        cmdBuilder.appendOID( "serverID" , &serverID );
        if ( authoritative )
            cmdBuilder.appendBool( "authoritative" , 1 );
        Shard s = Shard::make( conn.getServerAddress() );
        cmdBuilder.append( "shard" , s.getName() );
        cmdBuilder.append( "shardHost" , s.getConnString() );
        BSONObj cmd = cmdBuilder.obj();
        // We do not go deeper here. On the remote side, after various checks, the shard reloads the
        // relevant ChunkManager from the config server because the shard version changed; the reload
        // happens only when chunks have actually been moved.
        return conn.runCommand( "admin",cmd,result,0,
                                &AuthenticationTable::getInternalSecurityAuthenticationTable() );
    }
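Assembled, the command document sent to the mongod looks roughly like this (the version fields are appended by ShardChunkVersion::addToBSON and are elided here; the other values are illustrative):

    {
        setShardVersion : "fool.coll",
        configdb : "127.0.0.1:27019",   // config server connection string
        // ...version fields from ShardChunkVersion::addToBSON()...
        serverID : ObjectId("..."),     // identifies the sending mongos
        shard : "shard0000",
        shardHost : "127.0.0.1:27040"
    }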
        This concludes the analysis of adding a server to the sharded cluster, enabling sharding on a database, and sharding a collection. It is the foundation for what follows: with this part clear, the later analyses of queries, updates, and chunk migration become much simpler.


Original article: MongoDB Source Code Analysis (20): mongos Sharding Configuration

Author: yhjj0108 (Yang Hao).









