PostgreSQL Source Code Reading, Part 5


When PostgreSQL starts, it initializes shared memory. Let's walk through the process.
The postmaster enters the initialization path with the following call:
/*
* Set up shared memory and semaphores.
*/
reset_shared(PostPortNumber);
The argument is the port number: on every startup the same IPC key is derived from this port, so the server always looks for the same segment (a sketch of the key selection follows below). reset_shared() then calls

CreateSharedMemoryAndSemaphores(false, port);

This function initializes shared memory and the semaphores. The first parameter determines whether a normal shared segment or a private (standalone-backend) segment is created.
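To make the "same key from the same port" point concrete, here is a minimal sketch of the key-selection loop in PGSharedMemoryCreate as I read it in this source tree. The collision handling (checking whether an existing segment belongs to a dead postmaster and can be removed) is omitted, so treat this as illustrative rather than a verbatim copy.

/* Sketch only: how PGSharedMemoryCreate picks an IPC key (details omitted) */
void *
SketchSharedMemoryCreate(Size size, int port)
{
    IpcMemoryKey NextShmemSegID;
    void       *memAddress;

    /* Keys are derived from the port, so every restart probes the same range */
    NextShmemSegID = port * 1000;

    for (NextShmemSegID++;; NextShmemSegID++)
    {
        /* Try to create a brand-new segment with this key */
        memAddress = InternalIpcMemoryCreate(NextShmemSegID, size);
        if (memAddress)
            break;              /* created and attached successfully */

        /*
         * NULL means the key is already in use.  The real code then checks
         * whether the old segment is orphaned and removable before retrying
         * with the next key; that logic is left out of this sketch.
         */
    }
    return memAddress;
}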
size = 100000;
size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
sizeof(ShmemIndexEnt)));
size = add_size(size, BufferShmemSize());
size = add_size(size, LockShmemSize());
size = add_size(size, ProcGlobalShmemSize());
size = add_size(size, XLOGShmemSize());
size = add_size(size, CLOGShmemSize());
size = add_size(size, SUBTRANSShmemSize());
size = add_size(size, TwoPhaseShmemSize());
size = add_size(size, MultiXactShmemSize());
size = add_size(size, LWLockShmemSize());
size = add_size(size, ProcArrayShmemSize());
size = add_size(size, BackendStatusShmemSize());
size = add_size(size, SInvalShmemSize());
size = add_size(size, PMSignalShmemSize());
size = add_size(size, BgWriterShmemSize());
size = add_size(size, AutoVacuumShmemSize());
size = add_size(size, BTreeShmemSize());
size = add_size(size, SyncScanShmemSize());

#ifdef EXEC_BACKEND
	size = add_size(size, ShmemBackendArraySize());
#endif

The estimate starts at 100000 bytes of slack and then adds the amount of memory each subsystem needs.
For example, BufferShmemSize() accounts for the buffer pool: data pages, buffer descriptors, and the buffer lookup hash table:
/*
* BufferShmemSize
*
* compute the size of shared memory for the buffer pool including
* data pages, buffer descriptors, hash tables, etc.
*/
Size
BufferShmemSize(void)
{
	Size		size = 0;

	/* size of buffer descriptors */
	size = add_size(size, mul_size(NBuffers, sizeof(BufferDesc)));

	/* size of data pages */
	size = add_size(size, mul_size(NBuffers, BLCKSZ));

	/* size of stuff controlled by freelist.c */
	size = add_size(size, StrategyShmemSize());

	return size;
}

BLCKSZ is the block size, #define BLCKSZ 8192 by default, i.e. 8 kB per page.
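Note that add_size and mul_size are not plain + and *: they are overflow-checked helpers so that a huge shared_buffers setting fails cleanly instead of wrapping around. Below is a simplified sketch of the idea (not a verbatim copy of shmem.c), followed by the buffer-pool arithmetic for a hypothetical NBuffers = 1024 and a roughly 64-byte BufferDesc; both numbers are illustrative.

/* Simplified sketch of the overflow-checked addition used above */
Size
add_size(Size s1, Size s2)
{
    Size        result = s1 + s2;

    /* an unsigned sum that wrapped is smaller than one of its operands */
    if (result < s1 || result < s2)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("requested shared memory size overflows size_t")));
    return result;
}

/*
 * Worked example (illustrative numbers): with NBuffers = 1024 and a
 * ~64-byte BufferDesc, BufferShmemSize() comes to roughly
 *     1024 * 64   =    65536 bytes of buffer descriptors
 *   + 1024 * 8192 =  8388608 bytes of data pages
 *   + freelist bookkeeping
 * i.e. a bit over 8 MB for the buffer pool alone.
 */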

Next, the shared memory segment is created and access to it is initialized:
/*
* Create the shmem segment
*/
seghdr = PGSharedMemoryCreate(size, makePrivate, port);

    InitShmemAccess(seghdr);
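Before looking inside PGSharedMemoryCreate, note what InitShmemAccess does with the returned header: it just records where the attached segment lives so that later allocations can be carved out of it. Roughly, as a simplified sketch of shmem.c rather than a verbatim copy:

/* Simplified sketch of InitShmemAccess (shmem.c) */
void
InitShmemAccess(void *seghdr)
{
    PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr;

    ShmemSegHdr = shmhdr;                       /* header of the segment */
    ShmemBase = (void *) shmhdr;                /* start address of the segment */
    ShmemEnd = (char *) ShmemBase + shmhdr->totalsize;  /* one past the end */
}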

Inside PGSharedMemoryCreate, the new shared memory segment is created with

	/* Try to create new segment */
	memAddress = InternalIpcMemoryCreate(NextShmemSegID, size);

The header comment on InternalIpcMemoryCreate describes its contract:
/*
* Attempt to create a new shared memory segment with the specified key.
* Will fail (return NULL) if such a segment already exists. If successful,
* attach the segment to the current process and return its attached address.
* On success, callbacks are registered with on_shmem_exit to detach and
* delete the segment when on_shmem_exit is called.
*
* If we fail with a failure code other than collision-with-existing-segment,
* print out an error and abort. Other types of errors are not recoverable.
*/
static void *
InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
{
    IpcMemoryId shmid;
    void       *memAddress;

    shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);

    if (shmid < 0)
    {
        int         shmget_errno = errno;

        /*
         * Fail quietly if error indicates a collision with existing segment.
         * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
         * we could get a permission violation instead?  Also, EIDRM might
         * occur if an old seg is slated for destruction but not gone yet.
         */
        if (shmget_errno == EEXIST || shmget_errno == EACCES
#ifdef EIDRM
            || shmget_errno == EIDRM
#endif
            )
            return NULL;

        /*
         * Some BSD-derived kernels are known to return EINVAL, not EEXIST,
         * if there is an existing segment but it's smaller than "size"
         * (this is a result of poorly-thought-out ordering of error tests).
         * To distinguish between collision and invalid size in such cases,
         * we make a second try with size = 0.  These kernels do not test
         * size against SHMMIN in the preexisting-segment case, so we will
         * not get EINVAL a second time if there is such a segment.
         */
        if (shmget_errno == EINVAL)
        {
            shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection);

            if (shmid < 0)
            {
                /* As above, fail quietly if we verify a collision */
                if (errno == EEXIST || errno == EACCES
#ifdef EIDRM
                    || errno == EIDRM
#endif
                    )
                    return NULL;
                /* Otherwise, fall through to report the original error */
            }
            else
            {
                /*
                 * On most platforms we cannot get here because SHMMIN is
                 * greater than zero.  However, if we do succeed in creating
                 * a zero-size segment, free it and then fall through to
                 * report the original error.
                 */
                if (shmctl(shmid, IPC_RMID, NULL) < 0)
                    elog(LOG, "shmctl(%d, %d, 0) failed: %m",
                         (int) shmid, IPC_RMID);
            }
        }

        /*
         * Else complain and abort
         */
        errno = shmget_errno;
        ereport(FATAL,
                (errmsg("could not create shared memory segment: %m"),
                 errdetail("Failed system call was shmget(key=%lu, size=%lu, 0%o).",
                           (unsigned long) memKey, (unsigned long) size,
                           IPC_CREAT | IPC_EXCL | IPCProtection),
                 (shmget_errno == EINVAL) ?
                 errhint("This error usually means that PostgreSQL's request for a shared memory "
                         "segment exceeded your kernel's SHMMAX parameter.  You can either "
                         "reduce the request size or reconfigure the kernel with larger SHMMAX.  "
                         "To reduce the request size (currently %lu bytes), reduce "
                         "PostgreSQL's shared_buffers parameter (currently %d) and/or "
                         "its max_connections parameter (currently %d).\n"
                         "If the request size is already small, it's possible that it is less than "
                         "your kernel's SHMMIN parameter, in which case raising the request size or "
                         "reconfiguring SHMMIN is called for.\n"
                         "The PostgreSQL documentation contains more information about shared "
                         "memory configuration.",
                         (unsigned long) size, NBuffers, MaxBackends) : 0,
                 (shmget_errno == ENOMEM) ?
                 errhint("This error usually means that PostgreSQL's request for a shared "
                         "memory segment exceeded available memory or swap space. "
                         "To reduce the request size (currently %lu bytes), reduce "
                         "PostgreSQL's shared_buffers parameter (currently %d) and/or "
                         "its max_connections parameter (currently %d).\n"
                         "The PostgreSQL documentation contains more information about shared "
                         "memory configuration.",
                         (unsigned long) size, NBuffers, MaxBackends) : 0,
                 (shmget_errno == ENOSPC) ?
                 errhint("This error does *not* mean that you have run out of disk space. "
                         "It occurs either if all available shared memory IDs have been taken, "
                         "in which case you need to raise the SHMMNI parameter in your kernel, "
                         "or because the system's overall limit for shared memory has been "
                         "reached.  If you cannot increase the shared memory limit, "
                         "reduce PostgreSQL's shared memory request (currently %lu bytes), "
                         "by reducing its shared_buffers parameter (currently %d) and/or "
                         "its max_connections parameter (currently %d).\n"
                         "The PostgreSQL documentation contains more information about shared "
                         "memory configuration.",
                         (unsigned long) size, NBuffers, MaxBackends) : 0));
    }

    /* Register on-exit routine to delete the new segment */
    on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));

    /* OK, should be able to attach to the segment */
    memAddress = shmat(shmid, NULL, PG_SHMAT_FLAGS);

    if (memAddress == (void *) -1)
        elog(FATAL, "shmat(id=%d) failed: %m", shmid);

    /* Register on-exit routine to detach new segment before deleting */
    on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));

    /* Record key and ID in lockfile for data directory. */
    RecordSharedMemoryInLockFile((unsigned long) memKey,
                                 (unsigned long) shmid);

    return memAddress;
}
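The function above is essentially a careful wrapper around the System V IPC calls. As a self-contained illustration of the same pattern (shmget with IPC_CREAT | IPC_EXCL so an existing key shows up as a collision, then shmat/shmdt/shmctl), here is a small standalone program; the key value and size are arbitrary and have nothing to do with PostgreSQL itself.

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int
main(void)
{
    key_t   key = 5432001;          /* arbitrary key, in the spirit of port * 1000 + 1 */
    size_t  size = 1024 * 1024;     /* 1 MB segment */
    int     shmid;
    void   *addr;

    /* IPC_EXCL makes creation fail with EEXIST if the key is already taken */
    shmid = shmget(key, size, IPC_CREAT | IPC_EXCL | 0600);
    if (shmid < 0)
    {
        fprintf(stderr, "shmget failed: %s\n", strerror(errno));
        return 1;
    }

    /* Attach the segment at an address chosen by the kernel */
    addr = shmat(shmid, NULL, 0);
    if (addr == (void *) -1)
    {
        fprintf(stderr, "shmat failed: %s\n", strerror(errno));
        return 1;
    }

    strcpy((char *) addr, "hello from shared memory");
    printf("segment %d attached at %p: %s\n", shmid, addr, (char *) addr);

    /* Detach, then mark the segment for removal */
    shmdt(addr);
    shmctl(shmid, IPC_RMID, NULL);
    return 0;
}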

After the memory segment has been created and attached, the semaphores are created:

/*
 * Create semaphores
 */
numSemas = ProcGlobalSemas();	/* one per backend and auxiliary process */
numSemas += SpinlockSemas();	/* semaphores needed by spinlocks */
PGReserveSemaphores(numSemas, port);

With the semaphores in place, the shared memory allocation mechanism is set up. Allocation out of the segment is serialized by a spinlock (ShmemLock), so this step is essentially spinlock initialization:
/*
* Set up shared memory allocation mechanism
*/
if (!IsUnderPostmaster)
InitShmemAllocation();

The main thing InitShmemAllocation does here is SpinLockInit(ShmemLock), putting the allocation spinlock into its unlocked state (how allocation then works is sketched just below). Next, the lightweight locks are created with

CreateLWLocks();

after which each subsystem initializes its own region of shared memory.
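Once ShmemLock is initialized, ShmemAlloc can hand out pieces of the segment. Conceptually it is a bump allocator protected by that spinlock; the following is a simplified sketch of the idea, not the exact code in shmem.c (which also aligns the start offset and reports errors).

/* Simplified sketch of spinlock-protected allocation from the segment */
void *
SketchShmemAlloc(Size size)
{
    Size        newStart;
    Size        newFree;
    void       *newSpace;

    size = MAXALIGN(size);          /* keep allocations aligned */

    SpinLockAcquire(ShmemLock);

    newStart = ShmemSegHdr->freeoffset;
    newFree = newStart + size;
    if (newFree <= ShmemSegHdr->totalsize)
    {
        newSpace = (void *) ((char *) ShmemBase + newStart);
        ShmemSegHdr->freeoffset = newFree;  /* bump the free pointer */
    }
    else
        newSpace = NULL;            /* out of shared memory */

    SpinLockRelease(ShmemLock);

    return newSpace;
}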
The buffer pool, for example, is initialized like this:
void
InitBufferPool(void)
{
	bool		foundBufs,
				foundDescs;

	BufferDescriptors = (BufferDesc *)
		ShmemInitStruct("Buffer Descriptors",
						NBuffers * sizeof(BufferDesc), &foundDescs);

	BufferBlocks = (char *)
		ShmemInitStruct("Buffer Blocks",
						NBuffers * (Size) BLCKSZ, &foundBufs);

	if (foundDescs || foundBufs)
	{
		/* both should be present or neither */
		Assert(foundDescs && foundBufs);
		/* note: this path is only taken in EXEC_BACKEND case */
	}
	else
	{
		BufferDesc *buf;
		int			i;

		buf = BufferDescriptors;

		/*
		 * Initialize all the buffer headers.
		 */
		for (i = 0; i < NBuffers; buf++, i++)
		{
			CLEAR_BUFFERTAG(buf->tag);
			buf->flags = 0;
			buf->usage_count = 0;
			buf->refcount = 0;
			buf->wait_backend_pid = 0;

			SpinLockInit(&buf->buf_hdr_lock);

			buf->buf_id = i;

			/*
			 * Initially link all the buffers together as unused. Subsequent
			 * management of this list is done by freelist.c.
			 */
			buf->freeNext = i + 1;

			buf->io_in_progress_lock = LWLockAssign();
			buf->content_lock = LWLockAssign();
		}

		/* Correct last entry of linked list */
		BufferDescriptors[NBuffers - 1].freeNext = FREENEXT_END_OF_LIST;
	}

	/* Init other shared buffer-management stuff */
	StrategyInitialize(!foundDescs);
}
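The found flags come from ShmemInitStruct, the create-or-attach primitive used by every subsystem: it looks the name up in the shmem index hash table, returns the existing chunk (setting *foundPtr) if someone already created it, and otherwise allocates a fresh chunk. Below is a simplified sketch of that contract; error handling and the bootstrap case for the index itself are omitted, and the real code in shmem.c copies the name into a fixed-length key before the hash lookup.

/* Simplified sketch of the ShmemInitStruct contract (shmem.c) */
void *
SketchShmemInitStruct(const char *name, Size size, bool *foundPtr)
{
    ShmemIndexEnt *result;
    void       *structPtr;

    LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);

    /* look up (or insert) the name in the shmem index hash table */
    result = (ShmemIndexEnt *)
        hash_search(ShmemIndex, name, HASH_ENTER, foundPtr);

    if (*foundPtr)
        structPtr = result->location;   /* already created by someone else */
    else
    {
        structPtr = ShmemAlloc(size);   /* carve a new chunk out of the segment */
        result->location = structPtr;
        result->size = size;
    }

    LWLockRelease(ShmemIndexLock);
    return structPtr;
}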

A buffer descriptor holds the state of one shared buffer; the array of descriptors, together with the data pages, makes up the shared buffer pool.
/*
* BufferDesc -- shared descriptor/state data for a single shared buffer.
*
* Note: buf_hdr_lock must be held to examine or change the tag, flags,
* usage_count, refcount, or wait_backend_pid fields. buf_id field never
* changes after initialization, so does not need locking. freeNext is
* protected by the BufFreelistLock not buf_hdr_lock. The LWLocks can take
* care of themselves. The buf_hdr_lock is not used to control access to
* the data in the buffer!
*
* An exception is that if we have the buffer pinned, its tag can't change
* underneath us, so we can examine the tag without locking the spinlock.
* Also, in places we do one-time reads of the flags without bothering to
* lock the spinlock; this is generally for situations where we don't expect
* the flag bit being tested to be changing.
*
* We can't physically remove items from a disk page if another backend has
* the buffer pinned. Hence, a backend may need to wait for all other pins
* to go away. This is signaled by storing its own PID into
* wait_backend_pid and setting flag bit BM_PIN_COUNT_WAITER. At present,
* there can be only one such waiter per buffer.
*
* We use this same struct for local buffer headers, but the lock fields
* are not used and not all of the flag bits are useful either.
*/
typedef struct sbufdesc
{
	BufferTag	tag;			/* ID of page contained in buffer */
	BufFlags	flags;			/* see bit definitions above */
	uint16		usage_count;	/* usage counter for clock sweep code */
	unsigned	refcount;		/* # of backends holding pins on buffer */
	int			wait_backend_pid;	/* backend PID of pin-count waiter */

	slock_t		buf_hdr_lock;	/* protects the above fields */

	int			buf_id;			/* buffer's index number (from 0) */
	int			freeNext;		/* link in freelist chain */

	LWLockId	io_in_progress_lock;	/* to wait for I/O to complete */
	LWLockId	content_lock;	/* to lock access to buffer contents */
} BufferDesc;
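The locking rules in the comment are easiest to see in how a buffer gets pinned: the header spinlock is held just long enough to bump refcount and usage_count, and from then on the pin guarantees the tag cannot change. A simplified sketch of that pattern follows; the real PinBuffer also maintains each backend's private refcount bookkeeping, which is omitted here, and the usage_count cap is BM_MAX_USAGE_COUNT in the real code.

/* Simplified sketch of pinning a buffer under buf_hdr_lock */
static void
SketchPinBuffer(volatile BufferDesc *buf)
{
    SpinLockAcquire(&buf->buf_hdr_lock);

    buf->refcount++;                /* this backend now holds a pin */
    if (buf->usage_count < 5)       /* clock-sweep popularity counter; cap is illustrative */
        buf->usage_count++;

    SpinLockRelease(&buf->buf_hdr_lock);

    /*
     * While the pin is held, buf->tag cannot change underneath us, so the
     * page contents can be examined (under content_lock) without re-taking
     * the header spinlock.
     */
}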
Finally, StrategyInitialize(!foundDescs) sets up the hash table used to look buffers up by their tag, along with the freelist and the rest of the replacement machinery.
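The freeNext links set up in InitBufferPool are what freelist.c consumes later: a victim buffer is first popped off the free list, and only when that is empty does the clock sweep over usage_count start. The sketch below is heavily simplified; the real StrategyGetBuffer takes the header spinlock around each check and coordinates with BufFreelistLock.

/* Heavily simplified sketch of victim selection in freelist.c */
static BufferDesc *
SketchStrategyGetBuffer(void)
{
    BufferDesc *buf;

    /* 1. Prefer a buffer that has never been used: pop the freelist */
    if (StrategyControl->firstFreeBuffer >= 0)
    {
        buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
        StrategyControl->firstFreeBuffer = buf->freeNext;
        buf->freeNext = FREENEXT_NOT_IN_LIST;
        return buf;
    }

    /* 2. Otherwise run the clock sweep: decay usage_count until one hits 0 */
    for (;;)
    {
        buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];
        StrategyControl->nextVictimBuffer =
            (StrategyControl->nextVictimBuffer + 1) % NBuffers;

        if (buf->refcount == 0 && buf->usage_count == 0)
            return buf;             /* unpinned and "cold": evict this one */

        if (buf->usage_count > 0)
            buf->usage_count--;     /* give it another trip around the clock */
    }
}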
