Linux用户空间线程管理介绍之二:创建线程堆栈
来源:互联网 发布:工业以太网知乎 编辑:程序博客网 时间:2024/06/06 00:36
转自:http://www.longene.org/forum/viewtopic.php?f=17&t=429&sid=babec6ba82dd65e29c5fafe03e4d89c0
前面已经介绍过了线程结构pthread,下面就需要来看看在创建线程的过程中,这个结构是如何生成的。allocate_stack函数位于nptl/allocatestack.c中:
- 代码: 全选
308 static int
309 allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
310 ALLOCATE_STACK_PARMS)
311 {
312 struct pthread *pd;
313 size_t size;
314 size_t pagesize_m1 = __getpagesize () - 1;
315 void *stacktop;
316
317 assert (attr != NULL);
318 assert (powerof2 (pagesize_m1 + 1));
319 assert (TCB_ALIGNMENT >= STACK_ALIGN);
320
321 /* Get the stack size from the attribute if it is set. Otherwise we
322 use the default we determined at start time. */
323 size = attr->stacksize ?: __default_stacksize;
324
325 /* Get memory for the stack. */
326 if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
327 {
.........
410 }
411 else
412 {
用户程序在调用pthread_create时,可以传进一个参数pthread_attr,这个参数可以指定堆栈地址、大小等参数,323行的意思就是说在指定堆栈大小的情况下,采用指定大小,否则采用默认大小。__default_stacksize可以用ulimit -s查看,在一般系统中,这个值为8M。在通常情况下,应用程序是不指定堆栈大小的。
326行开始,根据堆栈地址是否由pthread_attr指定,分两种情况进行处理。在通常情况下,这个地址也是不指定的,因此,直接看412行开始的else部分:
- 代码: 全选
412 {
413 /* Allocate some anonymous memory. If possible use the cache. */
414 size_t guardsize;
415 size_t reqsize;
416 void *mem;
417 const int prot = (PROT_READ | PROT_WRITE
418 | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
419
420 #if COLORING_INCREMENT != 0
421 /* Add one more page for stack coloring. Don't do it for stacks
422 with 16 times pagesize or larger. This might just cause
423 unnecessary misalignment. */
424 if (size <= 16 * pagesize_m1)
425 size += pagesize_m1 + 1;
426 #endif
427
428 /* Adjust the stack size for alignment. */
429 size &= ~__static_tls_align_m1;
430 assert (size != 0);
431
432 /* Make sure the size of the stack is enough for the guard and
433 eventually the thread descriptor. */
434 guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
435 if (__builtin_expect (size < ((guardsize + __static_tls_size
436 + MINIMAL_REST_STACK + pagesize_m1)
437 & ~pagesize_m1),
438 0))
439 /* The stack is too small (or the guard too large). */
440 return EINVAL;
441
442 /* Try to get a stack from the cache. */
443 reqsize = size;
444 pd = get_cached_stack (&size, &mem);
445 if (pd == NULL)
446 {
447 /* To avoid aliasing effects on a larger scale than pages we
448 adjust the allocated stack size if necessary. This way
449 allocations directly following each other will not have
450 aliasing problems. */
451 #if MULTI_PAGE_ALIASING != 0
452 if ((size % MULTI_PAGE_ALIASING) == 0)
453 size += pagesize_m1 + 1;
454 #endif
455
456 mem = mmap (NULL, size, prot,
457 MAP_PRIVATE | MAP_ANONYMOUS | ARCH_MAP_FLAGS, -1, 0);
458
459 if (__builtin_expect (mem == MAP_FAILED, 0))
460 {
461 #ifdef ARCH_RETRY_MMAP
462 mem = ARCH_RETRY_MMAP (size);
463 if (__builtin_expect (mem == MAP_FAILED, 0))
464 #endif
465 return errno;
466 }
467
468 /* SIZE is guaranteed to be greater than zero.
469 So we can never get a null pointer back from mmap. */
470 assert (mem != NULL);
471
472 #if COLORING_INCREMENT != 0
473 /* Atomically increment NCREATED. */
474 unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
475
476 /* We chose the offset for coloring by incrementing it for
477 every new thread by a fixed amount. The offset used
478 module the page size. Even if coloring would be better
479 relative to higher alignment values it makes no sense to
480 do it since the mmap() interface does not allow us to
481 specify any alignment for the returned memory block. */
482 size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
483
484 /* Make sure the coloring offsets does not disturb the alignment
485 of the TCB and static TLS block. */
486 if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
487 coloring = (((coloring + __static_tls_align_m1)
488 & ~(__static_tls_align_m1))
489 & ~pagesize_m1);
490 #else
491 /* Unless specified we do not make any adjustments. */
492 # define coloring 0
493 #endif
417行是设定堆栈段的权限,在某些情况下,堆栈段内可能存放一些临时的代码,这样就需要有可执行权限,一般情况下,是可读写的权限。
设定完堆栈段的权限后,就开始处理堆栈段的大小,主要是一些堆栈大小、对齐的检查,还有Guard页的检查。
在进行堆栈的映射之前,还需要通过get_cached_stack函数,检查系统中是否存在已缓存的堆栈,在我们的情景中,我们假定是第一次创建线程,就不存在缓存的堆栈,这个函数留待后面介绍。
这些检查都完成后,就需要通过mmap来映射堆栈了。这是一个匿名映射,相当于在用户空间直接分配内存,有点像brk系统调用,用来分配大块的内存。
后面一段是对coloring的设定,暂时认为这段代码没有起作用吧。
这些完成后,就开始pthread结构的设定了:
- 代码: 全选
494
495 /* Place the thread descriptor at the end of the stack. */
496 #if TLS_TCB_AT_TP
497 pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
498 #elif TLS_DTV_AT_TP
499 pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
500 - __static_tls_size)
501 & ~__static_tls_align_m1)
502 - TLS_PRE_TCB_SIZE);
503 #endif
504
505 /* Remember the stack-related values. */
506 pd->stackblock = mem;
507 pd->stackblock_size = size;
508
509 /* We allocated the first block thread-specific data array.
510 This address will not change for the lifetime of this
511 descriptor. */
512 pd->specific[0] = pd->specific_1stblock;
513
514 /* This is at least the second thread. */
515 pd->header.multiple_threads = 1;
516 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
517 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
518 #endif
519
520 #ifndef __ASSUME_PRIVATE_FUTEX
521 /* The thread must know when private futexes are supported. */
522 pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
523 header.private_futex);
524 #endif
525
526 #ifdef NEED_DL_SYSINFO
527 /* Copy the sysinfo value from the parent. */
528 THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
529 #endif
530
531 /* The process ID is also the same as that of the caller. */
532 pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
533
534 /* Allocate the DTV for this thread. */
535 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
536 {
537 /* Something went wrong. */
538 assert (errno == ENOMEM);
539
540 /* Free the stack memory we just allocated. */
541 (void) munmap (mem, size);
542
543 return EAGAIN;
544 }
545
546
547 /* Prepare to modify global data. */
548 lll_lock (stack_cache_lock, LLL_PRIVATE);
549
550 /* And add to the list of stacks in use. */
551 list_add (&pd->list, &stack_used);
552
553 lll_unlock (stack_cache_lock, LLL_PRIVATE);
554
555
556 /* There might have been a race. Another thread might have
557 caused the stacks to get exec permission while this new
558 stack was prepared. Detect if this was possible and
559 change the permission if necessary. */
560 if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
561 && (prot & PROT_EXEC) == 0, 0))
562 {
563 int err = change_stack_perm (pd
564 #ifdef NEED_SEPARATE_REGISTER_STACK
565 , ~pagesize_m1
566 #endif
567 );
568 if (err != 0)
569 {
570 /* Free the stack memory we just allocated. */
571 (void) munmap (mem, size);
572
573 return err;
574 }
575 }
576
577
578 /* Note that all of the stack and the thread descriptor is
579 zeroed. This means we do not have to initialize fields
580 with initial value zero. This is specifically true for
581 the 'tid' field which is always set back to zero once the
582 stack is not used anymore and for the 'guardsize' field
583 which will be read next. */
584 }
前面说到过,在我所观察的系统中,TLS_TCB_AT_TP总是被定义,这意味着pthread位于刚才申请的堆栈的顶端,见497行,这里的-1,是减去一个pthread结构的大小。
505~532行,开始设置新线程的pthread结构,堆栈信息设置为刚刚申请的堆栈,并将pthread结构设置成为多线程状态,futex、sysinfo、pid等则从父线程继承。
接下来是调用_dl_allocate_tls来设置TLS,这是一个相当重要的过程,如果不能设置TLS,程序很有可能无法运行,目前兼容内核中多线程问题很多时候与此相关。
先看宏TLS_TPADJ,它就定义在nptl/allocatestack.c中
#define TLS_TPADJ(pd) (pd)
也就是pthread结构本身。再看_dl_allocate_tls(),位于elf/dl-tls.c中:
- 代码: 全选
459 void *
460 internal_function
461 _dl_allocate_tls (void *mem)
462 {
463 return _dl_allocate_tls_init (mem == NULL
464 ? _dl_allocate_tls_storage ()
465 : allocate_dtv (mem));
466 }
在我们这个情景中,传进去的mem值为pthread结构地址,不为NULL,因此调用到了allocate_dtv函数,也是位于elf/dl-tls.c中:
289 static void *
290 internal_function
291 allocate_dtv (void *result)
292 {
293 dtv_t *dtv;
294 size_t dtv_length;
295
296 /* We allocate a few more elements in the dtv than are needed for the
297 initial set of modules. This should avoid in most cases expansions
298 of the dtv. */
299 dtv_length = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
300 dtv = calloc (dtv_length + 2, sizeof (dtv_t));
301 if (dtv != NULL)
302 {
303 /* This is the initial length of the dtv. */
304 dtv[0].counter = dtv_length;
305
306 /* The rest of the dtv (including the generation counter) is
307 Initialize with zero to indicate nothing there. */
308
309 /* Add the dtv to the thread data structures. */
310 INSTALL_DTV (result, dtv);
311 }
312 else
313 result = NULL;
314
315 return result;
316 }
这个函数比较简单,就是申请一个dtv的数组,然后装载到pthread结构中,有趣的是INSTALL_DTV这个宏,定义在nptl/sysdeps/i386/tls.h中:
- 代码: 全选
# define INSTALL_DTV(descr, dtvp) \
((tcbhead_t *) (descr))->dtv = (dtvp) + 1
allocate_dtv完成后,需要调用_dl_allocate_tls_init对TLS进行初始化:
- 代码: 全选
377 void *
378 internal_function
379 _dl_allocate_tls_init (void *result)
380 {
381 if (result == NULL)
382 /* The memory allocation failed. */
383 return NULL;
384
385 dtv_t *dtv = GET_DTV (result);
386 struct dtv_slotinfo_list *listp;
387 size_t total = 0;
388 size_t maxgen = 0;
389
390 /* We have to prepare the dtv for all currently loaded modules using
391 TLS. For those which are dynamically loaded we add the values
392 indicating deferred allocation. */
393 listp = GL(dl_tls_dtv_slotinfo_list);
394 while (1)
395 {
396 size_t cnt;
397
398 for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
399 {
400 struct link_map *map;
401 void *dest;
402
403 /* Check for the total number of used slots. */
404 if (total + cnt > GL(dl_tls_max_dtv_idx))
405 break;
406
407 map = listp->slotinfo[cnt].map;
408 if (map == NULL)
409 /* Unused entry. */
410 continue;
411
412 /* Keep track of the maximum generation number. This might
413 not be the generation counter. */
414 maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
415
416 if (map->l_tls_offset == NO_TLS_OFFSET)
417 {
418 /* For dynamically loaded modules we simply store
419 the value indicating deferred allocation. */
420 dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
421 dtv[map->l_tls_modid].pointer.is_static = false;
422 continue;
423 }
424
425 assert (map->l_tls_modid == cnt);
426 assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
427 #if TLS_TCB_AT_TP
428 assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize);
429 dest = (char *) result - map->l_tls_offset;
430 #elif TLS_DTV_AT_TP
431 dest = (char *) result + map->l_tls_offset;
432 #else
433 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
434 #endif
435
436 /* Copy the initialization image and clear the BSS part. */
437 dtv[map->l_tls_modid].pointer.val = dest;
438 dtv[map->l_tls_modid].pointer.is_static = true;
439 memset (__mempcpy (dest, map->l_tls_initimage,
440 map->l_tls_initimage_size), '\0',
441 map->l_tls_blocksize - map->l_tls_initimage_size);
442 }
443
444 total += cnt;
445 if (total >= GL(dl_tls_max_dtv_idx))
446 break;
447
448 listp = listp->next;
449 assert (listp != NULL);
450 }
451
452 /* The DTV version is up-to-date now. */
453 dtv[0].counter = maxgen;
454
455 return result;
456 }
这里是一大堆和链接有关的代码,这里就不做解释了,如果以后有时间,或许可以多看看链接相关的代码,梳理一下,链接过程到底是如何完成的。
回到allocate_stack函数中,551行是将此结构链接到stack_used队列中。当线程退出时,将调用到__deallocate_stack,此时,此结构将从stack_used队列脱出,加入到stack_cache队列中,等待下一个pthread_create调用。
接下来一段和可执行堆栈相关,不是我们所关心的,忽略。
- 代码: 全选
585
586 /* Create or resize the guard area if necessary. */
587 if (__builtin_expect (guardsize > pd->guardsize, 0))
588 {
589 #ifdef NEED_SEPARATE_REGISTER_STACK
590 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
591 #elif _STACK_GROWS_DOWN
592 char *guard = mem;
593 # elif _STACK_GROWS_UP
594 char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
595 #endif
596 if (mprotect (guard, guardsize, PROT_NONE) != 0)
597 {
598 int err;
599 mprot_error:
600 err = errno;
601
602 lll_lock (stack_cache_lock, LLL_PRIVATE);
603
604 /* Remove the thread from the list. */
605 list_del (&pd->list);
606
607 lll_unlock (stack_cache_lock, LLL_PRIVATE);
608
609 /* Get rid of the TLS block we allocated. */
610 _dl_deallocate_tls (TLS_TPADJ (pd), false);
611
612 /* Free the stack memory regardless of whether the size
613 of the cache is over the limit or not. If this piece
614 of memory caused problems we better do not use it
615 anymore. Uh, and we ignore possible errors. There
616 is nothing we could do. */
617 (void) munmap (mem, size);
618
619 return err;
620 }
621
622 pd->guardsize = guardsize;
623 }
624 else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
625 0))
626 {
627 /* The old guard area is too large. */
628
629 #ifdef NEED_SEPARATE_REGISTER_STACK
630 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
631 char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
632
633 if (oldguard < guard
634 && mprotect (oldguard, guard - oldguard, prot) != 0)
635 goto mprot_error;
636
637 if (mprotect (guard + guardsize,
638 oldguard + pd->guardsize - guard - guardsize,
639 prot) != 0)
640 goto mprot_error;
641 #elif _STACK_GROWS_DOWN
642 if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
643 prot) != 0)
644 goto mprot_error;
645 #elif _STACK_GROWS_UP
646 if (mprotect ((char *) pd - pd->guardsize,
647 pd->guardsize - guardsize, prot) != 0)
648 goto mprot_error;
649 #endif
650
651 pd->guardsize = guardsize;
652 }
上面这么一大段,是为了设置Guard页,总体说来就是把刚才申请到的内存最低几页,设置成为PROT_NONE,使这几页无法访问。
再下面就是锁、mutex等一些同步用的字段设置。这样在新线程创建出来之前,pthread结构的设置工作就基本完成了。
0 0
- Linux用户空间线程管理介绍之二:创建线程堆栈
- Linux用户空间线程管理介绍之一
- 管理线程之创建线程
- 管理线程之创建线程
- C++ Linux 多线程之创建、管理线程
- 线程管理(二)创建线程池管理线程
- 漫谈兼容内核之二十:Windows线程的系统空间堆栈
- 漫谈兼容内核之二十:Windows线程的系统空间堆栈
- Linux之线程管理
- 线程管理之线程创建和运行
- Linux内核进程 线程 用户空间线程进程
- pthread之线程堆栈
- pthread之线程堆栈
- pthread之线程堆栈
- pthread之线程堆栈
- linux 用户空间电源管理 (二)
- Linux编程之线程介绍
- linux创建线程之vfork
- CentOS获取软件安装包源码
- HDOJ-1869六度分离(FLOYD)
- zookeeper安装
- 《中国科技纵横》杂志
- python数据类型总结
- Linux用户空间线程管理介绍之二:创建线程堆栈
- logfile
- linux环境下的多人聊天程序设计
- 聚类算法
- [UOJ 111][APIO 2015]Jakarta Skyscrapers(Dijkstra+pbds堆暴力)
- 几种判断素数的办法。
- 策略模式
- 用JavaScript实现模块切换
- sql 基本语句