linux SysV IPC shm共享内存实现
来源:互联网 发布:tomcat的默认端口 编辑:程序博客网 时间:2024/06/08 19:59
共享内存可以使多个进程共享某段内存,由于不需要进程间数据复制,所以是速度最快的IPC。
多个进程访问共享内存时需要同步机制,如进程A往共享内存中写数据时,进程B不能使用共享内存;通常采用信号量同步多进程访问共享内存。
共享内存实现主要有以下几点:
1.分配物理内存
2.将物理内存映射到进程的地址空间;通过修改进程的页表,可以使用虚拟地址直接访问物理内存
3.进程不再使用共享内存时,取消物理内存在进程地址空间的映射
tmpfs文件系统将所有文件存储在内存(而非硬盘等介质)中;tmpfs将所有的东西存放在内核缓存中,可以根据文件系统中所容纳的文件自动增长和收缩,也可以将不使用的页swap出去。
linux共享内存的实现基于tmpfs文件系统及mmap文件映射;通过在tmpfs中创建文件来获取物理内存,将文件映射到进程地址空间后可以使用虚拟地址访问共享内存。
I.数据结构
include/linux/shm.h
86 struct shmid_kernel /* private to the kernel */ 87 { 88 struct kern_ipc_perm shm_perm; /* operation perms */ 89 struct file * shm_file; /* tmpfs file */ 90 unsigned long shm_nattch; /* no. of current attaches */ 91 unsigned long shm_segsz; /* size of segment (bytes) */ 92 time_t shm_atim; /* last attach time */ 93 time_t shm_dtim; /* last detach time */ 94 time_t shm_ctim; /* last change time */ 95 pid_t shm_cprid; /* pid of creator */ 96 pid_t shm_lprid; /* pid of last operator */ 97 struct user_struct *mlock_user; 98 };
shmid_kernel用于存放共享内存信息
注:
shm_file存放tmpfs中创建的内存文件,用于分配物理内存;用tmpfs的文件映射功能直接将共享内存映射到进程地址空间
ipc/shm.c
/*
 * Private data hung off each per-attach shm file
 * (listing lines 48-53 of the quoted ipc/shm.c).
 */
struct shm_file_data {
	/* IPC id of the segment; used to look the segment up again on close */
	int id;
	/* IPC namespace the segment lives in */
	struct ipc_namespace *ns;
	/* the segment's backing file, whose ->mmap is what actually maps memory */
	struct file *file;
	/* vm_ops installed by the backing file's ->mmap, saved so they can be wrapped */
	const struct vm_operations_struct *vm_ops;
};
shm_file_data主要用于保存文件(tmpfs文件)内存映射的虚拟内存操作集vm_ops,进而扩展vm_ops,使得在某进程已经调用IPC_RMID的情况下,其它所有进程detach后仍能正常释放共享内存IPC资源
注:
共享内存主要涉及两种文件,tmpfs文件与shm文件;一个共享内存对应一个tmpfs文件,有多少个进程attach到共享内存就有多少个shm文件。
为什么要在tmpfs文件上层再加shm文件呢?直接将tmpfs文件映射到多个进程地址空间不就能实现内存共享了吗?
的确,可以将tmpfs文件映射到多个进程地址空间,并能实现内存共享。但是有一种特殊情况,当多个进程attach到共享内存,此时某个进程删除共享内存,为了保证其他进程能继续正常使用共享内存,则暂不能删除共享内存的IPC资源;而所有的进程detach后,tmpfs文件munmap又不能删除IPC资源。
所以在tmpfs文件上层添加shm文件,用于扩展tmpfs文件映射的vm_ops,来实现所有进程detach后删除共享内存的IPC资源。
详细代码参见:do_shm_rmid、shm_close
II.共享内存创建
326 /** 327 * newseg - Create a new shared memory segment 328 * @ns: namespace 329 * @params: ptr to the structure that contains key, size and shmflg 330 * 331 * Called with shm_ids.rw_mutex held as a writer. 332 */ 333 334 static int newseg(struct ipc_namespace *ns, struct ipc_params *params) 335 { 336 key_t key = params->key; 337 int shmflg = params->flg; 338 size_t size = params->u.size; 339 int error; 340 struct shmid_kernel *shp; 341 int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; 342 struct file * file; 343 char name[13]; 344 int id; 345 int acctflag = 0; 346 347 if (size < SHMMIN || size > ns->shm_ctlmax) 348 return -EINVAL; 349 350 if (ns->shm_tot + numpages > ns->shm_ctlall) 351 return -ENOSPC; 352 353 shp = ipc_rcu_alloc(sizeof(*shp)); 354 if (!shp) 355 return -ENOMEM; 356 357 shp->shm_perm.key = key; 358 shp->shm_perm.mode = (shmflg & S_IRWXUGO); 359 shp->mlock_user = NULL; 360 361 shp->shm_perm.security = NULL; 362 error = security_shm_alloc(shp); 363 if (error) { 364 ipc_rcu_putref(shp); 365 return error; 366 } 367 368 sprintf (name, "SYSV%08x", key); 369 if (shmflg & SHM_HUGETLB) { 370 /* hugetlb_file_setup applies strict accounting */ 371 if (shmflg & SHM_NORESERVE) 372 acctflag = VM_NORESERVE; 373 file = hugetlb_file_setup(name, size, acctflag, 374 &shp->mlock_user, HUGETLB_SHMFS_INODE); 375 } else { 376 /* 377 * Do not allow no accounting for OVERCOMMIT_NEVER, even 378 * if it's asked for. 
379 */ 380 if ((shmflg & SHM_NORESERVE) && 381 sysctl_overcommit_memory != OVERCOMMIT_NEVER) 382 acctflag = VM_NORESERVE; 383 file = shmem_file_setup(name, size, acctflag); 384 } 385 error = PTR_ERR(file); 386 if (IS_ERR(file)) 387 goto no_file; 388 389 id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); 390 if (id < 0) { 391 error = id; 392 goto no_id; 393 } 394 395 shp->shm_cprid = task_tgid_vnr(current); 396 shp->shm_lprid = 0; 397 shp->shm_atim = shp->shm_dtim = 0; 398 shp->shm_ctim = get_seconds(); 399 shp->shm_segsz = size; 400 shp->shm_nattch = 0; 401 shp->shm_file = file; 402 /* 403 * shmid gets reported as "inode#" in /proc/pid/maps. 404 * proc-ps tools use this. Changing this will break them. 405 */ 406 file->f_dentry->d_inode->i_ino = shp->shm_perm.id; 407 408 ns->shm_tot += numpages; 409 error = shp->shm_perm.id; 410 shm_unlock(shp); 411 return error; 412 413 no_id: 414 if (is_file_hugepages(file) && shp->mlock_user) 415 user_shm_unlock(size, shp->mlock_user); 416 fput(file); 417 no_file: 418 security_shm_free(shp); 419 ipc_rcu_putref(shp); 420 return error; 421 }
1.参数及共享内存系统限制检查
2.分配共享内存管理结构shmid_kernel
3.在tmpfs中创建共享内存文件,以获取物理内存
4.将shmid_kernel添加到共享内存基数树中,并获得基数树id
5.初始化shmid_kernel结构
6.返回共享内存IPC id
III.共享内存映射到进程地址空间
i.do_shmat
806 /* 807 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. 808 * 809 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The 810 * "raddr" thing points to kernel space, and there has to be a wrapper around 811 * this. 812 */ 813 long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) 814 { 815 struct shmid_kernel *shp; 816 unsigned long addr; 817 unsigned long size; 818 struct file * file; 819 int err; 820 unsigned long flags; 821 unsigned long prot; 822 int acc_mode; 823 unsigned long user_addr; 824 struct ipc_namespace *ns; 825 struct shm_file_data *sfd; 826 struct path path; 827 fmode_t f_mode; 828 829 err = -EINVAL; 830 if (shmid < 0) 831 goto out; 832 else if ((addr = (ulong)shmaddr)) { 833 if (addr & (SHMLBA-1)) { 834 if (shmflg & SHM_RND) 835 addr &= ~(SHMLBA-1); /* round down */ 836 else 837 #ifndef __ARCH_FORCE_SHMLBA 838 if (addr & ~PAGE_MASK) 839 #endif 840 goto out; 841 } 842 flags = MAP_SHARED | MAP_FIXED; 843 } else { 844 if ((shmflg & SHM_REMAP)) 845 goto out; 846 847 flags = MAP_SHARED; 848 } 849 850 if (shmflg & SHM_RDONLY) { 851 prot = PROT_READ; 852 acc_mode = S_IRUGO; 853 f_mode = FMODE_READ; 854 } else { 855 prot = PROT_READ | PROT_WRITE; 856 acc_mode = S_IRUGO | S_IWUGO; 857 f_mode = FMODE_READ | FMODE_WRITE; 858 } 859 if (shmflg & SHM_EXEC) { 860 prot |= PROT_EXEC; 861 acc_mode |= S_IXUGO; 862 } 863 864 /* 865 * We cannot rely on the fs check since SYSV IPC does have an 866 * additional creator id... 
867 */ 868 ns = current->nsproxy->ipc_ns; 869 shp = shm_lock_check(ns, shmid); 870 if (IS_ERR(shp)) { 871 err = PTR_ERR(shp); 872 goto out; 873 } 874 875 err = -EACCES; 876 if (ipcperms(&shp->shm_perm, acc_mode)) 877 goto out_unlock; 878 879 err = security_shm_shmat(shp, shmaddr, shmflg); 880 if (err) 881 goto out_unlock; 882 883 path.dentry = dget(shp->shm_file->f_path.dentry); 884 path.mnt = shp->shm_file->f_path.mnt; 885 shp->shm_nattch++; 886 size = i_size_read(path.dentry->d_inode); 887 shm_unlock(shp); 888 889 err = -ENOMEM; 890 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); 891 if (!sfd) 892 goto out_put_dentry; 893 894 file = alloc_file(path.mnt, path.dentry, f_mode, 895 is_file_hugepages(shp->shm_file) ? 896 &shm_file_operations_huge : 897 &shm_file_operations); 898 if (!file) 899 goto out_free; 900 ima_counts_get(file); 901 902 file->private_data = sfd; 903 file->f_mapping = shp->shm_file->f_mapping; 904 sfd->id = shp->shm_perm.id; 905 sfd->ns = get_ipc_ns(ns); 906 sfd->file = shp->shm_file; 907 sfd->vm_ops = NULL; 908 909 down_write(¤t->mm->mmap_sem); 910 if (addr && !(shmflg & SHM_REMAP)) { 911 err = -EINVAL; 912 if (find_vma_intersection(current->mm, addr, addr + size)) 913 goto invalid; 914 /* 915 * If shm segment goes below stack, make sure there is some 916 * space left for the stack to grow (at least 4 pages). 
917 */ 918 if (addr < current->mm->start_stack && 919 addr > current->mm->start_stack - size - PAGE_SIZE * 5) 920 goto invalid; 921 } 922 923 user_addr = do_mmap (file, addr, size, prot, flags, 0); 924 *raddr = user_addr; 925 err = 0; 926 if (IS_ERR_VALUE(user_addr)) 927 err = (long)user_addr; 928 invalid: 929 up_write(¤t->mm->mmap_sem); 930 931 fput(file); 932 933 out_nattch: 934 down_write(&shm_ids(ns).rw_mutex); 935 shp = shm_lock(ns, shmid); 936 BUG_ON(IS_ERR(shp)); 937 shp->shm_nattch--; 938 if(shp->shm_nattch == 0 && 939 shp->shm_perm.mode & SHM_DEST) 940 shm_destroy(ns, shp); 941 else 942 shm_unlock(shp); 943 up_write(&shm_ids(ns).rw_mutex); 944 945 out: 946 return err; 947 948 out_unlock: 949 shm_unlock(shp); 950 goto out; 951 952 out_free: 953 kfree(sfd); 954 out_put_dentry: 955 dput(path.dentry); 956 goto out_nattch; 957 }
1.对参数进行合法性检查,并根据参数计算内存映射标识和保护方式
2.attach权限检验
3.attach计数器shm_nattch加1
4.分配shm文件,并初始化私有数据shm_file_data
5.将shm文件映射到进程地址空间(do_mmap实现是将tmpfs文件映射到进程地址空间)
6.attach计数器shm_nattch减1;第3步的加1只是为了在映射期间临时占住共享内存,而shm文件映射时shm_mmap->shm_open已经为该映射将shm_nattch加1,所以这里要把临时计数减掉
ii.shm_mmap
do_mmap会回调shm文件的shm_mmap函数:
do_mmap->do_mmap_pgoff->mmap_region->mmap
249 static int shm_mmap(struct file * file, struct vm_area_struct * vma) 250 { 251 struct shm_file_data *sfd = shm_file_data(file); 252 int ret; 253 254 ret = sfd->file->f_op->mmap(sfd->file, vma); 255 if (ret != 0) 256 return ret; 257 sfd->vm_ops = vma->vm_ops; 258 #ifdef CONFIG_MMU 259 BUG_ON(!sfd->vm_ops->fault); 260 #endif 261 vma->vm_ops = &shm_vm_ops; 262 shm_open(vma); 263 264 return ret; 265 }
1.将tmpfs文件的vma操作切换成shm文件的vma操作shm_vm_ops,用于进程munmap时调用shm_close,来实现所有进程detach且之前有IPC_RMID时删除共享内存IPC资源
2.可以看出do_mmap最后会调用tmpfs文件的mmap方法,将tmpfs文件映射到进程地址空间
iii.shm_fault
将tmpfs文件映射到进程地址空间后,如果是第一次访问会产生缺页异常;缺页异常处理中会将文件装入内存中并添加相应的页表项,以便使用虚拟地址访问。
do_page_fault->handle_mm_fault->handle_pte_fault->do_linear_fault->__do_fault->shm_fault
214 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 215 { 216 struct file *file = vma->vm_file; 217 struct shm_file_data *sfd = shm_file_data(file); 218 219 return sfd->vm_ops->fault(vma, vmf); 220 }
shm文件的异常处理shm_fault实际调用的是tmpfs文件的异常处理,来装入tmpfs文件的内容。
IV.共享内存从进程地址空间中删除
i.shmdt
当进程不想再访问共享内存时,会将其从地址空间中移除。
971 /* 972 * detach and kill segment if marked destroyed. 973 * The work is done in shm_close. 974 */ 975 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) 976 { 977 struct mm_struct *mm = current->mm; 978 struct vm_area_struct *vma; 979 unsigned long addr = (unsigned long)shmaddr; 980 int retval = -EINVAL; 981 #ifdef CONFIG_MMU 982 loff_t size = 0; 983 struct vm_area_struct *next; 984 #endif 985 986 if (addr & ~PAGE_MASK) 987 return retval; 988 989 down_write(&mm->mmap_sem); 990 991 /* 992 * This function tries to be smart and unmap shm segments that 993 * were modified by partial mlock or munmap calls: 994 * - It first determines the size of the shm segment that should be 995 * unmapped: It searches for a vma that is backed by shm and that 996 * started at address shmaddr. It records it's size and then unmaps 997 * it. 998 * - Then it unmaps all shm vmas that started at shmaddr and that 999 * are within the initially determined size.1000 * Errors from do_munmap are ignored: the function only fails if1001 * it's called with invalid parameters or if it's called to unmap1002 * a part of a vma. Both calls in this function are for full vmas,1003 * the parameters are directly copied from the vma itself and always1004 * valid - therefore do_munmap cannot fail. (famous last words?)1005 */1006 /*1007 * If it had been mremap()'d, the starting address would not1008 * match the usual checks anyway. So assume all vma's are1009 * above the starting address given.1010 */1011 vma = find_vma(mm, addr);1012 1013 #ifdef CONFIG_MMU1014 while (vma) {1015 next = vma->vm_next;1016 1017 /*1018 * Check if the starting address would match, i.e. 
it's1019 * a fragment created by mprotect() and/or munmap(), or it1020 * otherwise it starts at this address with no hassles.1021 */1022 if ((vma->vm_ops == &shm_vm_ops) &&1023 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {1024 1025 1026 size = vma->vm_file->f_path.dentry->d_inode->i_size;1027 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);1028 /*1029 * We discovered the size of the shm segment, so1030 * break out of here and fall through to the next1031 * loop that uses the size information to stop1032 * searching for matching vma's.1033 */1034 retval = 0;1035 vma = next;1036 break;1037 }1038 vma = next;1039 }1040 1041 /*1042 * We need look no further than the maximum address a fragment1043 * could possibly have landed at. Also cast things to loff_t to1044 * prevent overflows and make comparisions vs. equal-width types.1045 */1046 size = PAGE_ALIGN(size);1047 while (vma && (loff_t)(vma->vm_end - addr) <= size) {1048 next = vma->vm_next;1049 1050 /* finding a matching vma now does not alter retval */1051 if ((vma->vm_ops == &shm_vm_ops) &&1052 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)1053 1054 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);1055 vma = next;1056 }1057 1058 #else /* CONFIG_MMU */1059 /* under NOMMU conditions, the exact address to be destroyed must be1060 * given */1061 retval = -EINVAL;1062 if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {1063 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);1064 retval = 0;1065 }1066 1067 #endif1068 1069 up_write(&mm->mmap_sem);1070 return retval;1071 }
1.查找共享内存映射到进程地址空间的虚拟地址内存段vma
2.移除共享内存在进程地址空间的映射
ii.shm_close
186 /* 187 * remove the attach descriptor vma. 188 * free memory for segment if it is marked destroyed. 189 * The descriptor has already been removed from the current->mm->mmap list 190 * and will later be kfree()d. 191 */ 192 static void shm_close(struct vm_area_struct *vma) 193 { 194 struct file * file = vma->vm_file; 195 struct shm_file_data *sfd = shm_file_data(file); 196 struct shmid_kernel *shp; 197 struct ipc_namespace *ns = sfd->ns; 198 199 down_write(&shm_ids(ns).rw_mutex); 200 /* remove from the list of attaches of the shm segment */ 201 shp = shm_lock(ns, sfd->id); 202 BUG_ON(IS_ERR(shp)); 203 shp->shm_lprid = task_tgid_vnr(current); 204 shp->shm_dtim = get_seconds(); 205 shp->shm_nattch--; 206 if(shp->shm_nattch == 0 && 207 shp->shm_perm.mode & SHM_DEST) 208 shm_destroy(ns, shp); 209 else 210 shm_unlock(shp); 211 up_write(&shm_ids(ns).rw_mutex); 212 }
1.attach计数器shm_nattch减1
2.如果共享内存没有attach的进程,且已经有进程调用过shmctl(...,IPC_RMID,...),则销毁共享内存IPC资源
V.共享内存移除
i.do_shm_rmid
当不再使用共享内存时,会移除共享内存;通过IPC_RMID命令调用shmctl来实现共享内存的移除
82 /* 83 * Called with shm_ids.rw_mutex (writer) and the shp structure locked. 84 * Only shm_ids.rw_mutex remains locked on exit. 85 */ 86 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 87 { 88 struct shmid_kernel *shp; 89 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 90 91 if (shp->shm_nattch){ 92 shp->shm_perm.mode |= SHM_DEST; 93 /* Do not find it any more */ 94 shp->shm_perm.key = IPC_PRIVATE; 95 shm_unlock(shp); 96 } else 97 shm_destroy(ns, shp); 98 }
1.还有进程attach到共享内存,置共享内存销毁SHM_DEST标识,用于在所有进程detach时销毁共享内存IPC资源,见shm_close;并将key置为IPC_PRIVATE,不能通过key再获取共享内存
2.如果没有进程attach到共享内存,销毁共享内存IPC资源
ii.shm_destroy
162 /* 163 * shm_destroy - free the struct shmid_kernel 164 * 165 * @ns: namespace 166 * @shp: struct to free 167 * 168 * It has to be called with shp and shm_ids.rw_mutex (writer) locked, 169 * but returns with shp unlocked and freed. 170 */ 171 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 172 { 173 ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; 174 shm_rmid(ns, shp); 175 shm_unlock(shp); 176 if (!is_file_hugepages(shp->shm_file)) 177 shmem_lock(shp->shm_file, 0, shp->mlock_user); 178 else if (shp->mlock_user) 179 user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size, 180 shp->mlock_user); 181 fput (shp->shm_file); 182 security_shm_free(shp); 183 ipc_rcu_putref(shp); 184 }
1.将共享内存IPC从共享内存基数树中移除
2.释放tmpfs文件使用的file结构内存
3.释放shmid_kernel内存
- linux SysV IPC shm共享内存实现
- IPC-shm 共享内存
- IPC共享内存 shm
- linux SysV IPC实现
- linux shm共享内存
- linux shm共享内存
- LINUX下利用特殊文件系统shm 实现内存的共享
- linux SysV IPC sem信号量实现
- linux SysV IPC msg消息队列实现
- linux下共享内存(shm)使用示例
- linux shm 进程之间内存共享
- linux 下共享内存shm详解
- linux IPC---共享内存
- shm创建共享内存
- 共享内存 shm
- 共享内存 shm
- 共享内存(shm)
- linux ipc机制-共享内存
- uva 10313 - Pay the Price
- Windows编程之管道技术
- timus 1073 Square Country
- 几种开放源码的TCPIP协议栈概述--LwIP,uIP,TinyTcp和uC/IP
- “#if 0/#if 1 ... #endif”的注释作用
- linux SysV IPC shm共享内存实现
- Oracle内存全面分析之PGA
- Design Pattern_Builder(建造者模式)
- 解决mysql“Access denied for user 'root'@'localhost'”
- 命名管道
- win8 硬盘版安装
- Android CTS环境搭建与测试方法
- 中介者模式 C++ 实现
- 斗地主的感悟(转)