Linux Page Fault Handling


Linux accesses memory through page tables. When a virtual address has no valid mapping to a physical page, the CPU raises a page fault, which the kernel must then resolve.
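
A quick way to see this from user space (a sketch of mine, not taken from the kernel sources, using only standard POSIX APIs): the first write to a freshly mmap'ed anonymous page triggers a minor page fault, and the kernel's per-task fault counters can be read back with getrusage():

/* Sketch: observe minor page faults caused by first-touch of anonymous memory.
 * Build: gcc -O0 demo.c && ./a.out
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>

static long minor_faults(void)
{
    struct rusage ru;
    getrusage(RUSAGE_SELF, &ru);
    return ru.ru_minflt;          /* reported from the task's min_flt counter */
}

int main(void)
{
    size_t len = 16 * 4096;       /* 16 pages */
    char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED)
        return 1;

    long before = minor_faults();
    memset(p, 0x5a, len);         /* first write faults in the pages on demand */
    long after = minor_faults();

    printf("minor faults while touching 16 pages: %ld\n", after - before);
    munmap(p, len);
    return 0;
}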

The page fault handler on x86 is do_page_fault(), defined in arch/x86/mm/fault.c:

do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
    struct vm_area_struct *vma;
    struct task_struct *tsk;
    unsigned long address;
    struct mm_struct *mm;
    int write;
    int fault;

    tsk = current;
    mm = tsk->mm;

    /* Get the faulting address: */
    address = read_cr2();

    /*
     * Detect and handle instructions that would cause a page fault for
     * both a tracked kernel page and a userspace page.
     */
    if (kmemcheck_active(regs))
        kmemcheck_hide(regs);
    prefetchw(&mm->mmap_sem);

    if (unlikely(kmmio_fault(regs, address)))
        return;

    /*
     * We fault-in kernel-space virtual memory on-demand. The
     * 'reference' page table is init_mm.pgd.
     *
     * NOTE! We MUST NOT take any locks for this case. We may
     * be in an interrupt or a critical region, and should
     * only copy the information from the master page table,
     * nothing more.
     *
     * This verifies that the fault happens in kernel space
     * (error_code & 4) == 0, and that the fault was not a
     * protection error (error_code & 9) == 0.
     */
    if (unlikely(fault_in_kernel_space(address))) {
        if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
            if (vmalloc_fault(address) >= 0)
                return;

            if (kmemcheck_fault(regs, address, error_code))
                return;
        }

        /* Can handle a stale RO->RW TLB: */
        if (spurious_fault(error_code, address))
            return;

        /* kprobes don't want to hook the spurious faults: */
        if (notify_page_fault(regs))
            return;

        /*
         * Don't take the mm semaphore here. If we fixup a prefetch
         * fault we could otherwise deadlock:
         */
        bad_area_nosemaphore(regs, error_code, address);

        return;
    }

    /* kprobes don't want to hook the spurious faults: */
    if (unlikely(notify_page_fault(regs)))
        return;

    /*
     * It's safe to allow irq's after cr2 has been saved and the
     * vmalloc fault has been handled.
     *
     * User-mode registers count as a user access even for any
     * potential system fault or CPU buglet:
     */
    if (user_mode_vm(regs)) {
        local_irq_enable();
        error_code |= PF_USER;
    } else {
        if (regs->flags & X86_EFLAGS_IF)
            local_irq_enable();
    }

    if (unlikely(error_code & PF_RSVD))
        pgtable_bad(regs, error_code, address);

    perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);

    /*
     * If we're in an interrupt, have no user context or are running
     * in an atomic region then we must not take the fault:
     */
    if (unlikely(in_atomic() || !mm)) {
        bad_area_nosemaphore(regs, error_code, address);
        return;
    }

    /*
     * When running in the kernel we expect faults to occur only to
     * addresses in user space.  All other faults represent errors in
     * the kernel and should generate an OOPS.  Unfortunately, in the
     * case of an erroneous fault occurring in a code path which already
     * holds mmap_sem we will deadlock attempting to validate the fault
     * against the address space.  Luckily the kernel only validly
     * references user space from well defined areas of code, which are
     * listed in the exceptions table.
     *
     * As the vast majority of faults will be valid we will only perform
     * the source reference check when there is a possibility of a
     * deadlock. Attempt to lock the address space, if we cannot we then
     * validate the source. If this is invalid we can skip the address
     * space check, thus avoiding the deadlock:
     */
    if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
        if ((error_code & PF_USER) == 0 &&
            !search_exception_tables(regs->ip)) {
            bad_area_nosemaphore(regs, error_code, address);
            return;
        }
        down_read(&mm->mmap_sem);
    } else {
        /*
         * The above down_read_trylock() might have succeeded in
         * which case we'll have missed the might_sleep() from
         * down_read():
         */
        might_sleep();
    }

    vma = find_vma(mm, address);
    if (unlikely(!vma)) {
        bad_area(regs, error_code, address);
        return;
    }
    if (likely(vma->vm_start <= address))
        goto good_area;
    if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
        bad_area(regs, error_code, address);
        return;
    }
    if (error_code & PF_USER) {
        /*
         * Accessing the stack below %sp is always a bug.
         * The large cushion allows instructions like enter
         * and pusha to work. ("enter $65535, $31" pushes
         * 32 pointers and then decrements %sp by 65535.)
         */
        if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
            bad_area(regs, error_code, address);
            return;
        }
    }
    if (unlikely(expand_stack(vma, address))) {
        bad_area(regs, error_code, address);
        return;
    }

    /*
     * Ok, we have a good vm_area for this memory access, so
     * we can handle it..
     */
good_area:
    write = error_code & PF_WRITE;

    if (unlikely(access_error(error_code, write, vma))) {
        bad_area_access_error(regs, error_code, address);
        return;
    }

    /*
     * If for any reason at all we couldn't handle the fault,
     * make sure we exit gracefully rather than endlessly redo
     * the fault:
     */
    fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);

    if (unlikely(fault & VM_FAULT_ERROR)) {
        mm_fault_error(regs, error_code, address, fault);
        return;
    }

    if (fault & VM_FAULT_MAJOR) {
        tsk->maj_flt++;
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
                      regs, address);
    } else {
        tsk->min_flt++;
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
                      regs, address);
    }

    check_v8086_mode(regs, address, tsk);

    up_read(&mm->mmap_sem);
}
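
For reference, the error_code value tested above is the hardware error code that the x86 CPU pushes on a page fault. The following enum is my own sketch of the PF_* bit layout as I understand it (it matches the "(error_code & 4)" and "(error_code & 9)" remarks in the kernel-space comment above):

/*
 * Sketch of the x86 hardware page-fault error code bits tested by
 * do_page_fault(). PF_USER == 4 and PF_RSVD | PF_PROT == 9 explain the
 * "(error_code & 4)" and "(error_code & 9)" comments in the kernel-space branch.
 */
enum x86_pf_error_code {
    PF_PROT  = 1 << 0,  /* 0: page not present      1: protection violation */
    PF_WRITE = 1 << 1,  /* 0: read access           1: write access         */
    PF_USER  = 1 << 2,  /* 0: fault in kernel mode  1: fault in user mode   */
    PF_RSVD  = 1 << 3,  /* reserved bit set in a page-table entry           */
    PF_INSTR = 1 << 4,  /* fault caused by an instruction fetch             */
};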

The core of do_page_fault() is the call to handle_mm_fault(), defined as follows:

int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                    unsigned long address, unsigned int flags)
{
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;

    __set_current_state(TASK_RUNNING);

    count_vm_event(PGFAULT);

    /* do counter updates before entering really critical section. */
    check_sync_rss_stat(current);

    if (unlikely(is_vm_hugetlb_page(vma)))
        return hugetlb_fault(mm, vma, address, flags);

    pgd = pgd_offset(mm, address);
    pud = pud_alloc(mm, pgd, address);
    if (!pud)
        return VM_FAULT_OOM;
    pmd = pmd_alloc(mm, pud, address);
    if (!pmd)
        return VM_FAULT_OOM;
    pte = pte_alloc_map(mm, pmd, address);
    if (!pte)
        return VM_FAULT_OOM;

    return handle_pte_fault(mm, vma, address, pte, pmd, flags);
}
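
The pgd_offset()/pud_alloc()/pmd_alloc()/pte_alloc_map() calls walk, and if necessary allocate, one level of the page-table tree each. As a rough illustration of how a virtual address selects an entry at every level, here is a user-space sketch of mine assuming x86-64 with 4 KiB pages and 4-level paging (9 index bits per level, 12-bit page offset); it only mimics the index arithmetic, not the kernel API:

/* Sketch: how a 48-bit x86-64 virtual address is split into the four
 * table indices that the pgd/pud/pmd/pte lookups use (4 KiB pages assumed). */
#include <stdio.h>

#define PAGE_SHIFT 12
#define LEVEL_BITS 9          /* 512 entries per table */
#define LEVEL_MASK ((1UL << LEVEL_BITS) - 1)

int main(void)
{
    unsigned long addr = 0x00007f1234567abcUL;   /* arbitrary example address */

    unsigned long pte_idx = (addr >> PAGE_SHIFT) & LEVEL_MASK;
    unsigned long pmd_idx = (addr >> (PAGE_SHIFT + 1 * LEVEL_BITS)) & LEVEL_MASK;
    unsigned long pud_idx = (addr >> (PAGE_SHIFT + 2 * LEVEL_BITS)) & LEVEL_MASK;
    unsigned long pgd_idx = (addr >> (PAGE_SHIFT + 3 * LEVEL_BITS)) & LEVEL_MASK;

    printf("addr %#lx -> pgd[%lu] pud[%lu] pmd[%lu] pte[%lu], offset %#lx\n",
           addr, pgd_idx, pud_idx, pmd_idx, pte_idx,
           addr & ((1UL << PAGE_SHIFT) - 1));
    return 0;
}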

handle_mm_fault() builds the page-table hierarchy for the faulting address, allocating the PUD, PMD, and PTE levels as needed; if that succeeds, it calls handle_pte_fault() to resolve the fault at the PTE level, e.g. by allocating a new page for the process. If any level cannot be allocated, it returns VM_FAULT_OOM (out of memory).
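
This demand allocation can be observed from user space, assuming the /proc/self/pagemap interface (one 64-bit entry per virtual page, with bit 63 meaning "present in RAM"): an anonymous page has no physical frame until the first touch forces handle_pte_fault() to allocate one. A minimal sketch:

/* Sketch: check whether a virtual page is backed by a physical frame
 * before and after the first touch, via /proc/self/pagemap. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static int page_present(void *addr)
{
    int fd = open("/proc/self/pagemap", O_RDONLY);
    uint64_t entry = 0;
    off_t off = ((uintptr_t)addr / 4096) * sizeof(entry);

    if (fd < 0)
        return -1;
    if (pread(fd, &entry, sizeof(entry), off) != sizeof(entry))
        entry = 0;
    close(fd);
    return (entry >> 63) & 1;     /* bit 63: page present in RAM */
}

int main(void)
{
    char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED)
        return 1;

    printf("before touch: present=%d\n", page_present(p));
    p[0] = 1;                     /* first write -> page fault -> allocation */
    printf("after  touch: present=%d\n", page_present(p));

    munmap(p, 4096);
    return 0;
}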
