Notes on "Linux内核源代码情景分析" (Scenario Analysis of the Linux Kernel Source Code) --- Checked Against the 4.2.5 Kernel (Continued)

/*
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures).  The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 *
 * We enter with non-exclusive mmap_sem (to exclude vma changes,
 * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with pte unmapped and unlocked.
 *
 * The mmap_sem may have been released depending on flags and our
 * return value.  See filemap_fault() and __lock_page_or_retry().
 */
static int handle_pte_fault(struct mm_struct *mm,
		     struct vm_area_struct *vma, unsigned long address,
		     pte_t *pte, pmd_t *pmd, unsigned int flags)
{
	pte_t entry;
	spinlock_t *ptl;

	/*
	 * some architectures can have larger ptes than wordsize,
	 * e.g. ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y,
	 * so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses.
	 * The code below just needs a consistent view for the ifs and
	 * we later double check anyway with the ptl lock held. So here
	 * a barrier will do.
	 */
	entry = *pte;
	barrier();
	if (!pte_present(entry)) {
		if (pte_none(entry)) {
			if (vma->vm_ops)
				return do_fault(mm, vma, address, pte, pmd,
						flags, entry);

			return do_anonymous_page(mm, vma, address, pte, pmd,
					flags);
		}
		return do_swap_page(mm, vma, address,
					pte, pmd, flags, entry);
	}

	if (pte_protnone(entry))
		return do_numa_page(mm, vma, address, entry, pte, pmd);

	ptl = pte_lockptr(mm, pmd);
	spin_lock(ptl);
	if (unlikely(!pte_same(*pte, entry)))
		goto unlock;
	if (flags & FAULT_FLAG_WRITE) {
		if (!pte_write(entry))
			return do_wp_page(mm, vma, address,
					pte, pmd, ptl, entry);
		entry = pte_mkdirty(entry);
	}
	entry = pte_mkyoung(entry);
	if (ptep_set_access_flags(vma, address, pte, entry,
			flags & FAULT_FLAG_WRITE)) {
		update_mmu_cache(vma, address, pte);
	} else {
		/*
		 * This is needed only for protection faults but the arch code
		 * is not yet telling us if this is a protection fault or not.
		 * This still avoids useless tlb flushes for .text page faults
		 * with threads.
		 */
		if (flags & FAULT_FLAG_WRITE)
			flush_tlb_fix_spurious_fault(vma, address);
	}
unlock:
	pte_unmap_unlock(pte, ptl);
	return 0;
}

Compared with do_no_page in the 2.4 kernel, handle_pte_fault has become much more complex, with many more condition checks. /mm/memory.c
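A quick user-space way to see which branch the dispatch above takes: an anonymous private mapping has no vm_ops, so its first touch of a pte_none() entry ends up in do_anonymous_page(), while a file-backed mapping (whose filesystem installs vm_ops) goes through do_fault(). Below is a minimal sketch, assuming /bin/sh exists as a convenient file to map; the program itself is not from the book or the kernel tree.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4096;

	/* Anonymous private mapping: vma->vm_ops is NULL, so the first
	 * touch of a pte_none() entry is handled by do_anonymous_page(). */
	char *anon = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (anon == MAP_FAILED)
		return 1;
	anon[0] = 'x';		/* write fault -> do_anonymous_page() */

	/* File-backed mapping: the filesystem supplies vm_ops, so the first
	 * touch of a pte_none() entry goes through do_fault() instead. */
	int fd = open("/bin/sh", O_RDONLY);
	if (fd < 0)
		return 1;
	char *file = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
	if (file == MAP_FAILED)
		return 1;
	/* read fault -> do_fault() */
	printf("first byte of mapped file: %#x\n", (unsigned int)(unsigned char)file[0]);

	munmap(anon, len);
	munmap(file, len);
	close(fd);
	return 0;
}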



/*
 * We enter with non-exclusive mmap_sem (to exclude vma changes,
 * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with mmap_sem still held, but pte unmapped and unlocked.
 */
static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		unsigned int flags)
{
	struct mem_cgroup *memcg;
	struct page *page;
	spinlock_t *ptl;
	pte_t entry;

	pte_unmap(page_table);

	/* File mapping without ->vm_ops ? */
	if (vma->vm_flags & VM_SHARED)
		return VM_FAULT_SIGBUS;

	/* Check if we need to add a guard page to the stack */
	if (check_stack_guard_page(vma, address) < 0)
		return VM_FAULT_SIGSEGV;

	/* Use the zero-page for reads */
	if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) {
		entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
						vma->vm_page_prot));
		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
		if (!pte_none(*page_table))
			goto unlock;
		goto setpte;
	}

	/* Allocate our own private page. */
	if (unlikely(anon_vma_prepare(vma)))
		goto oom;
	page = alloc_zeroed_user_highpage_movable(vma, address);
	if (!page)
		goto oom;

	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
		goto oom_free_page;

	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * preceeding stores to the page contents become visible before
	 * the set_pte_at() write.
	 */
	__SetPageUptodate(page);

	entry = mk_pte(page, vma->vm_page_prot);
	if (vma->vm_flags & VM_WRITE)
		entry = pte_mkwrite(pte_mkdirty(entry));

	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
	if (!pte_none(*page_table))
		goto release;

	inc_mm_counter_fast(mm, MM_ANONPAGES);
	page_add_new_anon_rmap(page, vma, address);
	mem_cgroup_commit_charge(page, memcg, false);
	lru_cache_add_active_or_unevictable(page, vma);
setpte:
	set_pte_at(mm, address, page_table, entry);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, address, page_table);
unlock:
	pte_unmap_unlock(page_table, ptl);
	return 0;
release:
	mem_cgroup_cancel_charge(page, memcg);
	page_cache_release(page);
	goto unlock;
oom_free_page:
	page_cache_release(page);
oom:
	return VM_FAULT_OOM;
}

do_anonymous_page()
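One consequence of the zero-page branch above is visible from user space: the read-only path jumps straight to setpte without inc_mm_counter_fast(mm, MM_ANONPAGES), so merely reading untouched anonymous memory should leave the resident-set size almost unchanged, while the first write allocates real pages and grows it. Below is a rough check; the use of /proc/self/statm and all names in it are my own choices for illustration, not anything from the kernel source.

#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

/* Read the resident-page count (second field) from /proc/self/statm. */
static long resident_pages(void)
{
	long size = 0, resident = -1;
	FILE *f = fopen("/proc/self/statm", "r");

	if (f) {
		if (fscanf(f, "%ld %ld", &size, &resident) != 2)
			resident = -1;
		fclose(f);
	}
	return resident;
}

int main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	size_t npages = 1024;
	volatile char *p = mmap(NULL, npages * pagesz, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	size_t i;

	if (p == MAP_FAILED)
		return 1;

	long before = resident_pages();
	for (i = 0; i < npages; i++)		/* read faults: zero-page branch */
		(void)p[i * pagesz];
	long after_read = resident_pages();
	for (i = 0; i < npages; i++)		/* write faults: real anon pages */
		p[i * pagesz] = 1;
	long after_write = resident_pages();

	printf("resident pages: before=%ld after_read=%ld after_write=%ld\n",
	       before, after_read, after_write);
	return 0;
}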


/*
 * The in-memory structure used to track swap areas.
 */
struct swap_info_struct {
	unsigned long	flags;		/* SWP_USED etc: see above */
	signed short	prio;		/* swap priority of this type */
	struct plist_node list;		/* entry in swap_active_head */
	struct plist_node avail_list;	/* entry in swap_avail_head */
	signed char	type;		/* strange name for an index */
	unsigned int	max;		/* extent of the swap_map */
	unsigned char *swap_map;	/* vmalloc'ed array of usage counts */
	struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
	struct swap_cluster_info free_cluster_head; /* free cluster list head */
	struct swap_cluster_info free_cluster_tail; /* free cluster list tail */
	unsigned int lowest_bit;	/* index of first free in swap_map */
	unsigned int highest_bit;	/* index of last free in swap_map */
	unsigned int pages;		/* total of usable pages of swap */
	unsigned int inuse_pages;	/* number of those currently in use */
	unsigned int cluster_next;	/* likely index for next allocation */
	unsigned int cluster_nr;	/* countdown to next cluster search */
	struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */
	struct swap_extent *curr_swap_extent;
	struct swap_extent first_swap_extent;
	struct block_device *bdev;	/* swap device or bdev of swap file */
	struct file *swap_file;		/* seldom referenced */
	unsigned int old_block_size;	/* seldom referenced */
#ifdef CONFIG_FRONTSWAP
	unsigned long *frontswap_map;	/* frontswap in-use, one bit per page */
	atomic_t frontswap_pages;	/* frontswap pages in-use counter */
#endif
	spinlock_t lock;		/*
					 * protect map scan related fields like
					 * swap_map, lowest_bit, highest_bit,
					 * inuse_pages, cluster_next,
					 * cluster_nr, lowest_alloc,
					 * highest_alloc, free/discard cluster
					 * list. other fields are only changed
					 * at swapon/swapoff, so are protected
					 * by swap_lock. changing flags need
					 * hold this lock and swap_lock. If
					 * both locks need hold, hold swap_lock
					 * first.
					 */
	struct work_struct discard_work; /* discard worker */
	struct swap_cluster_info discard_cluster_head; /* list head of discard clusters */
	struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */
};

swap_info_struct /include/linux/swap.h
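For orientation: a page that has been swapped out is identified by a swp_entry_t whose val packs the type above (the index of this swap_info_struct in the kernel's swap_info[] array) together with an offset into swap_map. The toy encoder/decoder below imitates that packing; the helper names mirror swp_entry()/swp_type()/swp_offset() from include/linux/swapops.h, but the real layout there reserves a couple of extra high bits, so treat the constants here as illustrative only.

#include <stdio.h>

/* User-space imitation of the kernel's swp_entry_t packing: the swap "type"
 * (index into swap_info[]) lives in the high-order bits, the offset into
 * that area's swap_map fills the rest. */
#define MAX_SWAPFILES_SHIFT	5
#define BITS_PER_LONG		(8 * sizeof(unsigned long))
#define SWP_TYPE_SHIFT		(BITS_PER_LONG - MAX_SWAPFILES_SHIFT)
#define SWP_OFFSET_MASK		((1UL << SWP_TYPE_SHIFT) - 1)

typedef struct { unsigned long val; } swp_entry_t;

static swp_entry_t swp_entry(unsigned long type, unsigned long offset)
{
	swp_entry_t e = { (type << SWP_TYPE_SHIFT) | (offset & SWP_OFFSET_MASK) };
	return e;
}

static unsigned long swp_type(swp_entry_t e)   { return e.val >> SWP_TYPE_SHIFT; }
static unsigned long swp_offset(swp_entry_t e) { return e.val & SWP_OFFSET_MASK; }

int main(void)
{
	swp_entry_t e = swp_entry(1, 42);	/* swap area 1, slot 42 */

	/* In the kernel, swap_info[swp_type(e)]->swap_map[swp_offset(e)]
	 * would be the use count for this slot. */
	printf("val=%#lx type=%lu offset=%lu\n", e.val, swp_type(e), swp_offset(e));
	return 0;
}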

int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
{
	int error;

	error = radix_tree_maybe_preload(gfp_mask);
	if (!error) {
		error = __add_to_swap_cache(page, entry);
		radix_tree_preload_end();
	}
	return error;
}

add_to_swap_cache /mm/swap_state.c
/*
 * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
int __add_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int error;
	struct address_space *address_space;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapCache(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	page_cache_get(page);
	SetPageSwapCache(page);
	set_page_private(page, entry.val);

	address_space = swap_address_space(entry);
	spin_lock_irq(&address_space->tree_lock);
	error = radix_tree_insert(&address_space->page_tree,
					entry.val, page);
	if (likely(!error)) {
		address_space->nrpages++;
		__inc_zone_page_state(page, NR_FILE_PAGES);
		INC_CACHE_INFO(add_total);
	}
	spin_unlock_irq(&address_space->tree_lock);

	if (unlikely(error)) {
		/*
		 * Only the context which have set SWAP_HAS_CACHE flag
		 * would call add_to_swap_cache().
		 * So add_to_swap_cache() doesn't returns -EEXIST.
		 */
		VM_BUG_ON(error == -EEXIST);
		set_page_private(page, 0UL);
		ClearPageSwapCache(page);
		page_cache_release(page);
	}

	return error;
}
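The lookup side is symmetric: because the insert above keys the radix tree by entry.val, finding a swap-cached page is just a page-cache lookup on swap_address_space(entry). The sketch below approximates lookup_swap_cache() from the same file; it is meant to be read in kernel context (not compiled standalone) and simplifies the statistics bookkeeping.

/* Sketch, cf. lookup_swap_cache() in mm/swap_state.c: probe the per-area
 * swap cache for a page backing this swap entry.  A found page is returned
 * with an elevated refcount; NULL means the data lives only on the swap
 * device and do_swap_page() has to read it in. */
static struct page *lookup_swap_cache_sketch(swp_entry_t entry)
{
	struct page *page;

	page = find_get_page(swap_address_space(entry), entry.val);
	if (page)
		INC_CACHE_INFO(find_success);
	INC_CACHE_INFO(find_total);
	return page;
}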

