restrict /dev/mem read() calls to linear region

来源:互联网 发布:js解决跨域问题 编辑:程序博客网 时间:2024/06/06 11:31
Unable to handle kernel paging request at virtual address ffff8000bfff0000
  pgd = ffff8000f9615000
  [ffff8000bfff0000] *pgd=0000000000000000
  Internal error: Oops: 96000007 [#1] PREEMPT SMP
  Modules linked in:
  CPU: 0 PID: 1284 Comm: lscpu Not tainted 4.11.0-rc3+ #103
  Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
  task: ffff8000fa78e800 task.stack: ffff8000f9780000
  PC is at __arch_copy_to_user+0x90/0x220
  LR is at read_mem+0xcc/0x140
这个bug 的现象是:user space(这里是lscpu)通过/dev/mem 读取物理内存时,内核在访问线性映射地址ffff8000bfff0000 时触发oops。在kernel commandline 加上efi=debug 可以发现,ffff8000bfff0000 对应的这段内存其实属于bios(固件),内核并没有为它建立映射。因此可以通过下面的patch 修复:
arm64: kernel: restrict /dev/mem read() calls to linear region

diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 7b0d557..adc208c 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -18,6 +18,7 @@
 
 #include <linux/elf.h>
 #include <linux/fs.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/export.h>
@@ -103,12 +104,18 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
  */
 int valid_phys_addr_range(phys_addr_t addr, size_t size)
 {
-    if (addr < PHYS_OFFSET)
-        return 0;
-    if (addr + size > __pa(high_memory - 1) + 1)
-        return 0;
-
-    return 1;
+    /*
+     * Check whether addr is covered by a memory region without the
+     * MEMBLOCK_NOMAP attribute, and whether that region covers the
+     * entire range. In theory, this could lead to false negatives
+     * if the range is covered by distinct but adjacent memory regions
+     * that only differ in other attributes. However, few of such
+     * attributes have been defined, and it is debatable whether it
+     * follows that /dev/mem read() calls should be able traverse
+     * such boundaries.
+     */
+    return memblock_is_region_memory(addr, size) &&
+           memblock_is_map_memory(addr);
 }
 
 /*
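可以看到,valid_phys_addr_range 现在先通过memblock_is_region_memory 判断整段[addr, addr+size) 是否落在memblock 的某个memory region 内,再通过memblock_is_map_memory 判断addr 所在的region 没有MEMBLOCK_NOMAP 属性(即已经做了map)。只有同时满足这两个条件的memory 才会建立页表,才允许读取。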
这样,在通过/dev/mem 读取时,对应的read 函数read_mem(节选如下)
/*
 * read_mem - read() handler for /dev/mem (excerpt).
 *
 * @file:  open file the read was issued on (not used in the lines shown)
 * @buf:   user-space destination buffer
 * @count: number of bytes requested
 * @ppos:  file offset, interpreted as the physical address to read from
 *
 * The requested physical range is checked with valid_phys_addr_range()
 * before anything is copied; a rejected range yields -EFAULT.
 *
 * NOTE(review): this looks like an abridged quotation -- no return
 * statement for the normal path is visible and 'ptr' is never used in
 * the lines shown, so the actual copy loop is presumably omitted.
 */
static ssize_t read_mem(struct file *file, char __user *buf,
            size_t count, loff_t *ppos)
{
    phys_addr_t p = *ppos;
    ssize_t read, sz;
    void *ptr;

    /*
     * If the offset did not survive the conversion to phys_addr_t
     * (i.e. the value changed), there is nothing to read: return 0.
     */
    if (p != *ppos)
        return 0;

    /* Refuse the whole request when the arch rejects [p, p + count). */
    if (!valid_phys_addr_range(p, count))
        return -EFAULT;
    read = 0;
#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
    /* we don't have page 0 mapped on sparc and m68k.. */
    if (p < PAGE_SIZE) {
        /*
         * presumably the byte count up to the end of the page that
         * contains p, capped at count -- verify against size_inside_page()
         */
        sz = size_inside_page(p, count);
        if (sz > 0) {
            /*
             * Page 0 is not read here; clear_user() is applied to the
             * user buffer for these sz bytes (zero-filling it).
             */
            if (clear_user(buf, sz))
                return -EFAULT;
            /* Advance past the bytes just handled. */
            buf += sz;
            p += sz;
            count -= sz;
            read += sz;
        }
    }
#endif
}
就会先调用valid_phys_addr_range 做检查;如果是bios(固件)的地址,就直接返回-EFAULT,不会再造成kernel oops。