#define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */
#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
+#ifdef CONFIG_64BIT
+#define VM_SHARED_PT (1UL << 32) /* Page tables in this vma are shared */
+#else
+#define VM_SHARED_PT 0 /* Needs a high vm_flags bit; 64-bit only */
+#endif
+
#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS
-#define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */
-#define VM_HIGH_ARCH_BIT_1 33 /* bit only usable on 64-bit architectures */
-#define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */
-#define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */
-#define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_0 33 /* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_1 34 /* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_2 35 /* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_3 36 /* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_4 37 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0)
#define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1)
#define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2)
{VM_MIXEDMAP, "mixedmap" }, \
{VM_HUGEPAGE, "hugepage" }, \
{VM_NOHUGEPAGE, "nohugepage" }, \
- {VM_MERGEABLE, "mergeable" } \
+ {VM_MERGEABLE, "mergeable" }, \
+ {VM_SHARED_PT, "sharedpt" }
#define show_vma_flags(flags) \
(flags) ? __print_flags(flags, "|", \
void setup_zone_pageset(struct zone *zone);
extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
+
+extern vm_fault_t find_shared_vma(struct vm_area_struct **vmap,
+ unsigned long *addrp);
+static inline bool vma_is_shared(const struct vm_area_struct *vma)
+{
+ return vma->vm_flags & VM_SHARED_PT;
+}
#endif /* __MM_INTERNAL_H */
unsigned int flags)
{
vm_fault_t ret;
+ bool shared = false;
__set_current_state(TASK_RUNNING);
/* do counter updates before entering really critical section. */
check_sync_rss_stat(current);
+ if (unlikely(vma_is_shared(vma))) {
+ /*
+ * Redirect the fault to the shared mm. When this returns 0
+ * with a vma, we hold that mm's lock for read; it is
+ * dropped below once the fault has been handled.
+ */
+ ret = find_shared_vma(&vma, &address);
+ if (ret)
+ return ret;
+ if (!vma)
+ return VM_FAULT_SIGSEGV;
+ shared = true;
+ }
+
if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
flags & FAULT_FLAG_INSTRUCTION,
flags & FAULT_FLAG_REMOTE))
else
ret = __handle_mm_fault(vma, address, flags);
+ if (shared)
+ mmap_read_unlock(vma->vm_mm);
+
if (flags & FAULT_FLAG_USER) {
mem_cgroup_exit_user_fault();
/*
#include <linux/fs.h>
#include <linux/sched/mm.h>
#include <linux/syscalls.h>
+#include "internal.h"
+
+/*
+ * Find the shared mm's vma covering a fault in an mshare region and
+ * translate the fault address. If the faulting process's PGD entry is
+ * stale, install the shared entry and return VM_FAULT_NOPAGE. Otherwise
+ * return 0; when *vmap is set non-NULL, we return holding the shared
+ * mm's lock for read and the caller must release it.
+ */
+vm_fault_t find_shared_vma(struct vm_area_struct **vmap, unsigned long *addrp)
+{
+ struct vm_area_struct *vma, *host = *vmap;
+ struct mm_struct *mm = host->vm_private_data;
+ unsigned long guest_addr = *addrp - host->vm_start;
+ pgd_t pgd = *pgd_offset(mm, guest_addr);
+ pgd_t *host_pgd = pgd_offset(current->mm, *addrp);
+
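+ /*
+ * Fast path: if this process's PGD entry does not yet point at the
+ * shared page tables, install the shared entry and let the access
+ * retry without consulting the shared mm's vmas.
+ */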
+ if (!pgd_same(*host_pgd, pgd)) {
+ set_pgd(host_pgd, pgd);
+ return VM_FAULT_NOPAGE;
+ }
+
+ mmap_read_lock(mm);
+ vma = find_vma(mm, guest_addr);
+
+ /* XXX: expand stack? */
+ if (vma && vma->vm_start > guest_addr)
+ vma = NULL;
+
+ /* Do not leak the lock when there is no vma to hand back */
+ if (!vma)
+ mmap_read_unlock(mm);
+
+ *addrp = guest_addr;
+ *vmap = vma;
+ return 0;
+}
static ssize_t mshare_read(struct kiocb *iocb, struct iov_iter *iov)
{
return ret;
}
+static int mshare_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct mm_struct *mm = file->private_data;
+
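+ /* Sharing works at PGD-entry granularity; both ends must be aligned */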
+ if ((vma->vm_start | vma->vm_end) & (PGDIR_SIZE - 1))
+ return -EINVAL;
+
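+ /* Faults on this vma will be redirected to the shared mm */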
+ vma->vm_flags |= VM_SHARED_PT;
+ vma->vm_private_data = mm;
+ return 0;
+}
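For illustration, a consumer that has received the mshare file descriptor could attach the region as below. This is a hypothetical sketch, not part of the patch: the PGDIR_SIZE value assumes x86_64 with 4-level page tables, and MAP_FIXED at a caller-chosen aligned address is used because the kernel's default address picker will not hand back a PGDIR-aligned start, so mshare_mmap() above would reject it.

#include <stdint.h>
#include <sys/mman.h>

#define MSHARE_PGDIR_SIZE (1UL << 39)   /* x86_64, 4-level tables: assumption */

/* Attach an mshare region; addr and len must be multiples of the PGD
 * span or mshare_mmap() returns -EINVAL. */
static void *attach_mshare_region(int mshare_fd, uintptr_t addr, size_t len)
{
        return mmap((void *)addr, len, PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_FIXED, mshare_fd, 0);
}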
+
static int mshare_release(struct inode *inode, struct file *file)
{
struct mm_struct *mm = file->private_data;
static const struct file_operations mshare_fops = {
.read_iter = mshare_read,
+ .mmap = mshare_mmap,
.release = mshare_release,
};
unsigned long, flags)
{
struct mm_struct *mm;
+ struct vm_area_struct *vma;
int fd;
+ int i = 0;
if ((addr | len) & (PGDIR_SIZE - 1))
return -EINVAL;
if (!mm->task_size)
mm->task_size--;
- fd = anon_inode_getfd("mshare", &mshare_fops, mm, O_RDWR);
+ mmap_write_lock(current->mm);
+
+ /*
+ * The caller must not have anything mapped in [addr, addr + len).
+ * find_vma() returns the first vma ending above addr; the range is
+ * free iff that vma (if any) starts at or above addr + len.
+ */
+ vma = find_vma(current->mm, addr);
+ if (vma && vma->vm_start < addr + len)
+ goto unlock;
+
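+ /*
+ * Mirror the donor's PGD entries for the region into the new mm,
+ * one entry per PGDIR_SIZE; slot 0 of the new PGD corresponds to
+ * the start of the region.
+ */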
+ while (addr < mm->task_size) {
+ mm->pgd[i++] = *pgd_offset(current->mm, addr);
+ addr += PGDIR_SIZE;
+ }
+ mmap_write_unlock(current->mm);
+ fd = anon_inode_getfd("mshare", &mshare_fops, mm, O_RDWR);
+ if (fd < 0)
+ goto err_put;
+ return fd;
+unlock:
+ mmap_write_unlock(current->mm);
+ fd = -EINVAL;
+err_put:
+ mmput(mm);
+ return fd;
}
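Putting the pieces together, the donor side might look as follows. This is a sketch under explicit assumptions: the syscall number is a placeholder (wiring into the syscall tables is not shown in these hunks), the (addr, len, flags) argument order is inferred from the visible tail of the SYSCALL_DEFINE above, and PGDIR_SIZE again assumes x86_64 with 4-level tables.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#define MSHARE_PGDIR_SIZE (1UL << 39)   /* x86_64, 4-level tables: assumption */
#define __NR_mshare 451                 /* placeholder; no number is allocated */

int main(void)
{
        /* A PGDIR-aligned, currently unmapped range in the donor */
        unsigned long addr = 2 * MSHARE_PGDIR_SIZE;
        unsigned long len = MSHARE_PGDIR_SIZE;
        long fd = syscall(__NR_mshare, addr, len, 0UL);

        if (fd < 0) {
                perror("mshare");
                return 1;
        }
        /* Hand fd to consumers, e.g. over AF_UNIX with SCM_RIGHTS */
        printf("mshare fd: %ld\n", fd);
        return 0;
}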