]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
mm: frontswap: add swap hooks and extend try_to_unuse
authorDan Magenheimer <dan.magenheimer@oracle.com>
Tue, 30 Aug 2011 14:05:59 +0000 (08:05 -0600)
committerDan Magenheimer <dan.magenheimer@oracle.com>
Tue, 30 Aug 2011 14:05:59 +0000 (08:05 -0600)
This third patch of four in the frontswap series adds hooks in the swap
subsystem and extends try_to_unuse so that frontswap_shrink can do a
"partial swapoff".  Also, declarations for the extern-ified swap variables
in the first patch are declared.

Note that failed frontswap_map allocation is safe... failure is noted
by lack of "FS" in the subsequent printk.

[v8: rebase to 3.0-rc4]
[v8: kamezawa.hiroyu@jp.fujitsu.com: add comment to clarify find_next_to_unuse]
[v7: rebase to 3.0-rc3]
[v7: JBeulich@novell.com: use new static inlines, no-ops if not config'd]
[v6: rebase to 3.1-rc1]
[v6: lliubbo@gmail.com: use vzalloc]
[v6: lliubbo@gmail.com: fix null pointer deref if vzalloc fails]
[v5: accidentally posted stale code for v4 that failed to compile :-(]
[v4: rebase to 2.6.39]
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Reviewed-by: Konrad Wilk <konrad.wilk@oracle.com>
Acked-by: Jan Beulich <JBeulich@novell.com>
Acked-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Rik Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
mm/page_io.c
mm/swapfile.c

index dc76b4d0611ecb59fd85d89a78896c792443a62f..651a912593174b35cee96159a1867c0690a849b3 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/bio.h>
 #include <linux/swapops.h>
 #include <linux/writeback.h>
+#include <linux/frontswap.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags,
@@ -98,6 +99,12 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
                unlock_page(page);
                goto out;
        }
+       if (frontswap_put_page(page) == 0) {
+               set_page_writeback(page);
+               unlock_page(page);
+               end_page_writeback(page);
+               goto out;
+       }
        bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
        if (bio == NULL) {
                set_page_dirty(page);
@@ -122,6 +129,11 @@ int swap_readpage(struct page *page)
 
        VM_BUG_ON(!PageLocked(page));
        VM_BUG_ON(PageUptodate(page));
+       if (frontswap_get_page(page) == 0) {
+               SetPageUptodate(page);
+               unlock_page(page);
+               goto out;
+       }
        bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
        if (bio == NULL) {
                unlock_page(page);
index ff8dc1a18cb4fb6e15dac52b3dff0c491337f9af..b5fcb81719b7be82d04150c0818fbba0079d5745 100644 (file)
@@ -32,6 +32,8 @@
 #include <linux/memcontrol.h>
 #include <linux/poll.h>
 #include <linux/oom.h>
+#include <linux/frontswap.h>
+#include <linux/swapfile.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -43,7 +45,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 static void free_swap_count_continuations(struct swap_info_struct *);
 static sector_t map_swap_entry(swp_entry_t, struct block_device**);
 
-static DEFINE_SPINLOCK(swap_lock);
+DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
 long nr_swap_pages;
 long total_swap_pages;
@@ -54,9 +56,9 @@ static const char Unused_file[] = "Unused swap file entry ";
 static const char Bad_offset[] = "Bad swap offset entry ";
 static const char Unused_offset[] = "Unused swap offset entry ";
 
-static struct swap_list_t swap_list = {-1, -1};
+struct swap_list_t swap_list = {-1, -1};
 
-static struct swap_info_struct *swap_info[MAX_SWAPFILES];
+struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
 static DEFINE_MUTEX(swapon_mutex);
 
@@ -557,6 +559,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
                        swap_list.next = p->type;
                nr_swap_pages++;
                p->inuse_pages--;
+               frontswap_flush_page(p->type, offset);
                if ((p->flags & SWP_BLKDEV) &&
                                disk->fops->swap_slot_free_notify)
                        disk->fops->swap_slot_free_notify(p->bdev, offset);
@@ -1018,11 +1021,12 @@ static int unuse_mm(struct mm_struct *mm,
 }
 
 /*
- * Scan swap_map from current position to next entry still in use.
+ * Scan swap_map (or frontswap_map if frontswap parameter is true)
+ * from current position to next entry still in use.
  * Recycle to start on reaching the end, returning 0 when empty.
  */
 static unsigned int find_next_to_unuse(struct swap_info_struct *si,
-                                       unsigned int prev)
+                                       unsigned int prev, bool frontswap)
 {
        unsigned int max = si->max;
        unsigned int i = prev;
@@ -1048,6 +1052,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
                        prev = 0;
                        i = 1;
                }
+               if (frontswap) {
+                       if (frontswap_test(si, i))
+                               break;
+                       else
+                               continue;
+               }
                count = si->swap_map[i];
                if (count && swap_count(count) != SWAP_MAP_BAD)
                        break;
@@ -1059,8 +1069,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
  * We completely avoid races by reading each swap page in advance,
  * and then search for the process using it.  All the necessary
  * page table adjustments can then be made atomically.
+ *
+ * if the boolean frontswap is true, only unuse pages_to_unuse pages;
+ * pages_to_unuse==0 means all pages; ignored if frontswap is false
  */
-static int try_to_unuse(unsigned int type)
+int try_to_unuse(unsigned int type, bool frontswap,
+                unsigned long pages_to_unuse)
 {
        struct swap_info_struct *si = swap_info[type];
        struct mm_struct *start_mm;
@@ -1093,7 +1107,7 @@ static int try_to_unuse(unsigned int type)
         * one pass through swap_map is enough, but not necessarily:
         * there are races when an instance of an entry might be missed.
         */
-       while ((i = find_next_to_unuse(si, i)) != 0) {
+       while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
                if (signal_pending(current)) {
                        retval = -EINTR;
                        break;
@@ -1260,6 +1274,10 @@ static int try_to_unuse(unsigned int type)
                 * interactive performance.
                 */
                cond_resched();
+               if (frontswap && pages_to_unuse > 0) {
+                       if (!--pages_to_unuse)
+                               break;
+               }
        }
 
        mmput(start_mm);
@@ -1519,7 +1537,8 @@ bad_bmap:
 }
 
 static void enable_swap_info(struct swap_info_struct *p, int prio,
-                               unsigned char *swap_map)
+                               unsigned char *swap_map,
+                               unsigned long *frontswap_map)
 {
        int i, prev;
 
@@ -1529,6 +1548,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
        else
                p->prio = --least_priority;
        p->swap_map = swap_map;
+       frontswap_map_set(p, frontswap_map);
        p->flags |= SWP_WRITEOK;
        nr_swap_pages += p->pages;
        total_swap_pages += p->pages;
@@ -1545,6 +1565,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
                swap_list.head = swap_list.next = p->type;
        else
                swap_info[prev]->next = p->type;
+       frontswap_init(p->type);
        spin_unlock(&swap_lock);
 }
 
@@ -1616,7 +1637,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        spin_unlock(&swap_lock);
 
        oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
-       err = try_to_unuse(type);
+       err = try_to_unuse(type, false, 0); /* force all pages to be unused */
        test_set_oom_score_adj(oom_score_adj);
 
        if (err) {
@@ -1627,7 +1648,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                 * sys_swapoff for this swap_info_struct at this point.
                 */
                /* re-insert swap space back into swap_list */
-               enable_swap_info(p, p->prio, p->swap_map);
+               enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p));
                goto out_dput;
        }
 
@@ -1653,9 +1674,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        swap_map = p->swap_map;
        p->swap_map = NULL;
        p->flags = 0;
+       frontswap_flush_area(type);
        spin_unlock(&swap_lock);
        mutex_unlock(&swapon_mutex);
        vfree(swap_map);
+       vfree(frontswap_map_get(p));
        /* Destroy swap account informatin */
        swap_cgroup_swapoff(type);
 
@@ -2028,6 +2051,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        sector_t span;
        unsigned long maxpages;
        unsigned char *swap_map = NULL;
+       unsigned long *frontswap_map = NULL;
        struct page *page = NULL;
        struct inode *inode = NULL;
 
@@ -2108,6 +2132,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                error = nr_extents;
                goto bad_swap;
        }
+       /* frontswap enabled? set up bit-per-page map for frontswap */
+       if (frontswap_enabled)
+               frontswap_map = vzalloc(maxpages / sizeof(long));
 
        if (p->bdev) {
                if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
@@ -2123,14 +2150,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        if (swap_flags & SWAP_FLAG_PREFER)
                prio =
                  (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
-       enable_swap_info(p, prio, swap_map);
+       enable_swap_info(p, prio, swap_map, frontswap_map);
 
        printk(KERN_INFO "Adding %uk swap on %s.  "
-                       "Priority:%d extents:%d across:%lluk %s%s\n",
+                       "Priority:%d extents:%d across:%lluk %s%s%s\n",
                p->pages<<(PAGE_SHIFT-10), name, p->prio,
                nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
                (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
-               (p->flags & SWP_DISCARDABLE) ? "D" : "");
+               (p->flags & SWP_DISCARDABLE) ? "D" : "",
+               (frontswap_map) ? "FS" : "");
 
        mutex_unlock(&swapon_mutex);
        atomic_inc(&proc_poll_event);
@@ -2321,6 +2349,10 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
                base++;
 
        spin_lock(&swap_lock);
+       if (frontswap_test(si, target)) {
+               spin_unlock(&swap_lock);
+               return 0;
+       }
        if (end > si->max)      /* don't go beyond end of map */
                end = si->max;
 
@@ -2331,6 +2363,9 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
                        break;
                if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
                        break;
+               /* Don't read in frontswap pages */
+               if (frontswap_test(si, toff))
+                       break;
        }
        /* Count contiguous allocated slots below our target */
        for (toff = target; --toff >= base; nr_pages++) {
@@ -2339,6 +2374,9 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
                        break;
                if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
                        break;
+               /* Don't read in frontswap pages */
+               if (frontswap_test(si, toff))
+                       break;
        }
        spin_unlock(&swap_lock);