static void mpol_rebind_policy(struct mempolicy *pol,
                                const nodemask_t *newmask);
 
-/* Do sanity checking on a policy */
-static int mpol_check_policy(unsigned short mode, nodemask_t *nodes)
-{
-       int was_empty, is_empty;
-
-       if (!nodes)
-               return 0;
-
-       /*
-        * "Contextualize" the in-coming nodemast for cpusets:
-        * Remember whether in-coming nodemask was empty,  If not,
-        * restrict the nodes to the allowed nodes in the cpuset.
-        * This is guaranteed to be a subset of nodes with memory.
-        */
-       cpuset_update_task_memory_state();
-       is_empty = was_empty = nodes_empty(*nodes);
-       if (!was_empty) {
-               nodes_and(*nodes, *nodes, cpuset_current_mems_allowed);
-               is_empty = nodes_empty(*nodes); /* after "contextualization" */
-       }
-
-       switch (mode) {
-       case MPOL_DEFAULT:
-               /*
-                * require caller to specify an empty nodemask
-                * before "contextualization"
-                */
-               if (!was_empty)
-                       return -EINVAL;
-               break;
-       case MPOL_BIND:
-       case MPOL_INTERLEAVE:
-               /*
-                * require at least 1 valid node after "contextualization"
-                */
-               if (is_empty)
-                       return -EINVAL;
-               break;
-       case MPOL_PREFERRED:
-               /*
-                * Did caller specify invalid nodes?
-                * Don't silently accept this as "local allocation".
-                */
-               if (!was_empty && is_empty)
-                       return -EINVAL;
-               break;
-       default:
-               BUG();
-       }
-       return 0;
-}
-
 /* Check that the nodemask contains at least one populated zone */
 static int is_valid_nodemask(nodemask_t *nodemask)
 {
        /*
         * NOTE(review): the body appears elided in this excerpt --
         * unconditionally returning 0 ("invalid") contradicts the
         * header comment and would make every MPOL_BIND request fail
         * in mpol_new(); confirm against the full file.
         */
        return 0;
 }
 
+static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
+{
+       return pol->flags & MPOL_F_STATIC_NODES;
+}
+
/*
 * Create a new memory policy of @mode with @flags, contextualizing
 * @nodes against the current task's cpuset.
 *
 * Returns NULL for MPOL_DEFAULT (which must not carry a nodemask),
 * an ERR_PTR() on error, or a new policy with refcount 1.
 */
static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
				  nodemask_t *nodes)
{
	struct mempolicy *policy;
	nodemask_t cpuset_context_nmask;

	pr_debug("setting mode %d flags %d nodes[0] %lx\n",
		 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);

	if (mode == MPOL_DEFAULT)
		return (nodes && nodes_weight(*nodes)) ? ERR_PTR(-EINVAL) :
							 NULL;
	/*
	 * All remaining modes dereference @nodes below.  NULL is an
	 * anticipated argument (see the pr_debug and MPOL_DEFAULT checks
	 * above), so reject it explicitly instead of oopsing.
	 */
	if (!nodes)
		return ERR_PTR(-EINVAL);
	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
	if (!policy)
		return ERR_PTR(-ENOMEM);
	atomic_set(&policy->refcnt, 1);
	cpuset_update_task_memory_state();
	nodes_and(cpuset_context_nmask, *nodes, cpuset_current_mems_allowed);
	switch (mode) {
	case MPOL_INTERLEAVE:
		/* need at least one node both before and after contextualization */
		if (nodes_empty(*nodes) || nodes_empty(cpuset_context_nmask))
			goto free;
		policy->v.nodes = cpuset_context_nmask;
		break;
	case MPOL_PREFERRED:
		policy->v.preferred_node = first_node(cpuset_context_nmask);
		/*
		 * NOTE(review): an empty contextualized mask now fails with
		 * -EINVAL, whereas the removed mpol_check_policy() allowed an
		 * empty mask to mean "local allocation" (preferred_node = -1);
		 * confirm this semantic change is intended.
		 */
		if (policy->v.preferred_node >= MAX_NUMNODES)
			goto free;
		break;
	case MPOL_BIND:
		if (!is_valid_nodemask(&cpuset_context_nmask))
			goto free;
		policy->v.nodes = cpuset_context_nmask;
		break;
	default:
		BUG();
	}
	policy->policy = mode;
	policy->flags = flags;
	/*
	 * Static-nodes policies remember the user's original mask (it is
	 * never remapped); others record the cpuset for later rebinding.
	 */
	if (mpol_store_user_nodemask(policy))
		policy->w.user_nodemask = *nodes;
	else
		policy->w.cpuset_mems_allowed = cpuset_mems_allowed(current);
	return policy;

free:
	kmem_cache_free(policy_cache, policy);
	return ERR_PTR(-EINVAL);
}
 
 static void gather_stats(struct page *, void *, int pte_dirty);
 {
        struct mempolicy *new;
 
-       if (mpol_check_policy(mode, nodes))
-               return -EINVAL;
        new = mpol_new(mode, flags, nodes);
        if (IS_ERR(new))
                return PTR_ERR(new);
        mpol_free(current->mempolicy);
        current->mempolicy = new;
        mpol_set_task_struct_flag();
-       if (new && new->policy == MPOL_INTERLEAVE)
+       if (new && new->policy == MPOL_INTERLEAVE &&
+           nodes_weight(new->v.nodes))
                current->il_next = first_node(new->v.nodes);
        return 0;
 }
        if (end == start)
                return 0;
 
-       if (mpol_check_policy(mode, nmask))
-               return -EINVAL;
-
        new = mpol_new(mode, mode_flags, nmask);
        if (IS_ERR(new))
                return PTR_ERR(new);
        next = next_node(nid, policy->v.nodes);
        if (next >= MAX_NUMNODES)
                next = first_node(policy->v.nodes);
-       me->il_next = next;
+       if (next < MAX_NUMNODES)
+               me->il_next = next;
        return nid;
 }
 
                struct vm_area_struct *vma, unsigned long off)
 {
        unsigned nnodes = nodes_weight(pol->v.nodes);
-       unsigned target = (unsigned)off % nnodes;
+       unsigned target;
        int c;
        int nid = -1;
 
+       if (!nnodes)
+               return numa_node_id();
+       target = (unsigned int)off % nnodes;
        c = 0;
        do {
                nid = next_node(nid, pol->v.nodes);
        return new;
 }
 
+static int mpol_match_intent(const struct mempolicy *a,
+                            const struct mempolicy *b)
+{
+       if (a->flags != b->flags)
+               return 0;
+       if (!mpol_store_user_nodemask(a))
+               return 1;
+       return nodes_equal(a->w.user_nodemask, b->w.user_nodemask);
+}
+
 /* Slow path of a mempolicy comparison */
 int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
                return 0;
        if (a->policy != b->policy)
                return 0;
+       if (a->policy != MPOL_DEFAULT && !mpol_match_intent(a, b))
+               return 0;
        switch (a->policy) {
        case MPOL_DEFAULT:
                return 1;
 static void mpol_rebind_policy(struct mempolicy *pol,
                               const nodemask_t *newmask)
 {
-       nodemask_t *mpolmask;
        nodemask_t tmp;
+       int static_nodes;
 
        if (!pol)
                return;
-       mpolmask = &pol->cpuset_mems_allowed;
-       if (nodes_equal(*mpolmask, *newmask))
+       static_nodes = pol->flags & MPOL_F_STATIC_NODES;
+       if (!mpol_store_user_nodemask(pol) &&
+           nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
                return;
 
        switch (pol->policy) {
        case MPOL_BIND:
                /* Fall through */
        case MPOL_INTERLEAVE:
-               nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask);
+               if (static_nodes)
+                       nodes_and(tmp, pol->w.user_nodemask, *newmask);
+               else {
+                       nodes_remap(tmp, pol->v.nodes,
+                                   pol->w.cpuset_mems_allowed, *newmask);
+                       pol->w.cpuset_mems_allowed = *newmask;
+               }
                pol->v.nodes = tmp;
-               *mpolmask = *newmask;
-               current->il_next = node_remap(current->il_next,
-                                               *mpolmask, *newmask);
+               if (!node_isset(current->il_next, tmp)) {
+                       current->il_next = next_node(current->il_next, tmp);
+                       if (current->il_next >= MAX_NUMNODES)
+                               current->il_next = first_node(tmp);
+                       if (current->il_next >= MAX_NUMNODES)
+                               current->il_next = numa_node_id();
+               }
                break;
        case MPOL_PREFERRED:
-               pol->v.preferred_node = node_remap(pol->v.preferred_node,
-                                               *mpolmask, *newmask);
-               *mpolmask = *newmask;
+               if (static_nodes) {
+                       int node = first_node(pol->w.user_nodemask);
+
+                       if (node_isset(node, *newmask))
+                               pol->v.preferred_node = node;
+                       else
+                               pol->v.preferred_node = -1;
+               } else {
+                       pol->v.preferred_node = node_remap(pol->v.preferred_node,
+                                       pol->w.cpuset_mems_allowed, *newmask);
+                       pol->w.cpuset_mems_allowed = *newmask;
+               }
                break;
        default:
                BUG();
        int l;
        nodemask_t nodes;
        unsigned short mode = pol ? pol->policy : MPOL_DEFAULT;
+       unsigned short flags = pol ? pol->flags : 0;
 
        switch (mode) {
        case MPOL_DEFAULT:
        strcpy(p, policy_types[mode]);
        p += l;
 
+       if (flags) {
+               int need_bar = 0;
+
+               if (buffer + maxlen < p + 2)
+                       return -ENOSPC;
+               *p++ = '=';
+
+               if (flags & MPOL_F_STATIC_NODES)
+                       p += sprintf(p, "%sstatic", need_bar++ ? "|" : "");
+       }
+
        if (!nodes_empty(nodes)) {
                if (buffer + maxlen < p + 2)
                        return -ENOSPC;