On rmmod path, cfq/as waits to make sure all io-contexts was
freed. However, it's using complete(), not wait_for_completion().
I think barrier() is not enough in here. To avoid the following case,
this patch replaces barrier() with smb_wmb().
	cpu0			visibility			cpu1
	                [ioc_gnone=NULL,ioc_count=1]
ioc_gnone = &all_gone		NULL,ioc_count=1
atomic_read(&ioc_count)		NULL,ioc_count=1
wait_for_completion()		NULL,ioc_count=0	atomic_sub_and_test()
				NULL,ioc_count=0	if ( && ioc_gone)
						    [ioc_gone==NULL,
						    so doesn't call complete()]
			   &all_gone,ioc_count=0
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Jens Axboe <axboe@suse.de>
        DECLARE_COMPLETION(all_gone);
        elv_unregister(&iosched_as);
        ioc_gone = &all_gone;
-       barrier();
+       /* ioc_gone's update must be visible before reading ioc_count */
+       smp_wmb();
        if (atomic_read(&ioc_count))
-               complete(ioc_gone);
+               wait_for_completion(ioc_gone);
        synchronize_rcu();
        kmem_cache_destroy(arq_pool);
 }
 
        DECLARE_COMPLETION(all_gone);
        elv_unregister(&iosched_cfq);
        ioc_gone = &all_gone;
-       barrier();
+       /* ioc_gone's update must be visible before reading ioc_count */
+       smp_wmb();
        if (atomic_read(&ioc_count))
-               complete(ioc_gone);
+               wait_for_completion(ioc_gone);
        synchronize_rcu();
        cfq_slab_kill();
 }