struct sw_rx_page {
        struct page     *page;
        DEFINE_DMA_UNMAP_ADDR(mapping);
+       unsigned int    offset;
 };
 
 union db_prod {
 
 #define PAGES_PER_SGE_SHIFT    0
 #define PAGES_PER_SGE          (1 << PAGES_PER_SGE_SHIFT)
-#define SGE_PAGE_SIZE          PAGE_SIZE
-#define SGE_PAGE_SHIFT         PAGE_SHIFT
-#define SGE_PAGE_ALIGN(addr)   PAGE_ALIGN((typeof(PAGE_SIZE))(addr))
+#define SGE_PAGE_SHIFT         12
+#define SGE_PAGE_SIZE          (1 << SGE_PAGE_SHIFT)
+#define SGE_PAGE_MASK          (~(SGE_PAGE_SIZE - 1))
+#define SGE_PAGE_ALIGN(addr)   (((addr) + SGE_PAGE_SIZE - 1) & SGE_PAGE_MASK)
 #define SGE_PAGES              (SGE_PAGE_SIZE * PAGES_PER_SGE)
 #define TPA_AGG_SIZE           min_t(u32, (min_t(u32, 8, MAX_SKB_FRAGS) * \
                                            SGE_PAGES), 0xffff)
        TPA_MODE_GRO
 };
 
+struct bnx2x_alloc_pool {
+       struct page     *page;
+       dma_addr_t      dma;
+       unsigned int    offset;
+};
+
 struct bnx2x_fastpath {
        struct bnx2x            *bp; /* parent */
 
             4 (for the digits and to make it DWORD aligned) */
 #define FP_NAME_SIZE           (sizeof(((struct net_device *)0)->name) + 8)
        char                    name[FP_NAME_SIZE];
+
+       struct bnx2x_alloc_pool page_pool;
 };
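
With these defines, an SGE buffer is always 4 KiB (SGE_PAGE_SHIFT = 12) instead of tracking the host PAGE_SIZE, and the new bnx2x_alloc_pool hands out consecutive 4 KiB fragments of one mapped page, so on a 64 KiB-page system (e.g. ppc64) a single page backs 16 SGE ring entries rather than one. The standalone sketch below only models that arithmetic; the SGE_PAGE_* macros are copied from the hunk above, while host_page = 65536 is just an illustrative value, not driver code:

#include <stdio.h>

#define SGE_PAGE_SHIFT 12
#define SGE_PAGE_SIZE  (1 << SGE_PAGE_SHIFT)
#define SGE_PAGE_MASK  (~(SGE_PAGE_SIZE - 1))
#define SGE_PAGE_ALIGN(addr) (((addr) + SGE_PAGE_SIZE - 1) & SGE_PAGE_MASK)

int main(void)
{
        unsigned long host_page = 65536;        /* e.g. 64 KiB pages on ppc64 */
        unsigned int offset, frags = 0;

        /* count how many slices fit: the inverse of the refill test
         * in bnx2x_alloc_rx_sge()
         */
        for (offset = 0; host_page - offset >= SGE_PAGE_SIZE; offset += SGE_PAGE_SIZE)
                frags++;

        printf("%lu-byte page -> %u fragments of %d bytes\n",
               host_page, frags, SGE_PAGE_SIZE);
        printf("SGE_PAGE_ALIGN(5000) = %lu\n", (unsigned long)SGE_PAGE_ALIGN(5000UL));
        return 0;
}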
 
 #define bnx2x_fp(bp, nr, var)  ((bp)->fp[(nr)].var)
 
 static int bnx2x_alloc_rx_sge(struct bnx2x *bp, struct bnx2x_fastpath *fp,
                              u16 index, gfp_t gfp_mask)
 {
-       struct page *page = alloc_pages(gfp_mask, PAGES_PER_SGE_SHIFT);
        struct sw_rx_page *sw_buf = &fp->rx_page_ring[index];
        struct eth_rx_sge *sge = &fp->rx_sge_ring[index];
+       struct bnx2x_alloc_pool *pool = &fp->page_pool;
        dma_addr_t mapping;
 
-       if (unlikely(page == NULL)) {
-               BNX2X_ERR("Can't alloc sge\n");
-               return -ENOMEM;
-       }
+       if (!pool->page || (PAGE_SIZE - pool->offset) < SGE_PAGE_SIZE) {
 
-       mapping = dma_map_page(&bp->pdev->dev, page, 0,
-                              SGE_PAGES, DMA_FROM_DEVICE);
-       if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
-               __free_pages(page, PAGES_PER_SGE_SHIFT);
-               BNX2X_ERR("Can't map sge\n");
-               return -ENOMEM;
+               /* Drop the reference the pool itself holds on this page;
+                * fragments already carved from it keep their own
+                * references, so it is freed only once they are released.
+                */
+               if (pool->page)
+                       put_page(pool->page);
+
+               pool->page = alloc_pages(gfp_mask, PAGES_PER_SGE_SHIFT);
+               if (unlikely(!pool->page)) {
+                       BNX2X_ERR("Can't alloc sge\n");
+                       return -ENOMEM;
+               }
+
+               pool->dma = dma_map_page(&bp->pdev->dev, pool->page, 0,
+                                        PAGE_SIZE, DMA_FROM_DEVICE);
+               if (unlikely(dma_mapping_error(&bp->pdev->dev,
+                                              pool->dma))) {
+                       __free_pages(pool->page, PAGES_PER_SGE_SHIFT);
+                       pool->page = NULL;
+                       BNX2X_ERR("Can't map sge\n");
+                       return -ENOMEM;
+               }
+               pool->offset = 0;
        }
 
-       sw_buf->page = page;
+       get_page(pool->page);
+       sw_buf->page = pool->page;
+       sw_buf->offset = pool->offset;
+
+       mapping = pool->dma + sw_buf->offset;
        dma_unmap_addr_set(sw_buf, mapping, mapping);
 
        sge->addr_hi = cpu_to_le32(U64_HI(mapping));
        sge->addr_lo = cpu_to_le32(U64_LO(mapping));
 
+       pool->offset += SGE_PAGE_SIZE;
+
        return 0;
 }
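
bnx2x_alloc_rx_sge() now works in two steps: when the pool has no page, or fewer than SGE_PAGE_SIZE bytes left in it, it drops its own reference and allocates and maps a fresh page; each ring entry then takes get_page() on the pool page, records the page and current offset in sw_rx_page, points the hardware SGE at pool->dma + offset, and advances the offset by one fragment. That recorded offset is what bnx2x_fill_frag_skb() later passes to skb_fill_page_desc(), and the per-fragment unmap covers only that 4 KiB slice. Below is a rough userspace model of the refill-and-carve reference counting; mock_page, mock_pool, mock_alloc_frag, HOST_PAGE_SIZE and FRAG_SIZE are names invented for the sketch, and the counters are plain integers, not real struct page refcounts:

#include <stdio.h>
#include <stdlib.h>

#define HOST_PAGE_SIZE 65536u           /* example host PAGE_SIZE */
#define FRAG_SIZE      4096u            /* SGE_PAGE_SIZE */

struct mock_page { unsigned int refs; };

struct mock_pool {
        struct mock_page *page;
        unsigned int offset;
};

static void mock_put(struct mock_page *p)
{
        if (--p->refs == 0)             /* models put_page() */
                free(p);
}

/* Models bnx2x_alloc_rx_sge(): refill the pool when the remainder is
 * too small, then hand out one FRAG_SIZE slice and take a reference. */
static int mock_alloc_frag(struct mock_pool *pool, unsigned int *frag_off)
{
        if (!pool->page || HOST_PAGE_SIZE - pool->offset < FRAG_SIZE) {
                if (pool->page)
                        mock_put(pool->page);   /* pool's own reference */
                pool->page = calloc(1, sizeof(*pool->page));
                if (!pool->page)
                        return -1;
                pool->page->refs = 1;           /* as after alloc_pages() */
                pool->offset = 0;
        }
        pool->page->refs++;                     /* models get_page() */
        *frag_off = pool->offset;               /* what sw_buf->offset records */
        pool->offset += FRAG_SIZE;
        return 0;
}

int main(void)
{
        struct mock_pool pool = { NULL, 0 };
        unsigned int off, i;

        for (i = 0; i < 20; i++)
                if (mock_alloc_frag(&pool, &off))
                        return 1;
        /* 16 slices fit a 64 KiB page: after 20 allocations the pool is on
         * its second page, holding 1 (pool) + 4 (fragment) references. */
        printf("last frag offset=%u pool offset=%u refs=%u\n",
               off, pool.offset, pool.page->refs);
        return 0;
}

In the real driver the fragments still referencing the retired first page are released later by bnx2x_free_rx_sge(), and the pool's current page by bnx2x_free_rx_mem_pool(); the model simply leaves them allocated.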
 
                        return err;
                }
 
-               /* Unmap the page as we're going to pass it to the stack */
-               dma_unmap_page(&bp->pdev->dev,
-                              dma_unmap_addr(&old_rx_pg, mapping),
-                              SGE_PAGES, DMA_FROM_DEVICE);
+               dma_unmap_single(&bp->pdev->dev,
+                                dma_unmap_addr(&old_rx_pg, mapping),
+                                SGE_PAGE_SIZE, DMA_FROM_DEVICE);
                /* Add one frag and update the appropriate fields in the skb */
                if (fp->mode == TPA_MODE_LRO)
-                       skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);
+                       skb_fill_page_desc(skb, j, old_rx_pg.page,
+                                          old_rx_pg.offset, frag_len);
                else { /* GRO */
                        int rem;
                        int offset = 0;
                        for (rem = frag_len; rem > 0; rem -= gro_size) {
                                int len = rem > gro_size ? gro_size : rem;
                                skb_fill_page_desc(skb, frag_id++,
-                                                  old_rx_pg.page, offset, len);
+                                                  old_rx_pg.page,
+                                                  old_rx_pg.offset + offset,
+                                                  len);
                                if (offset)
                                        get_page(old_rx_pg.page);
                                offset += len;
 
        if (!page)
                return;
 
-       dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(sw_buf, mapping),
-                      SGE_PAGES, DMA_FROM_DEVICE);
-       __free_pages(page, PAGES_PER_SGE_SHIFT);
+       /* Several fragments can share this page: unmap only this
+        * fragment's SGE_PAGE_SIZE slice and drop its reference; the
+        * page itself is freed once the last reference is released.
+        */
+       dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(sw_buf, mapping),
+                        SGE_PAGE_SIZE, DMA_FROM_DEVICE);
+
+       put_page(page);
 
        sw_buf->page = NULL;
        sge->addr_hi = 0;
        ((u8 *)fw_lo)[1]  = mac[4];
 }
 
+static inline void bnx2x_free_rx_mem_pool(struct bnx2x *bp,
+                                         struct bnx2x_alloc_pool *pool)
+{
+       if (!pool->page)
+               return;
+
+       /* Page was not fully fragmented.  Unmap unused space */
+       if (pool->offset < PAGE_SIZE) {
+               dma_addr_t dma = pool->dma + pool->offset;
+               int size = PAGE_SIZE - pool->offset;
+
+               dma_unmap_single(&bp->pdev->dev, dma, size, DMA_FROM_DEVICE);
+       }
+
+       put_page(pool->page);
+
+       pool->page = NULL;
+}
+
 static inline void bnx2x_free_rx_sge_range(struct bnx2x *bp,
                                           struct bnx2x_fastpath *fp, int last)
 {
 
        for (i = 0; i < last; i++)
                bnx2x_free_rx_sge(bp, fp, i);
+
+       bnx2x_free_rx_mem_pool(bp, &fp->page_pool);
 }
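
Teardown is split between the two helpers above: bnx2x_free_rx_sge() unmaps one SGE_PAGE_SIZE slice and drops one page reference per populated ring entry, while bnx2x_free_rx_mem_pool() unmaps whatever tail of the current pool page was never handed out and drops the pool's own reference. A small accounting check of that split, assuming every outstanding fragment sits on the pool's current page (the 64 KiB page size and the fragment count are illustrative values, not driver state):

#include <assert.h>
#include <stdio.h>

#define HOST_PAGE_SIZE 65536u           /* example host PAGE_SIZE */
#define FRAG_SIZE      4096u            /* SGE_PAGE_SIZE */

int main(void)
{
        unsigned int frags_in_use = 10;                 /* populated ring entries */
        unsigned int pool_offset = frags_in_use * FRAG_SIZE;
        unsigned int refs = 1 + frags_in_use;           /* alloc_pages() + get_page()s */
        unsigned int unmapped = 0;

        /* bnx2x_free_rx_sge() per entry: unmap one slice, drop one reference */
        unmapped += frags_in_use * FRAG_SIZE;
        refs -= frags_in_use;

        /* bnx2x_free_rx_mem_pool(): unmap the never-used tail, drop pool ref */
        if (pool_offset < HOST_PAGE_SIZE)
                unmapped += HOST_PAGE_SIZE - pool_offset;
        refs -= 1;

        assert(unmapped == HOST_PAGE_SIZE && refs == 0);
        printf("unmapped %u bytes, remaining refs %u\n", unmapped, refs);
        return 0;
}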
 
 static inline void bnx2x_set_next_page_rx_bd(struct bnx2x_fastpath *fp)