Added PCIGART support for PCI(E) cards and bumped the DRM interface patchlevel to 8.

When no AGP aperture is available, a 4MB scatter-gather area is now allocated at
load time and managed as a new pci_heap. Command buffers and the TT ctxdma can be
placed there through the new NOUVEAU_MEM_PCI / NOUVEAU_MEM_PCI_ACCEPTABLE flags,
backed by a non-linear PCI DMA object whose page table holds the bus address of
every page of the buffer. Userspace can query the base address of the
scatter-gather area with the new NOUVEAU_GETPARAM_PCI_PHYSICAL parameter.
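Purely as an illustration (not part of this patch): a user-space client could fall
back to the new PCI heap and locate it roughly as sketched below. This assumes the
DRM_NOUVEAU_MEM_ALLOC / DRM_NOUVEAU_GETPARAM command numbers, libdrm's
drmCommandWriteRead(), and the drm_nouveau_mem_alloc_t / drm_nouveau_getparam_t
typedefs from nouveau_drm.h; the size/offset field names are assumed and may differ
in the actual header.

    #include <stdint.h>
    #include <xf86drm.h>
    #include "nouveau_drm.h"

    /* Prefer AGP, but accept the new PCI scatter-gather heap (illustrative). */
    static int alloc_gart_buffer(int fd, uint64_t *offset)
    {
        drm_nouveau_mem_alloc_t ma = {
            .flags     = NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI_ACCEPTABLE |
                         NOUVEAU_MEM_MAPPED,
            .alignment = 0,
            .size      = 64 * 1024,          /* assumed field name */
        };
        int ret = drmCommandWriteRead(fd, DRM_NOUVEAU_MEM_ALLOC,
                                      &ma, sizeof(ma));
        if (ret)
            return ret;
        *offset = ma.offset;                 /* assumed field name: block start */
        return 0;
    }

    /* Query where the PCI scatter-gather area starts (illustrative). */
    static int pci_gart_base(int fd, uint64_t *base)
    {
        drm_nouveau_getparam_t gp = { .param = NOUVEAU_GETPARAM_PCI_PHYSICAL };
        int ret = drmCommandWriteRead(fd, DRM_NOUVEAU_GETPARAM,
                                      &gp, sizeof(gp));
        if (ret)
            return ret;
        *base = gp.value;                    /* kernel virtual base of the sg area */
        return 0;
    }

In the kernel, nouveau_mem_alloc() tries the FB/AGP heaps first and only falls
through to pci_heap for the *_ACCEPTABLE flags, so the same call works on both AGP
and plain PCI(E) systems.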
diff --git a/shared-core/nouveau_drm.h b/shared-core/nouveau_drm.h
index 0758991..7abe82e 100644
--- a/shared-core/nouveau_drm.h
+++ b/shared-core/nouveau_drm.h
@@ -25,7 +25,7 @@
 #ifndef __NOUVEAU_DRM_H__
 #define __NOUVEAU_DRM_H__
 
-#define NOUVEAU_DRM_HEADER_PATCHLEVEL 7
+#define NOUVEAU_DRM_HEADER_PATCHLEVEL 8
 
 typedef struct drm_nouveau_fifo_alloc {
 	uint32_t     fb_ctxdma_handle;
@@ -68,11 +68,14 @@
 #define NOUVEAU_MEM_AGP			0x00000002
 #define NOUVEAU_MEM_FB_ACCEPTABLE	0x00000004
 #define NOUVEAU_MEM_AGP_ACCEPTABLE	0x00000008
-#define NOUVEAU_MEM_PINNED		0x00000010
-#define NOUVEAU_MEM_USER_BACKED		0x00000020
-#define NOUVEAU_MEM_MAPPED		0x00000040
-#define NOUVEAU_MEM_INSTANCE		0x00000080 /* internal */
-#define NOUVEAU_MEM_NOTIFIER            0x00000100 /* internal */
+#define NOUVEAU_MEM_PCI			0x00000010
+#define NOUVEAU_MEM_PCI_ACCEPTABLE	0x00000020
+#define NOUVEAU_MEM_PINNED		0x00000040
+#define NOUVEAU_MEM_USER_BACKED		0x00000080
+#define NOUVEAU_MEM_MAPPED		0x00000100
+#define NOUVEAU_MEM_INSTANCE		0x00000200 /* internal */
+#define NOUVEAU_MEM_NOTIFIER            0x00000400 /* internal */
+
 typedef struct drm_nouveau_mem_alloc {
 	int flags;
 	int alignment;
@@ -95,6 +98,7 @@
 #define NOUVEAU_GETPARAM_AGP_PHYSICAL    7
 #define NOUVEAU_GETPARAM_FB_SIZE         8
 #define NOUVEAU_GETPARAM_AGP_SIZE        9
+#define NOUVEAU_GETPARAM_PCI_PHYSICAL    10
 typedef struct drm_nouveau_getparam {
 	uint64_t param;
 	uint64_t value;
diff --git a/shared-core/nouveau_drv.h b/shared-core/nouveau_drv.h
index 12b78a7..ea03fe3 100644
--- a/shared-core/nouveau_drv.h
+++ b/shared-core/nouveau_drv.h
@@ -34,7 +34,7 @@
 
 #define DRIVER_MAJOR		0
 #define DRIVER_MINOR		0
-#define DRIVER_PATCHLEVEL	7
+#define DRIVER_PATCHLEVEL	8
 
 #define NOUVEAU_FAMILY   0x0000FFFF
 #define NOUVEAU_FLAGS    0xFFFF0000
@@ -229,6 +229,7 @@
 	struct mem_block *fb_heap;
 	struct mem_block *fb_nomap_heap;
 	struct mem_block *ramin_heap;
+	struct mem_block *pci_heap;
 
         /* context table pointed to be NV_PGRAPH_CHANNEL_CTX_TABLE (0x400780) */
         uint32_t ctx_table_size;
diff --git a/shared-core/nouveau_fifo.c b/shared-core/nouveau_fifo.c
index 4095a57..bc3a994 100644
--- a/shared-core/nouveau_fifo.c
+++ b/shared-core/nouveau_fifo.c
@@ -210,11 +210,19 @@
 	}
 
 	if (cb->flags & NOUVEAU_MEM_AGP) {
-		ret = nouveau_gpuobj_dma_new
-			(dev, channel, NV_CLASS_DMA_IN_MEMORY,
-			 cb->start - dev_priv->agp_phys,
-			 cb->size, NV_DMA_ACCESS_RO, NV_DMA_TARGET_AGP,
-			 &pushbuf);
+		DRM_DEBUG("Creating CB in AGP memory\n");
+		ret = nouveau_gpuobj_dma_new(dev, channel,
+				NV_CLASS_DMA_IN_MEMORY,
+				cb->start - dev_priv->agp_phys,
+				cb->size,
+				NV_DMA_ACCESS_RO, NV_DMA_TARGET_AGP, &pushbuf);
+	} else if (cb->flags & NOUVEAU_MEM_PCI) {
+		DRM_DEBUG("Creating CB in PCI memory starting at virt 0x%08llx size %lld\n", cb->start, cb->size);
+		ret = nouveau_gpuobj_dma_new(dev, channel,
+				NV_CLASS_DMA_IN_MEMORY,
+				cb->start,
+				cb->size,
+				NV_DMA_ACCESS_RO, NV_DMA_TARGET_PCI_NONLINEAR, &pushbuf);
 	} else if (dev_priv->card_type != NV_04) {
 		ret = nouveau_gpuobj_dma_new
 			(dev, channel, NV_CLASS_DMA_IN_MEMORY,
diff --git a/shared-core/nouveau_mem.c b/shared-core/nouveau_mem.c
index c75a935..79f94fd 100644
--- a/shared-core/nouveau_mem.c
+++ b/shared-core/nouveau_mem.c
@@ -211,6 +211,10 @@
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	nouveau_mem_takedown(&dev_priv->agp_heap);
 	nouveau_mem_takedown(&dev_priv->fb_heap);
+	if (dev_priv->pci_heap)
+		{
+		nouveau_mem_takedown(&dev_priv->pci_heap);
+		}
 }
 
 /* returns the amount of FB ram in bytes */
@@ -283,8 +287,10 @@
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	uint32_t fb_size;
+	drm_scatter_gather_t sgreq;
 	dev_priv->agp_phys=0;
 	dev_priv->fb_phys=0;
+	sgreq.size = 4 << 20; /* 4MB of PCI scatter-gather zone */
 
 	/* init AGP */
 	dev_priv->agp_heap=NULL;
@@ -340,8 +346,26 @@
 		dev_priv->agp_phys		= info.aperture_base;
 		dev_priv->agp_available_size	= info.aperture_size;
 	}
-no_agp:
 
+	goto have_agp;
+no_agp:
+	dev_priv->pci_heap = NULL;
+	DRM_DEBUG("Allocating sg memory for PCI DMA\n");
+	if (drm_sg_alloc(dev, &sgreq)) {
+		DRM_ERROR("Unable to allocate 4MB of scatter-gather pages "
+			  "for PCI DMA!\n");
+		goto no_pci;
+	}
+
+	DRM_DEBUG("Got %lu KiB\n", (unsigned long)(dev->sg->pages * PAGE_SIZE) >> 10);
+	if (nouveau_mem_init_heap(&dev_priv->pci_heap, dev->sg->virtual,
+				  dev->sg->pages * PAGE_SIZE)) {
+		DRM_ERROR("Unable to initialize pci_heap!\n");
+		goto no_pci;
+	}
+
+no_pci:
+have_agp:
 	/* setup a mtrr over the FB */
 	dev_priv->fb_mtrr = drm_mtrr_add(drm_get_resource_start(dev, 1),
 					 nouveau_mem_fb_amount(dev),
@@ -405,29 +429,40 @@
 	if (size & (~PAGE_MASK))
 		size = ((size/PAGE_SIZE) + 1) * PAGE_SIZE;
 
-	if (flags&NOUVEAU_MEM_AGP) {
-		type=NOUVEAU_MEM_AGP;
-		block = nouveau_mem_alloc_block(dev_priv->agp_heap, size,
-						alignment, filp);
-		if (block) goto alloc_ok;
-	}
-	if (flags&(NOUVEAU_MEM_FB|NOUVEAU_MEM_FB_ACCEPTABLE)) {
-		type=NOUVEAU_MEM_FB;
-		if (!(flags&NOUVEAU_MEM_MAPPED)) {
-			block = nouveau_mem_alloc_block(dev_priv->fb_nomap_heap,
-							size, alignment, filp);
-			if (block) goto alloc_ok;
-		}
-		block = nouveau_mem_alloc_block(dev_priv->fb_heap, size,
-						alignment, filp);
-		if (block) goto alloc_ok;	
-	}
-	if (flags&NOUVEAU_MEM_AGP_ACCEPTABLE) {
-		type=NOUVEAU_MEM_AGP;
-		block = nouveau_mem_alloc_block(dev_priv->agp_heap, size,
-						alignment, filp);
-		if (block) goto alloc_ok;
-	}
+
+#define NOUVEAU_MEM_ALLOC_AGP {\
+	type=NOUVEAU_MEM_AGP;\
+	block = nouveau_mem_alloc_block(dev_priv->agp_heap, size,\
+					alignment, filp);\
+	if (block) goto alloc_ok;\
+	}
+
+#define NOUVEAU_MEM_ALLOC_PCI {\
+	type=NOUVEAU_MEM_PCI;\
+	block = nouveau_mem_alloc_block(dev_priv->pci_heap, size, alignment, filp);\
+	if (block) goto alloc_ok;\
+	}
+
+#define NOUVEAU_MEM_ALLOC_FB {\
+	type=NOUVEAU_MEM_FB;\
+	if (!(flags&NOUVEAU_MEM_MAPPED)) {\
+		block = nouveau_mem_alloc_block(dev_priv->fb_nomap_heap,\
+						size, alignment, filp);\
+		if (block) goto alloc_ok;\
+	}\
+	block = nouveau_mem_alloc_block(dev_priv->fb_heap, size,\
+					alignment, filp);\
+	if (block) goto alloc_ok;\
+	}
+
+
+	if (flags&NOUVEAU_MEM_FB) NOUVEAU_MEM_ALLOC_FB
+	if (flags&NOUVEAU_MEM_AGP) NOUVEAU_MEM_ALLOC_AGP
+	if (flags&NOUVEAU_MEM_PCI) NOUVEAU_MEM_ALLOC_PCI
+	if (flags&NOUVEAU_MEM_FB_ACCEPTABLE) NOUVEAU_MEM_ALLOC_FB
+	if (flags&NOUVEAU_MEM_AGP_ACCEPTABLE) NOUVEAU_MEM_ALLOC_AGP
+	if (flags&NOUVEAU_MEM_PCI_ACCEPTABLE) NOUVEAU_MEM_ALLOC_PCI
+
 
 	return NULL;
 
@@ -436,15 +471,19 @@
 
 	if (flags&NOUVEAU_MEM_MAPPED)
 	{
-		int ret;
+		int ret = 0;
 		block->flags|=NOUVEAU_MEM_MAPPED;
 
 		if (type == NOUVEAU_MEM_AGP)
 			ret = drm_addmap(dev, block->start - dev->agp->base, block->size, 
 					_DRM_AGP, 0, &block->map);
-		else
+		else if (type == NOUVEAU_MEM_FB)
 			ret = drm_addmap(dev, block->start, block->size,
 					_DRM_FRAME_BUFFER, 0, &block->map);
+		else if (type == NOUVEAU_MEM_PCI)
+			ret = drm_addmap(dev, block->start - (unsigned long int)dev->sg->virtual, block->size,
+					_DRM_SCATTER_GATHER, 0, &block->map);
+
 		if (ret) { 
 			nouveau_mem_free_block(block);
 			return NULL;
diff --git a/shared-core/nouveau_object.c b/shared-core/nouveau_object.c
index bf811b4..dcb29b4 100644
--- a/shared-core/nouveau_object.c
+++ b/shared-core/nouveau_object.c
@@ -515,30 +515,6 @@
 	return 0;
 }
 
-/*
-   DMA objects are used to reference a piece of memory in the
-   framebuffer, PCI or AGP address space. Each object is 16 bytes big
-   and looks as follows:
-   
-   entry[0]
-   11:0  class (seems like I can always use 0 here)
-   12    page table present?
-   13    page entry linear?
-   15:14 access: 0 rw, 1 ro, 2 wo
-   17:16 target: 0 NV memory, 1 NV memory tiled, 2 PCI, 3 AGP
-   31:20 dma adjust (bits 0-11 of the address)
-   entry[1]
-   dma limit
-   entry[2]
-   1     0 readonly, 1 readwrite
-   31:12 dma frame address (bits 12-31 of the address)
-
-   Non linear page tables seem to need a list of frame addresses afterwards,
-   the rivatv project has some info on this.   
-
-   The method below creates a DMA object in instance RAM and returns a handle
-   to it that can be used to set up context objects.
-*/
 
 static int
 nouveau_gpuobj_class_instmem_size(drm_device_t *dev, int class)
@@ -553,6 +529,33 @@
 	return 16;
 }
 
+/*
+   DMA objects are used to reference a piece of memory in the
+   framebuffer, PCI or AGP address space. Each object is 16 bytes big
+   and looks as follows:
+   
+   entry[0]
+   11:0  class (seems like I can always use 0 here)
+   12    page table present?
+   13    page entry linear?
+   15:14 access: 0 rw, 1 ro, 2 wo
+   17:16 target: 0 NV memory, 1 NV memory tiled, 2 PCI, 3 AGP
+   31:20 dma adjust (bits 0-11 of the address)
+   entry[1]
+   dma limit (size of transfer)
+   entry[X]
+   1     0 readonly, 1 readwrite
+   31:12 dma frame address of the page (bits 12-31 of the address)
+   entry[N]
+   page table terminator: nvidia writes the same value as the first pte
+   here, rivatv uses 0xffffffff
+
+   Non-linear page tables need a list of frame addresses afterwards;
+   the rivatv project has some info on this.
+
+   The method below creates a DMA object in instance RAM and returns a handle
+   to it that can be used to set up context objects.
+*/
 int
 nouveau_gpuobj_dma_new(drm_device_t *dev, int channel, int class,
 		       uint64_t offset, uint64_t size, int access, int target,
@@ -560,13 +563,28 @@
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	int ret;
-
+	uint32_t is_scatter_gather = 0;
+
 	DRM_DEBUG("ch%d class=0x%04x offset=0x%llx size=0x%llx\n",
 		  channel, class, offset, size);
 	DRM_DEBUG("access=%d target=%d\n", access, target);
 
+	switch (target) {
+	case NV_DMA_TARGET_AGP:
+		offset += dev_priv->agp_phys;
+		break;
+	case NV_DMA_TARGET_PCI_NONLINEAR:
+		/* assume the "offset" is a virtual memory address */
+		is_scatter_gather = 1;
+		/* put back the right value */
+		target = NV_DMA_TARGET_PCI;
+		break;
+	default:
+		break;
+	}
+
 	ret = nouveau_gpuobj_new(dev, channel,
-				 nouveau_gpuobj_class_instmem_size(dev, class),
+				 is_scatter_gather ? ((((size + PAGE_SIZE - 1) / PAGE_SIZE) << 2) + 12) : nouveau_gpuobj_class_instmem_size(dev, class),
 				 16,
 				 NVOBJ_FLAG_ZERO_ALLOC | NVOBJ_FLAG_ZERO_FREE,
 				 gpuobj);
@@ -577,22 +595,53 @@
 
 	if (dev_priv->card_type < NV_50) {
 		uint32_t frame, adjust, pte_flags = 0;
-
-		if (target == NV_DMA_TARGET_AGP)
-			offset += dev_priv->agp_phys;
-		if (access != NV_DMA_ACCESS_RO)
-			pte_flags |= (1<<1);
-		frame  = offset & ~0x00000fff;
 		adjust = offset &  0x00000fff;
-
-		INSTANCE_WR(*gpuobj, 0, ((1<<12) | (1<<13) |
-					 (adjust << 20) |
+		if (access != NV_DMA_ACCESS_RO)
+			pte_flags |= (1<<1);
+
+		if (!is_scatter_gather)
+			{
+			frame  = offset & ~0x00000fff;
+
+			INSTANCE_WR(*gpuobj, 0, ((1<<12) | (1<<13) |
+					(adjust << 20) |
 					 (access << 14) |
 					 (target << 16) |
 					  class));
-		INSTANCE_WR(*gpuobj, 1, size - 1);
-		INSTANCE_WR(*gpuobj, 2, frame | pte_flags);
-		INSTANCE_WR(*gpuobj, 3, frame | pte_flags);
+			INSTANCE_WR(*gpuobj, 1, size - 1);
+			INSTANCE_WR(*gpuobj, 2, frame | pte_flags);
+			INSTANCE_WR(*gpuobj, 3, frame | pte_flags);
+			}
+		else
+			{
+			uint32_t instance_offset;
+			uint32_t bus_addr;
+			size = (uint32_t) size;
+
+			DRM_DEBUG("Creating PCI DMA object using virtual zone starting at 0x%08x, size %d\n", (uint32_t) offset, (uint32_t)size);
+			INSTANCE_WR(*gpuobj, 0, ((1<<12) | (0<<13) |
+					(adjust << 20) |
+					(access << 14) |
+					(target << 16) |
+					class));
+			INSTANCE_WR(*gpuobj, 1, size-1);
+
+			/* write starting at the third dword */
+			instance_offset = 2;
+
+			/* for each page, get its bus address, fill in the page table entry, and advance */
+			while (size > 0) {
+				bus_addr = (uint32_t) page_address(vmalloc_to_page((void *) (uint32_t) offset));
+				bus_addr |= (offset & ~PAGE_MASK);
+				bus_addr = virt_to_bus((void *)bus_addr);
+				frame = bus_addr & ~0x00000FFF;
+				INSTANCE_WR(*gpuobj, instance_offset, frame | pte_flags);
+				offset += PAGE_SIZE;
+				instance_offset++;
+				size -= PAGE_SIZE;
+			}
+
+			}
 	} else {
 		INSTANCE_WR(*gpuobj, 0, 0x00190000 | class);
 		INSTANCE_WR(*gpuobj, 1, offset + size - 1);
@@ -804,24 +853,38 @@
 		return ret;
 	}
 
-	/* non-AGP unimplemented */
-	if (dev_priv->agp_heap == NULL)
-		return 0;
-
-	/* GART ctxdma */
-	if ((ret = nouveau_gpuobj_dma_new(dev, channel, NV_CLASS_DMA_IN_MEMORY,
-					  0, dev_priv->agp_available_size,
-					  NV_DMA_ACCESS_RW, NV_DMA_TARGET_AGP,
-					  &tt))) {
-		DRM_ERROR("Error creating TT ctxdma: %d\n", ret);
-		return ret;
+	if (dev_priv->agp_heap) {
+		/* AGPGART ctxdma */
+		if ((ret = nouveau_gpuobj_dma_new(dev, channel, NV_CLASS_DMA_IN_MEMORY,
+						   0, dev_priv->agp_available_size,
+						   NV_DMA_ACCESS_RW,
+						   NV_DMA_TARGET_AGP, &tt))) {
+			DRM_ERROR("Error creating AGP TT ctxdma: %d\n", ret);
+			return ret;
+		}
+
+		ret = nouveau_gpuobj_ref_add(dev, channel, tt_h, tt, NULL);
+		if (ret) {
+			DRM_ERROR("Error referencing AGP TT ctxdma: %d\n", ret);
+			return ret;
+		}
 	}
-
-	if ((ret = nouveau_gpuobj_ref_add(dev, channel, tt_h, tt, NULL))) {
-		DRM_ERROR("Error referencing TT ctxdma: %d\n", ret);
-		return ret;
+	else {
+		/* PCI(E) fallback: use the scatter-gather area */
+		if ((ret = nouveau_gpuobj_dma_new(dev, channel, NV_CLASS_DMA_IN_MEMORY,
+						   (unsigned long) dev->sg->virtual, dev->sg->pages * PAGE_SIZE,
+						   NV_DMA_ACCESS_RW,
+						   NV_DMA_TARGET_PCI_NONLINEAR, &tt))) {
+			DRM_ERROR("Error creating PCI TT ctxdma: %d\n", ret);
+			return ret;
+		}
+
+		ret = nouveau_gpuobj_ref_add(dev, channel, tt_h, tt, NULL);
+		if (ret) {
+			DRM_ERROR("Error referencing PCI TT ctxdma: %d\n", ret);
+			return ret;
+		}
 	}
-
 	return 0;
 }
 
diff --git a/shared-core/nouveau_reg.h b/shared-core/nouveau_reg.h
index c2ebc71..a66d2d3 100644
--- a/shared-core/nouveau_reg.h
+++ b/shared-core/nouveau_reg.h
@@ -39,6 +39,8 @@
 #define NV_DMA_TARGET_VIDMEM 0
 #define NV_DMA_TARGET_PCI    2
 #define NV_DMA_TARGET_AGP    3
+/* Not a real value used by nvidia cards; it is converted to NV_DMA_TARGET_PCI by nouveau_gpuobj_dma_new() */
+#define NV_DMA_TARGET_PCI_NONLINEAR   8
 
 /* Some object classes we care about in the drm */
 #define NV_CLASS_DMA_FROM_MEMORY                           0x00000002
diff --git a/shared-core/nouveau_state.c b/shared-core/nouveau_state.c
index c51d7d5..14b33a4 100644
--- a/shared-core/nouveau_state.c
+++ b/shared-core/nouveau_state.c
@@ -348,6 +348,7 @@
 	nouveau_fifo_cleanup(dev, filp);
 	nouveau_mem_release(filp,dev_priv->fb_heap);
 	nouveau_mem_release(filp,dev_priv->agp_heap);
+	nouveau_mem_release(filp,dev_priv->pci_heap);
 }
 
 /* first module load, setup the mmio/fb mapping */
@@ -442,6 +443,15 @@
 	case NOUVEAU_GETPARAM_AGP_PHYSICAL:
 		getparam.value=dev_priv->agp_phys;
 		break;
+	case NOUVEAU_GETPARAM_PCI_PHYSICAL:
+		if (dev->sg)
+			getparam.value=(unsigned long)dev->sg->virtual;
+		else {
+			DRM_ERROR("Requested PCIGART address, "
+				  "while no PCIGART was created\n");
+			return DRM_ERR(EINVAL);
+		}
+		break;
 	case NOUVEAU_GETPARAM_FB_SIZE:
 		getparam.value=dev_priv->fb_available_size;
 		break;
@@ -472,6 +482,8 @@
 		switch (setparam.value) {
 		case NOUVEAU_MEM_AGP:
 		case NOUVEAU_MEM_FB:
+		case NOUVEAU_MEM_PCI:
+		case NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI_ACCEPTABLE:
 			break;
 		default:
 			DRM_ERROR("invalid CMDBUF_LOCATION value=%lld\n",