Separate image depth and sample count.

Previously, multisampled images stored their sample count in the 'depth'
member, which 3D images and 2D arrays use for their number of slices or
layers. This caused rendering to a layer of a 2D array to be interpreted
as rendering to a multisampled render target. This change adds a
'samples' member which is orthogonal to 'depth'.

Note that write operations put the same color into each of the samples,
while read operations (still) assume multisampled images have been
resolved into the first slice.

Change-Id: Ib33a0cf8194e19fcbb569b0c257ba1e1bd9c4821
Reviewed-on: https://swiftshader-review.googlesource.com/14808
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp
index b15102d..b758bad 100644
--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -44,16 +44,24 @@
 
 	void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
 	{
-		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * sliceB;
+		byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
 
-		write(element, color);
+		for(int i = 0; i < samples; i++)
+		{
+			write(element, color);
+			element += sliceB;
+		}
 	}
 
 	void Surface::Buffer::write(int x, int y, const Color<float> &color)
 	{
-		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;
+		byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB;
 
-		write(element, color);
+		for(int i = 0; i < samples; i++)
+		{
+			write(element, color);
+			element += sliceB;
+		}
 	}
 
 	inline void Surface::Buffer::write(void *element, const Color<float> &color)
@@ -365,7 +373,7 @@
 
 	Color<float> Surface::Buffer::read(int x, int y, int z) const
 	{
-		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * sliceB;
+		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
 
 		return read(element);
 	}
@@ -1039,7 +1047,7 @@
 		return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
 	}
 
-	Color<float> Surface::Buffer::sample(float x, float y) const
+	Color<float> Surface::Buffer::sample(float x, float y, int layer) const
 	{
 		x -= 0.5f;
 		y -= 0.5f;
@@ -1050,10 +1058,10 @@
 		int y0 = clamp((int)y, 0, height - 1);
 		int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
 
-		Color<float> c00 = read(x0, y0);
-		Color<float> c10 = read(x1, y0);
-		Color<float> c01 = read(x0, y1);
-		Color<float> c11 = read(x1, y1);
+		Color<float> c00 = read(x0, y0, layer);
+		Color<float> c10 = read(x1, y0, layer);
+		Color<float> c01 = read(x0, y1, layer);
+		Color<float> c11 = read(x1, y1, layer);
 
 		float fx = x - x0;
 		float fy = y - y0;
@@ -1156,11 +1164,11 @@
 			case FORMAT_ATI2:
 				return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
 			default:
-				return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
+				return (unsigned char*)buffer + x * bytes + y * pitchB + z * samples * sliceB;
 			}
 		}
 
-		return 0;
+		return nullptr;
 	}
 
 	void Surface::Buffer::unlockRect()
@@ -1173,8 +1181,8 @@
 	public:
 		SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
 			: Surface(width, height, depth, format, pixels, pitch, slice) {}
-		SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, Format format, bool lockable, bool renderTarget, int pitchP = 0)
-			: Surface(texture, width, height, depth, border, format, lockable, renderTarget, pitchP) {}
+		SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchP = 0)
+			: Surface(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchP) {}
 		~SurfaceImplementation() override {};
 
 		void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override
@@ -1193,9 +1201,9 @@
 		return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice);
 	}
 
-	Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, Format format, bool lockable, bool renderTarget, int pitchPprovided)
+	Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided)
 	{
-		return new SurfaceImplementation(texture, width, height, depth, border, format, lockable, renderTarget, pitchPprovided);
+		return new SurfaceImplementation(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchPprovided);
 	}
 
 	Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
@@ -1209,6 +1217,7 @@
 		external.width = width;
 		external.height = height;
 		external.depth = depth;
+		external.samples = 1;
 		external.format = format;
 		external.bytes = bytes(external.format);
 		external.pitchB = pitch;
@@ -1223,6 +1232,7 @@
 		internal.width = width;
 		internal.height = height;
 		internal.depth = depth;
+		internal.samples = 1;
 		internal.format = selectInternalFormat(format);
 		internal.bytes = bytes(internal.format);
 		internal.pitchB = pitchB(internal.width, 0, internal.format, false);
@@ -1237,6 +1247,7 @@
 		stencil.width = width;
 		stencil.height = height;
 		stencil.depth = depth;
+		stencil.samples = 1;
 		stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
 		stencil.bytes = bytes(stencil.format);
 		stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false);
@@ -1251,17 +1262,19 @@
 		paletteUsed = 0;
 	}
 
-	Surface::Surface(Resource *texture, int width, int height, int depth, int border, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
+	Surface::Surface(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
 	{
 		resource = texture ? texture : new Resource(0);
 		hasParent = texture != nullptr;
 		ownExternal = true;
 		depth = max(1, depth);
+		samples = max(1, samples);
 
 		external.buffer = nullptr;
 		external.width = width;
 		external.height = height;
 		external.depth = depth;
+		external.samples = (short)samples;
 		external.format = format;
 		external.bytes = bytes(external.format);
 		external.pitchB = pitchB(external.width, 0, external.format, renderTarget && !texture);
@@ -1276,13 +1289,14 @@
 		internal.width = width;
 		internal.height = height;
 		internal.depth = depth;
+		internal.samples = (short)samples;
 		internal.format = selectInternalFormat(format);
 		internal.bytes = bytes(internal.format);
 		internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes;
 		internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided;
 		internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget);
 		internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget);
-		internal.border = border;
+		internal.border = (short)border;
 		internal.lock = LOCK_UNLOCKED;
 		internal.dirty = false;
 
@@ -1290,6 +1304,7 @@
 		stencil.width = width;
 		stencil.height = height;
 		stencil.depth = depth;
+		stencil.samples = (short)samples;
 		stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
 		stencil.bytes = bytes(stencil.format);
 		stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget);
@@ -1344,7 +1359,7 @@
 			}
 			else
 			{
-				external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.format);
+				external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.samples, external.format);
 			}
 		}
 
@@ -1396,7 +1411,7 @@
 			}
 			else
 			{
-				internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.format);
+				internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.samples, internal.format);
 			}
 		}
 
@@ -1471,7 +1486,7 @@
 
 		if(!stencil.buffer)
 		{
-			stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.format);
+			stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.samples, stencil.format);
 		}
 
 		return stencil.lockRect(x, y, front, LOCK_READWRITE);   // FIXME
@@ -1931,17 +1946,21 @@
 		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
 		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
-		for(int z = 0; z < destination.depth && z < source.depth; z++)
+		int depth = min(destination.depth, source.depth);
+		int height = min(destination.height, source.height);
+		int width = min(destination.width, source.width);
+
+		for(int z = 0; z < depth; z++)
 		{
 			unsigned char *sourceRow = sourceSlice;
 			unsigned char *destinationRow = destinationSlice;
 
-			for(int y = 0; y < destination.height && y < source.height; y++)
+			for(int y = 0; y < height; y++)
 			{
 				unsigned char *sourceElement = sourceRow;
 				unsigned char *destinationElement = destinationRow;
 
-				for(int x = 0; x < destination.width && x < source.width; x++)
+				for(int x = 0; x < width; x++)
 				{
 					unsigned int b = sourceElement[0];
 					unsigned int g = sourceElement[1];
@@ -1970,17 +1989,21 @@
 		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
 		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
-		for(int z = 0; z < destination.depth && z < source.depth; z++)
+		int depth = min(destination.depth, source.depth);
+		int height = min(destination.height, source.height);
+		int width = min(destination.width, source.width);
+
+		for(int z = 0; z < depth; z++)
 		{
 			unsigned char *sourceRow = sourceSlice;
 			unsigned char *destinationRow = destinationSlice;
 
-			for(int y = 0; y < destination.height && y < source.height; y++)
+			for(int y = 0; y < height; y++)
 			{
 				unsigned char *sourceElement = sourceRow;
 				unsigned char *destinationElement = destinationRow;
 
-				for(int x = 0; x < destination.width && x < source.width; x++)
+				for(int x = 0; x < width; x++)
 				{
 					unsigned int xrgb = *(unsigned short*)sourceElement;
 
@@ -2011,17 +2034,21 @@
 		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
 		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
-		for(int z = 0; z < destination.depth && z < source.depth; z++)
+		int depth = min(destination.depth, source.depth);
+		int height = min(destination.height, source.height);
+		int width = min(destination.width, source.width);
+
+		for(int z = 0; z < depth; z++)
 		{
 			unsigned char *sourceRow = sourceSlice;
 			unsigned char *destinationRow = destinationSlice;
 
-			for(int y = 0; y < destination.height && y < source.height; y++)
+			for(int y = 0; y < height; y++)
 			{
 				unsigned char *sourceElement = sourceRow;
 				unsigned char *destinationElement = destinationRow;
 
-				for(int x = 0; x < destination.width && x < source.width; x++)
+				for(int x = 0; x < width; x++)
 				{
 					unsigned int argb = *(unsigned short*)sourceElement;
 
@@ -2053,17 +2080,21 @@
 		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
 		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
-		for(int z = 0; z < destination.depth && z < source.depth; z++)
+		int depth = min(destination.depth, source.depth);
+		int height = min(destination.height, source.height);
+		int width = min(destination.width, source.width);
+
+		for(int z = 0; z < depth; z++)
 		{
 			unsigned char *sourceRow = sourceSlice;
 			unsigned char *destinationRow = destinationSlice;
 
-			for(int y = 0; y < destination.height && y < source.height; y++)
+			for(int y = 0; y < height; y++)
 			{
 				unsigned char *sourceElement = sourceRow;
 				unsigned char *destinationElement = destinationRow;
 
-				for(int x = 0; x < destination.width && x < source.width; x++)
+				for(int x = 0; x < width; x++)
 				{
 					unsigned int xrgb = *(unsigned short*)sourceElement;
 
@@ -2094,17 +2125,21 @@
 		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
 		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
-		for(int z = 0; z < destination.depth && z < source.depth; z++)
+		int depth = min(destination.depth, source.depth);
+		int height = min(destination.height, source.height);
+		int width = min(destination.width, source.width);
+
+		for(int z = 0; z < depth; z++)
 		{
 			unsigned char *sourceRow = sourceSlice;
 			unsigned char *destinationRow = destinationSlice;
 
-			for(int y = 0; y < destination.height && y < source.height; y++)
+			for(int y = 0; y < height; y++)
 			{
 				unsigned char *sourceElement = sourceRow;
 				unsigned char *destinationElement = destinationRow;
 
-				for(int x = 0; x < destination.width && x < source.width; x++)
+				for(int x = 0; x < width; x++)
 				{
 					unsigned int argb = *(unsigned short*)sourceElement;
 
@@ -2136,17 +2171,21 @@
 		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
 		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
 
-		for(int z = 0; z < destination.depth && z < source.depth; z++)
+		int depth = min(destination.depth, source.depth);
+		int height = min(destination.height, source.height);
+		int width = min(destination.width, source.width);
+
+		for(int z = 0; z < depth; z++)
 		{
 			unsigned char *sourceRow = sourceSlice;
 			unsigned char *destinationRow = destinationSlice;
 
-			for(int y = 0; y < destination.height && y < source.height; y++)
+			for(int y = 0; y < height; y++)
 			{
 				unsigned char *sourceElement = sourceRow;
 				unsigned char *destinationElement = destinationRow;
 
-				for(int x = 0; x < destination.width && x < source.width; x++)
+				for(int x = 0; x < width; x++)
 				{
 					unsigned int abgr = palette[*(unsigned char*)sourceElement];
 
@@ -2581,7 +2620,7 @@
 	{
 	}
 
-	unsigned int Surface::size(int width, int height, int depth, int border, Format format)
+	unsigned int Surface::size(int width, int height, int depth, int border, int samples, Format format)
 	{
 		width += 2 * border;
 		height += 2 * border;
@@ -2667,7 +2706,7 @@
 				return YSize + 2 * CSize;
 			}
 		default:
-			return bytes(format) * width * height * depth;
+			return bytes(format) * width * height * depth * samples;
 		}
 	}
 
@@ -3184,7 +3223,7 @@
 		return 1;
 	}
 
-	void *Surface::allocateBuffer(int width, int height, int depth, int border, Format format)
+	void *Surface::allocateBuffer(int width, int height, int depth, int border, int samples, Format format)
 	{
 		// Render targets require 2x2 quads
 		int width2 = (width + 1) & ~1;
@@ -3193,7 +3232,7 @@
 		// FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
 		// and stencil operations also read 8 bytes per four 8-bit stencil values,
 		// so we have to allocate 4 extra bytes to avoid buffer overruns.
-		return allocate(size(width2, height2, depth, border, format) + 4);
+		return allocate(size(width2, height2, depth, border, samples, format) + 4);
 	}
 
 	void Surface::memfill4(void *buffer, int pattern, int bytes)
@@ -3308,7 +3347,7 @@
 		{
 			float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0;
 
-			for(int z = 0; z < internal.depth; z++)
+			for(int z = 0; z < internal.samples; z++)
 			{
 				for(int y = y0; y < y1; y++)
 				{
@@ -3333,7 +3372,7 @@
 			int evenX0 = ((x0 + 1) & ~1) * 2;
 			int evenBytes = (oddX1 - evenX0) * sizeof(float);
 
-			for(int z = 0; z < internal.depth; z++)
+			for(int z = 0; z < internal.samples; z++)
 			{
 				for(int y = y0; y < y1; y++)
 				{
@@ -3437,7 +3476,7 @@
 		char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC);
 
 		// Stencil buffers are assumed to use quad layout
-		for(int z = 0; z < stencil.depth; z++)
+		for(int z = 0; z < stencil.samples; z++)
 		{
 			for(int y = y0; y < y1; y++)
 			{
@@ -3534,7 +3573,7 @@
 		}
 	}
 
-	void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter)
+	void Surface::copyInternal(const Surface *source, int x, int y, float srcX, float srcY, bool filter)
 	{
 		ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
 
@@ -3542,17 +3581,17 @@
 
 		if(!filter)
 		{
-			color = source->internal.read((int)srcX, (int)srcY);
+			color = source->internal.read((int)srcX, (int)srcY, 0);
 		}
 		else   // Bilinear filtering
 		{
-			color = source->internal.sample(srcX, srcY);
+			color = source->internal.sample(srcX, srcY, 0);
 		}
 
 		internal.write(x, y, color);
 	}
 
-	void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
+	void Surface::copyInternal(const Surface *source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
 	{
 		ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
 
@@ -3688,7 +3727,8 @@
 		       external.depth  == internal.depth &&
 		       external.pitchB == internal.pitchB &&
 		       external.sliceB == internal.sliceB &&
-		       external.border == internal.border;
+		       external.border == internal.border &&
+		       external.samples == internal.samples;
 	}
 
 	Format Surface::selectInternalFormat(Format format) const
@@ -3947,11 +3987,13 @@
 
 	void Surface::resolve()
 	{
-		if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
+		if(internal.samples <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
 		{
 			return;
 		}
 
+		ASSERT(internal.depth == 1);  // Unimplemented
+
 		void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
 
 		int width = internal.width;
@@ -3983,7 +4025,7 @@
 			#if defined(__i386__) || defined(__x86_64__)
 				if(CPUID::supportsSSE2() && (width % 4) == 0)
 				{
-					if(internal.depth == 2)
+					if(internal.samples == 2)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4001,7 +4043,7 @@
 							source1 += pitch;
 						}
 					}
-					else if(internal.depth == 4)
+					else if(internal.samples == 4)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4025,7 +4067,7 @@
 							source3 += pitch;
 						}
 					}
-					else if(internal.depth == 8)
+					else if(internal.samples == 8)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4061,7 +4103,7 @@
 							source7 += pitch;
 						}
 					}
-					else if(internal.depth == 16)
+					else if(internal.samples == 16)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4128,7 +4170,7 @@
 			{
 				#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
 
-				if(internal.depth == 2)
+				if(internal.samples == 2)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4146,7 +4188,7 @@
 						source1 += pitch;
 					}
 				}
-				else if(internal.depth == 4)
+				else if(internal.samples == 4)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4170,7 +4212,7 @@
 						source3 += pitch;
 					}
 				}
-				else if(internal.depth == 8)
+				else if(internal.samples == 8)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4206,7 +4248,7 @@
 						source7 += pitch;
 					}
 				}
-				else if(internal.depth == 16)
+				else if(internal.samples == 16)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4277,7 +4319,7 @@
 			#if defined(__i386__) || defined(__x86_64__)
 				if(CPUID::supportsSSE2() && (width % 4) == 0)
 				{
-					if(internal.depth == 2)
+					if(internal.samples == 2)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4295,7 +4337,7 @@
 							source1 += pitch;
 						}
 					}
-					else if(internal.depth == 4)
+					else if(internal.samples == 4)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4319,7 +4361,7 @@
 							source3 += pitch;
 						}
 					}
-					else if(internal.depth == 8)
+					else if(internal.samples == 8)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4355,7 +4397,7 @@
 							source7 += pitch;
 						}
 					}
-					else if(internal.depth == 16)
+					else if(internal.samples == 16)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4422,7 +4464,7 @@
 			{
 				#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
 
-				if(internal.depth == 2)
+				if(internal.samples == 2)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4440,7 +4482,7 @@
 						source1 += pitch;
 					}
 				}
-				else if(internal.depth == 4)
+				else if(internal.samples == 4)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4464,7 +4506,7 @@
 						source3 += pitch;
 					}
 				}
-				else if(internal.depth == 8)
+				else if(internal.samples == 8)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4500,7 +4542,7 @@
 						source7 += pitch;
 					}
 				}
-				else if(internal.depth == 16)
+				else if(internal.samples == 16)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4570,7 +4612,7 @@
 			#if defined(__i386__) || defined(__x86_64__)
 				if(CPUID::supportsSSE2() && (width % 2) == 0)
 				{
-					if(internal.depth == 2)
+					if(internal.samples == 2)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4588,7 +4630,7 @@
 							source1 += pitch;
 						}
 					}
-					else if(internal.depth == 4)
+					else if(internal.samples == 4)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4612,7 +4654,7 @@
 							source3 += pitch;
 						}
 					}
-					else if(internal.depth == 8)
+					else if(internal.samples == 8)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4648,7 +4690,7 @@
 							source7 += pitch;
 						}
 					}
-					else if(internal.depth == 16)
+					else if(internal.samples == 16)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4715,7 +4757,7 @@
 			{
 				#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
 
-				if(internal.depth == 2)
+				if(internal.samples == 2)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4733,7 +4775,7 @@
 						source1 += pitch;
 					}
 				}
-				else if(internal.depth == 4)
+				else if(internal.samples == 4)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4757,7 +4799,7 @@
 						source3 += pitch;
 					}
 				}
-				else if(internal.depth == 8)
+				else if(internal.samples == 8)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4793,7 +4835,7 @@
 						source7 += pitch;
 					}
 				}
-				else if(internal.depth == 16)
+				else if(internal.samples == 16)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -4863,7 +4905,7 @@
 			#if defined(__i386__) || defined(__x86_64__)
 				if(CPUID::supportsSSE() && (width % 4) == 0)
 				{
-					if(internal.depth == 2)
+					if(internal.samples == 2)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4882,7 +4924,7 @@
 							source1 += pitch;
 						}
 					}
-					else if(internal.depth == 4)
+					else if(internal.samples == 4)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4907,7 +4949,7 @@
 							source3 += pitch;
 						}
 					}
-					else if(internal.depth == 8)
+					else if(internal.samples == 8)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -4944,7 +4986,7 @@
 							source7 += pitch;
 						}
 					}
-					else if(internal.depth == 16)
+					else if(internal.samples == 16)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5010,7 +5052,7 @@
 				else
 			#endif
 			{
-				if(internal.depth == 2)
+				if(internal.samples == 2)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5029,7 +5071,7 @@
 						source1 += pitch;
 					}
 				}
-				else if(internal.depth == 4)
+				else if(internal.samples == 4)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5054,7 +5096,7 @@
 						source3 += pitch;
 					}
 				}
-				else if(internal.depth == 8)
+				else if(internal.samples == 8)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5091,7 +5133,7 @@
 						source7 += pitch;
 					}
 				}
-				else if(internal.depth == 16)
+				else if(internal.samples == 16)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5160,7 +5202,7 @@
 			#if defined(__i386__) || defined(__x86_64__)
 				if(CPUID::supportsSSE() && (width % 2) == 0)
 				{
-					if(internal.depth == 2)
+					if(internal.samples == 2)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5179,7 +5221,7 @@
 							source1 += pitch;
 						}
 					}
-					else if(internal.depth == 4)
+					else if(internal.samples == 4)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5204,7 +5246,7 @@
 							source3 += pitch;
 						}
 					}
-					else if(internal.depth == 8)
+					else if(internal.samples == 8)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5241,7 +5283,7 @@
 							source7 += pitch;
 						}
 					}
-					else if(internal.depth == 16)
+					else if(internal.samples == 16)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5307,7 +5349,7 @@
 				else
 			#endif
 			{
-				if(internal.depth == 2)
+				if(internal.samples == 2)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5326,7 +5368,7 @@
 						source1 += pitch;
 					}
 				}
-				else if(internal.depth == 4)
+				else if(internal.samples == 4)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5351,7 +5393,7 @@
 						source3 += pitch;
 					}
 				}
-				else if(internal.depth == 8)
+				else if(internal.samples == 8)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5388,7 +5430,7 @@
 						source7 += pitch;
 					}
 				}
-				else if(internal.depth == 16)
+				else if(internal.samples == 16)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5457,7 +5499,7 @@
 			#if defined(__i386__) || defined(__x86_64__)
 				if(CPUID::supportsSSE())
 				{
-					if(internal.depth == 2)
+					if(internal.samples == 2)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5476,7 +5518,7 @@
 							source1 += pitch;
 						}
 					}
-					else if(internal.depth == 4)
+					else if(internal.samples == 4)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5501,7 +5543,7 @@
 							source3 += pitch;
 						}
 					}
-					else if(internal.depth == 8)
+					else if(internal.samples == 8)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5538,7 +5580,7 @@
 							source7 += pitch;
 						}
 					}
-					else if(internal.depth == 16)
+					else if(internal.samples == 16)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5604,7 +5646,7 @@
 				else
 			#endif
 			{
-				if(internal.depth == 2)
+				if(internal.samples == 2)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5623,7 +5665,7 @@
 						source1 += pitch;
 					}
 				}
-				else if(internal.depth == 4)
+				else if(internal.samples == 4)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5648,7 +5690,7 @@
 						source3 += pitch;
 					}
 				}
-				else if(internal.depth == 8)
+				else if(internal.samples == 8)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5685,7 +5727,7 @@
 						source7 += pitch;
 					}
 				}
-				else if(internal.depth == 16)
+				else if(internal.samples == 16)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -5754,7 +5796,7 @@
 			#if defined(__i386__) || defined(__x86_64__)
 				if(CPUID::supportsSSE2() && (width % 8) == 0)
 				{
-					if(internal.depth == 2)
+					if(internal.samples == 2)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5783,7 +5825,7 @@
 							source1 += pitch;
 						}
 					}
-					else if(internal.depth == 4)
+					else if(internal.samples == 4)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5824,7 +5866,7 @@
 							source3 += pitch;
 						}
 					}
-					else if(internal.depth == 8)
+					else if(internal.samples == 8)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -5889,7 +5931,7 @@
 							source7 += pitch;
 						}
 					}
-					else if(internal.depth == 16)
+					else if(internal.samples == 16)
 					{
 						for(int y = 0; y < height; y++)
 						{
@@ -6009,7 +6051,7 @@
 			{
 				#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
 
-				if(internal.depth == 2)
+				if(internal.samples == 2)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -6027,7 +6069,7 @@
 						source1 += pitch;
 					}
 				}
-				else if(internal.depth == 4)
+				else if(internal.samples == 4)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -6051,7 +6093,7 @@
 						source3 += pitch;
 					}
 				}
-				else if(internal.depth == 8)
+				else if(internal.samples == 8)
 				{
 					for(int y = 0; y < height; y++)
 					{
@@ -6087,7 +6129,7 @@
 						source7 += pitch;
 					}
 				}
-				else if(internal.depth == 16)
+				else if(internal.samples == 16)
 				{
 					for(int y = 0; y < height; y++)
 					{