Fix problems in x64 build that were discovered by the testsuite:
- Reenable modules on x64 that had been disabled aeons ago for Itanium.
- Cleared up confusion about compilers for 64-bit Windows.  There are only Itanium and x64.  Added macros MS_WINI64 and MS_WINX64 for those rare cases where it matters, such as the disabling of modules above.
- Set target platform (_WIN32_WINNT and WINVER) to 0x0501 (XP) for x64, and 0x0400 (NT 4.0) otherwise, which are the targeted minimum platforms.
- Fixed thread_nt.h.  The emulated InterlockedCompareExchange function didn't work on x64, probably due to the lack of a "volatile" specifier.  Anyway, win95 is no longer a target platform.
- Itertools module used wrong constant to check for overflow in count()
- PyInt_AsSsize_t couldn't deal with attribute error when accessing the __long__ member.
- PyLong_FromSsize_t() incorrectly specified that the operand was unsigned.

With these changes, the x64 build passes the test suite, for those modules present.
diff --git a/Python/thread_nt.h b/Python/thread_nt.h
index 27fca72..9445529 100644
--- a/Python/thread_nt.h
+++ b/Python/thread_nt.h
@@ -15,72 +15,16 @@
 	HANDLE hevent ;
 } NRMUTEX, *PNRMUTEX ;
 
-typedef PVOID WINAPI interlocked_cmp_xchg_t(PVOID *dest, PVOID exc, PVOID comperand) ;
-
-/* Sorry mate, but we haven't got InterlockedCompareExchange in Win95! */
-static PVOID WINAPI
-interlocked_cmp_xchg(PVOID *dest, PVOID exc, PVOID comperand)
-{
-	static LONG spinlock = 0 ;
-	PVOID result ;
-	DWORD dwSleep = 0;
-
-	/* Acqire spinlock (yielding control to other threads if cant aquire for the moment) */
-	while(InterlockedExchange(&spinlock, 1))
-	{
-		// Using Sleep(0) can cause a priority inversion.
-		// Sleep(0) only yields the processor if there's
-		// another thread of the same priority that's
-		// ready to run.  If a high-priority thread is
-		// trying to acquire the lock, which is held by
-		// a low-priority thread, then the low-priority
-		// thread may never get scheduled and hence never
-		// free the lock.  NT attempts to avoid priority
-		// inversions by temporarily boosting the priority
-		// of low-priority runnable threads, but the problem
-		// can still occur if there's a medium-priority
-		// thread that's always runnable.  If Sleep(1) is used,
-		// then the thread unconditionally yields the CPU.  We
-		// only do this for the second and subsequent even
-		// iterations, since a millisecond is a long time to wait
-		// if the thread can be scheduled in again sooner
-		// (~100,000 instructions).
-		// Avoid priority inversion: 0, 1, 0, 1,...
-		Sleep(dwSleep);
-		dwSleep = !dwSleep;
-	}
-	result = *dest ;
-	if (result == comperand)
-		*dest = exc ;
-	/* Release spinlock */
-	spinlock = 0 ;
-	return result ;
-} ;
-
-static interlocked_cmp_xchg_t *ixchg;
 
 BOOL
 InitializeNonRecursiveMutex(PNRMUTEX mutex)
 {
-	if (!ixchg)
-	{
-		/* Sorely, Win95 has no InterlockedCompareExchange API (Win98 has), so we have to use emulation */
-		HANDLE kernel = GetModuleHandle("kernel32.dll") ;
-		if (!kernel || (ixchg = (interlocked_cmp_xchg_t *)GetProcAddress(kernel, "InterlockedCompareExchange")) == NULL)
-			ixchg = interlocked_cmp_xchg ;
-	}
-
 	mutex->owned = -1 ;  /* No threads have entered NonRecursiveMutex */
 	mutex->thread_id = 0 ;
 	mutex->hevent = CreateEvent(NULL, FALSE, FALSE, NULL) ;
 	return mutex->hevent != NULL ;	/* TRUE if the mutex is created */
 }
 
-#ifdef InterlockedCompareExchange
-#undef InterlockedCompareExchange
-#endif
-#define InterlockedCompareExchange(dest,exchange,comperand) (ixchg((dest), (exchange), (comperand)))
-
 VOID
 DeleteNonRecursiveMutex(PNRMUTEX mutex)
 {
@@ -98,7 +42,7 @@
 	/* InterlockedIncrement(&mutex->owned) == 0 means that no thread currently owns the mutex */
 	if (!wait)
 	{
-		if (InterlockedCompareExchange((PVOID *)&mutex->owned, (PVOID)0, (PVOID)-1) != (PVOID)-1)
+		if (InterlockedCompareExchange(&mutex->owned, 0, -1) != -1)
 			return WAIT_TIMEOUT ;
 		ret = WAIT_OBJECT_0 ;
 	}
@@ -196,7 +140,10 @@
 	if (obj.done == NULL)
 		return -1;
 
-	rv = _beginthread(bootstrap, _pythread_stacksize, &obj);
+	rv = _beginthread(bootstrap,
+			  Py_SAFE_DOWNCAST(_pythread_stacksize,
+					   Py_ssize_t, int),
+			  &obj);
 	if (rv == (Py_uintptr_t)-1) {
 		/* I've seen errno == EAGAIN here, which means "there are
 		 * too many threads".