Merge pull request #531 from soby-mathew/sm/multicluster_fvp

Allow multi cluster topology definitions for ARM platforms
diff --git a/bl31/runtime_svc.c b/bl31/runtime_svc.c
index 5b7a21c..f011f11 100644
--- a/bl31/runtime_svc.c
+++ b/bl31/runtime_svc.c
@@ -103,8 +103,8 @@
 		 */
 		rc = validate_rt_svc_desc(&rt_svc_descs[index]);
 		if (rc) {
-			ERROR("Invalid runtime service descriptor 0x%lx (%s)\n",
-					(uintptr_t) &rt_svc_descs[index],
+			ERROR("Invalid runtime service descriptor %p (%s)\n",
+					(void *) &rt_svc_descs[index],
 					rt_svc_descs[index].name);
 			goto error;
 		}
diff --git a/common/bl_common.c b/common/bl_common.c
index 0eeef83..d5b095a 100644
--- a/common/bl_common.c
+++ b/common/bl_common.c
@@ -229,7 +229,8 @@
 		return io_result;
 	}
 
-	INFO("Loading image id=%u at address 0x%lx\n", image_id, image_base);
+	INFO("Loading image id=%u at address %p\n", image_id,
+		(void *) image_base);
 
 	/* Find the size of the image */
 	io_result = io_size(image_handle, &image_size);
@@ -242,8 +243,8 @@
 	/* Check that the memory where the image will be loaded is free */
 	if (!is_mem_free(mem_layout->free_base, mem_layout->free_size,
 			 image_base, image_size)) {
-		WARN("Failed to reserve memory: 0x%lx - 0x%lx\n",
-			image_base, image_base + image_size);
+		WARN("Failed to reserve memory: %p - %p\n", (void *) image_base,
+		     (void *) (image_base + image_size));
 		dump_load_info(image_base, image_size, mem_layout);
 		io_result = -ENOMEM;
 		goto exit;
@@ -268,8 +269,8 @@
 		reserve_mem(&mem_layout->free_base, &mem_layout->free_size,
 				image_base, image_size);
 	} else {
-		INFO("Skip reserving memory: 0x%lx - 0x%lx\n",
-				image_base, image_base + image_size);
+		INFO("Skip reserving memory: %p - %p\n", (void *) image_base,
+		     (void *) (image_base + image_size));
 	}
 
 	image_data->image_base = image_base;
@@ -284,8 +285,8 @@
 	 */
 	flush_dcache_range(image_base, image_size);
 
-	INFO("Image id=%u loaded: 0x%lx - 0x%lx\n", image_id, image_base,
-	     image_base + image_size);
+	INFO("Image id=%u loaded: %p - %p\n", image_id, (void *) image_base,
+	     (void *) (image_base + image_size));
 
 exit:
 	io_close(image_handle);
diff --git a/common/tf_printf.c b/common/tf_printf.c
index c68b990..c1d4188 100644
--- a/common/tf_printf.c
+++ b/common/tf_printf.c
@@ -68,6 +68,7 @@
  * %u - unsigned 32 bit decimal format
  * %ld and %lld - signed 64 bit decimal format
  * %lu and %llu - unsigned 64 bit decimal format
+ * %p - pointer format
  * Exits on all other formats.
  *******************************************************************/
 
@@ -107,6 +108,14 @@
 				str = va_arg(args, char *);
 				string_print(str);
 				break;
+			case 'p':
+				unum = (uint64_t)va_arg(args, void *);
+
+				if (unum)
+					string_print("0x");
+
+				unsigned_num_print(unum, 16);
+				break;
 			case 'x':
 				if (bit64)
 					unum = va_arg(args, uint64_t);
diff --git a/docs/cpu-specific-build-macros.md b/docs/cpu-specific-build-macros.md
index e7185aa..c57dc7e 100644
--- a/docs/cpu-specific-build-macros.md
+++ b/docs/cpu-specific-build-macros.md
@@ -85,8 +85,12 @@
 
 *    `A57_DISABLE_NON_TEMPORAL_HINT`: This flag has the same behaviour as
      `A53_DISABLE_NON_TEMPORAL_HINT` but for Cortex-A57. This needs to be
-     enabled only for revisions <= r1p2 of the CPU and is enabled by default.
+     enabled only for revisions <= r1p2 of the CPU and is enabled by default,
+     as recommended in section "4.7 Non-Temporal Loads/Stores" of the
+     [Cortex-A57 Software Optimization Guide][A57 SW Optimization Guide].
 
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
 _Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved._
+
+[A57 SW Optimization Guide]: http://infocenter.arm.com/help/topic/com.arm.doc.uan0015b/Cortex_A57_Software_Optimization_Guide_external.pdf
diff --git a/docs/firmware-design.md b/docs/firmware-design.md
index d0cb399..cd8c7aa 100644
--- a/docs/firmware-design.md
+++ b/docs/firmware-design.md
@@ -1194,6 +1194,72 @@
 on FVP, BL31 and TSP need to know the limit address that their PROGBITS
 sections must not overstep. The platform code must provide those.
 
+Trusted Firmware provides a mechanism to verify at boot time that the memory
+to load a new image is free, to prevent overwriting a previously loaded image.
+For this mechanism to work, the platform must specify the memory available in
+the system as regions, where each region consists of base address, total size
+and the free area within it (as defined in the `meminfo_t` structure). Trusted
+Firmware retrieves these memory regions by calling the corresponding platform
+API:
+
+*   `meminfo_t *bl1_plat_sec_mem_layout(void)`
+*   `meminfo_t *bl2_plat_sec_mem_layout(void)`
+*   `void bl2_plat_get_scp_bl2_meminfo(meminfo_t *scp_bl2_meminfo)`
+*   `void bl2_plat_get_bl32_meminfo(meminfo_t *bl32_meminfo)`
+*   `void bl2_plat_get_bl33_meminfo(meminfo_t *bl33_meminfo)`
+
+For example, in the case of BL1 loading BL2, `bl1_plat_sec_mem_layout()` will
+return the region defined by the platform where BL1 intends to load BL2. The
+`load_image()` function will check that the memory where BL2 will be loaded is
+within the specified region and marked as free.
+
+The actual number of regions and their base addresses and sizes is platform
+specific. The platform may return the same region or define a different one for
+each API. However, the overlap verification mechanism applies only to a single
+region. Hence, it is the platform's responsibility to guarantee that different
+regions do not overlap, or that if they do, the overlapping images are not
+accessed at the same time. This could be used, for example, to load temporary
+images (e.g. certificates) or firmware images prior to being transferred to
+their corresponding processor (e.g. the SCP BL2 image).
+
+To reduce fragmentation and simplify the tracking of free memory, all the free
+memory within a region is always located in one single buffer defined by its
+base address and size. Trusted Firmware implements a top/bottom load approach:
+after a new image is loaded, it checks how much memory remains free above and
+below the image. The smaller area is marked as unavailable, while the larger
+area becomes the new free memory buffer. Platforms should take this behaviour
+into account when defining the base address for each of the images. For example,
+if an image is loaded near the middle of the region, small changes in image size
+could cause a flip between a top load and a bottom load, which may result in an
+unexpected memory layout.
+
+The following diagram is an example of an image loaded in the bottom part of
+the memory region. The region is initially free (nothing has been loaded yet):
+
+               Memory region
+               +----------+
+               |          |
+               |          |  <<<<<<<<<<<<<  Free
+               |          |
+               |----------|                 +------------+
+               |  image   |  <<<<<<<<<<<<<  |   image    |
+               |----------|                 +------------+
+               | xxxxxxxx |  <<<<<<<<<<<<<  Marked as unavailable
+               +----------+
+
+And the following diagram is an example of an image loaded in the top part:
+
+               Memory region
+               +----------+
+               | xxxxxxxx |  <<<<<<<<<<<<<  Marked as unavailable
+               |----------|                 +------------+
+               |  image   |  <<<<<<<<<<<<<  |   image    |
+               |----------|                 +------------+
+               |          |
+               |          |  <<<<<<<<<<<<<  Free
+               |          |
+               +----------+
+
 
 ####  Memory layout on ARM development platforms
 
@@ -1229,9 +1295,47 @@
     *   Secure region of DRAM (top 16MB of DRAM configured by the TrustZone
         controller)
 
-When BL32 is loaded into Trusted SRAM, its NOBITS sections are allowed to
-overlay BL2. This memory layout is designed to give the BL32 image as much
-memory as possible when it is loaded into Trusted SRAM.
+    When BL32 is loaded into Trusted SRAM, its NOBITS sections are allowed to
+    overlay BL2. This memory layout is designed to give the BL32 image as much
+    memory as possible when it is loaded into Trusted SRAM.
+
+The memory regions for the overlap detection mechanism at boot time are
+defined as follows (shown per API):
+
+*   `meminfo_t *bl1_plat_sec_mem_layout(void)`
+
+    This region corresponds to the whole Trusted SRAM except for the shared
+    memory at the base. This region is initially free. At boot time, BL1 will
+    mark the BL1(rw) section within this region as occupied. The BL1(rw) section
+    is placed at the top of Trusted SRAM.
+
+*   `meminfo_t *bl2_plat_sec_mem_layout(void)`
+
+    This region corresponds to the whole Trusted SRAM as defined by
+    `bl1_plat_sec_mem_layout()`, but with the BL1(rw) section marked as
+    occupied. This memory region is used to check that BL2 and BL31 do not
+    overlap with each other. BL2_BASE and BL1_RW_BASE are carefully chosen so
+    that the memory for BL31 is top loaded above BL2.
+
+*   `void bl2_plat_get_scp_bl2_meminfo(meminfo_t *scp_bl2_meminfo)`
+
+    This region is an exact copy of the region defined by
+    `bl2_plat_sec_mem_layout()`. Being a disconnected copy means that all the
+    changes made to this region by the Trusted Firmware will not be propagated.
+    This approach is valid because the SCP BL2 image is loaded temporarily
+    while it is being transferred to the SCP, so this memory is reused
+    afterwards.
+
+*   `void bl2_plat_get_bl32_meminfo(meminfo_t *bl32_meminfo)`
+
+    This region depends on the location of the BL32 image. Currently, ARM
+    platforms support three different locations (detailed below): Trusted SRAM,
+    Trusted DRAM and the TZC-Secured DRAM.
+
+*   `void bl2_plat_get_bl33_meminfo(meminfo_t *bl33_meminfo)`
+
+    This region corresponds to the Non-Secure DDR-DRAM, excluding the
+    TZC-Secured area.
 
 The location of the BL32 image will result in different memory maps. This is
 illustrated for both FVP and Juno in the following diagrams, using the TSP as
diff --git a/docs/user-guide.md b/docs/user-guide.md
index 0db622b..70e1abc 100644
--- a/docs/user-guide.md
+++ b/docs/user-guide.md
@@ -413,6 +413,25 @@
     any register that is not part of the SBSA generic UART specification.
     Default value is 0 (a full PL011 compliant UART is present).
 
+*   `CTX_INCLUDE_FPREGS`: Boolean option that, when set to 1, will cause the FP
+    registers to be included when saving and restoring the CPU context. Default
+    is 0.
+
+*   `DISABLE_PEDANTIC`: When set to 1 it will disable the -pedantic option in
+    the GCC command line. Default is 0.
+
+*   `BUILD_STRING`: Input string for VERSION_STRING, which allows the TF build
+    to be uniquely identified. Defaults to the current git commit id.
+
+*   `VERSION_STRING`: String used in the log output for each TF image. Defaults
+    to a string formed by concatenating the version number, build type and build
+    string.
+
+*   `BUILD_MESSAGE_TIMESTAMP`: String used to identify the time and date of the
+    compilation of each build. It must be set to a C string (including quotes
+    where applicable). Defaults to a string that contains the time and date of
+    the compilation.
+
 #### ARM development platform specific build options
 
 *   `ARM_TSP_RAM_LOCATION`: location of the TSP binary. Options:
@@ -1006,14 +1025,15 @@
 This version of the ARM Trusted Firmware has been tested on the following ARM
 FVPs (64-bit versions only).
 
-*   `Foundation_Platform` (Version 9.4, Build 9.4.59)
-*   `FVP_Base_AEMv8A-AEMv8A` (Version 7.0, Build 0.8.7004)
-*   `FVP_Base_Cortex-A57x4-A53x4` (Version 7.0, Build 0.8.7004)
-*   `FVP_Base_Cortex-A57x1-A53x1` (Version 7.0, Build 0.8.7004)
-*   `FVP_Base_Cortex-A57x2-A53x4` (Version 7.0, Build 0.8.7004)
+*   `Foundation_Platform` (Version 9.5, Build 9.5.40)
+*   `FVP_Base_AEMv8A-AEMv8A` (Version 7.2, Build 0.8.7202)
+*   `FVP_Base_Cortex-A57x4-A53x4` (Version 7.2, Build 0.8.7202)
+*   `FVP_Base_Cortex-A57x1-A53x1` (Version 7.2, Build 0.8.7202)
+*   `FVP_Base_Cortex-A57x2-A53x4` (Version 7.2, Build 0.8.7202)
 
 NOTE: The build numbers quoted above are those reported by launching the FVP
-with the `--version` parameter.
+with the `--version` parameter. The `Foundation_Platform` tarball for
+`--version` 9.5.40 is labeled as version 9.5.41.
 
 NOTE: The software will not work on Version 1.0 of the Foundation FVP.
 The commands below would report an `unhandled argument` error in this case.
diff --git a/drivers/arm/gic/gic_v3.c b/drivers/arm/gic/gic_v3.c
index f429662..11185b2 100644
--- a/drivers/arm/gic/gic_v3.c
+++ b/drivers/arm/gic/gic_v3.c
@@ -60,8 +60,8 @@
 			/* Disable this print for now as it appears every time
 			 * when using PSCI CPU_SUSPEND.
 			 * TODO: Print this only the first time for each CPU.
-			 * INFO("GICv3 - Found RDIST for MPIDR(0x%lx) at 0x%lx\n",
-			 *	mpidr, addr);
+			 * INFO("GICv3 - Found RDIST for MPIDR(0x%lx) at %p\n",
+			 *	mpidr, (void *) addr);
 			 */
 			return addr;
 		}
diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h
index 067b830..d1ad31d 100644
--- a/include/lib/aarch64/arch_helpers.h
+++ b/include/lib/aarch64/arch_helpers.h
@@ -194,6 +194,8 @@
 DEFINE_SYSOP_FUNC(sev)
 DEFINE_SYSOP_TYPE_FUNC(dsb, sy)
 DEFINE_SYSOP_TYPE_FUNC(dmb, sy)
+DEFINE_SYSOP_TYPE_FUNC(dmb, st)
+DEFINE_SYSOP_TYPE_FUNC(dmb, ld)
 DEFINE_SYSOP_TYPE_FUNC(dsb, ish)
 DEFINE_SYSOP_TYPE_FUNC(dmb, ish)
 DEFINE_SYSOP_FUNC(isb)
diff --git a/include/plat/arm/common/arm_def.h b/include/plat/arm/common/arm_def.h
index f6c090f..dab7545 100644
--- a/include/plat/arm/common/arm_def.h
+++ b/include/plat/arm/common/arm_def.h
@@ -150,14 +150,10 @@
 #define ARM_G0_IRQS			ARM_IRQ_SEC_SGI_0,		\
 					ARM_IRQ_SEC_SGI_6
 
-#define ARM_SHARED_RAM_ATTR		((PLAT_ARM_SHARED_RAM_CACHED ?	\
-						MT_MEMORY : MT_DEVICE)	\
-						| MT_RW | MT_SECURE)
-
 #define ARM_MAP_SHARED_RAM		MAP_REGION_FLAT(		\
 						ARM_SHARED_RAM_BASE,	\
 						ARM_SHARED_RAM_SIZE,	\
-						ARM_SHARED_RAM_ATTR)
+						MT_DEVICE | MT_RW | MT_SECURE)
 
 #define ARM_MAP_NS_DRAM1		MAP_REGION_FLAT(		\
 						ARM_NS_DRAM1_BASE,	\
diff --git a/include/plat/arm/css/common/css_def.h b/include/plat/arm/css/common/css_def.h
index 3d6884a..f92126b 100644
--- a/include/plat/arm/css/common/css_def.h
+++ b/include/plat/arm/css/common/css_def.h
@@ -37,8 +37,6 @@
 /*************************************************************************
  * Definitions common to all ARM Compute SubSystems (CSS)
  *************************************************************************/
-#define MHU_PAYLOAD_CACHED		0
-
 #define NSROM_BASE			0x1f000000
 #define NSROM_SIZE			0x00001000
 
@@ -141,8 +139,6 @@
 #define SCP_BL2U_BASE			BL31_BASE
 #endif /* CSS_LOAD_SCP_IMAGES */
 
-#define PLAT_ARM_SHARED_RAM_CACHED	MHU_PAYLOAD_CACHED
-
 /* Load address of Non-Secure Image for CSS platform ports */
 #define PLAT_ARM_NS_IMAGE_OFFSET	0xE0000000
 
diff --git a/plat/arm/board/fvp/include/platform_def.h b/plat/arm/board/fvp/include/platform_def.h
index b912643..a8267de 100644
--- a/plat/arm/board/fvp/include/platform_def.h
+++ b/plat/arm/board/fvp/include/platform_def.h
@@ -64,8 +64,6 @@
 
 #define PLAT_ARM_DRAM2_SIZE		MAKE_ULL(0x780000000)
 
-#define PLAT_ARM_SHARED_RAM_CACHED	1
-
 /*
  * Load address of BL33 for this platform port
  */
diff --git a/plat/arm/common/arm_pm.c b/plat/arm/common/arm_pm.c
index 2ddc583..1e756a9 100644
--- a/plat/arm/common/arm_pm.c
+++ b/plat/arm/common/arm_pm.c
@@ -192,11 +192,6 @@
 	assert((PLAT_ARM_TRUSTED_MAILBOX_BASE >= ARM_SHARED_RAM_BASE) &&
 		((PLAT_ARM_TRUSTED_MAILBOX_BASE + sizeof(*mailbox)) <= \
 				(ARM_SHARED_RAM_BASE + ARM_SHARED_RAM_SIZE)));
-
-	/* Flush data cache if the mail box shared RAM is cached */
-#if PLAT_ARM_SHARED_RAM_CACHED
-	flush_dcache_range((uintptr_t) mailbox, sizeof(*mailbox));
-#endif
 }
 
 /*******************************************************************************
diff --git a/plat/arm/css/common/css_scp_bootloader.c b/plat/arm/css/common/css_scp_bootloader.c
index 8bfaa87..d3f671e 100644
--- a/plat/arm/css/common/css_scp_bootloader.c
+++ b/plat/arm/css/common/css_scp_bootloader.c
@@ -77,10 +77,10 @@
 
 static void scp_boot_message_send(size_t payload_size)
 {
-	/* Make sure payload can be seen by SCP */
-	if (MHU_PAYLOAD_CACHED)
-		flush_dcache_range(BOM_SHARED_MEM,
-				   sizeof(bom_cmd_t) + payload_size);
+	/* Ensure that any write to the BOM payload area is seen by SCP before
+	 * we write to the MHU register. If these 2 writes were reordered by
+	 * the CPU then SCP would read stale payload data */
+	dmbst();
 
 	/* Send command to SCP */
 	mhu_secure_message_send(BOM_MHU_SLOT_ID);
@@ -99,9 +99,10 @@
 		panic();
 	}
 
-	/* Make sure we see the reply from the SCP and not any stale data */
-	if (MHU_PAYLOAD_CACHED)
-		inv_dcache_range(BOM_SHARED_MEM, size);
+	/* Ensure that any read to the BOM payload area is done after reading
+	 * the MHU register. If these 2 reads were reordered then the CPU would
+	 * read invalid payload data */
+	dmbld();
 
 	return *(uint32_t *) BOM_SHARED_MEM;
 }
diff --git a/plat/arm/css/common/css_scpi.c b/plat/arm/css/common/css_scpi.c
index 9e1f973..02d573c 100644
--- a/plat/arm/css/common/css_scpi.c
+++ b/plat/arm/css/common/css_scpi.c
@@ -56,10 +56,10 @@
 
 static void scpi_secure_message_send(size_t payload_size)
 {
-	/* Make sure payload can be seen by SCP */
-	if (MHU_PAYLOAD_CACHED)
-		flush_dcache_range(SCPI_SHARED_MEM_AP_TO_SCP,
-				   sizeof(scpi_cmd_t) + payload_size);
+	/* Ensure that any write to the SCPI payload area is seen by SCP before
+	 * we write to the MHU register. If these 2 writes were reordered by
+	 * the CPU then SCP would read stale payload data */
+	dmbst();
 
 	mhu_secure_message_send(SCPI_MHU_SLOT_ID);
 }
@@ -79,9 +79,10 @@
 		panic();
 	}
 
-	/* Make sure we don't read stale data */
-	if (MHU_PAYLOAD_CACHED)
-		inv_dcache_range(SCPI_SHARED_MEM_SCP_TO_AP, sizeof(*cmd));
+	/* Ensure that any read to the SCPI payload area is done after reading
+	 * the MHU register. If these 2 reads were reordered then the CPU would
+	 * read invalid payload data */
+	dmbld();
 
 	memcpy(cmd, (void *) SCPI_SHARED_MEM_SCP_TO_AP, sizeof(*cmd));
 }
diff --git a/plat/arm/css/common/css_scpi.h b/plat/arm/css/common/css_scpi.h
index 379a821..4a601f3 100644
--- a/plat/arm/css/common/css_scpi.h
+++ b/plat/arm/css/common/css_scpi.h
@@ -45,7 +45,7 @@
 	uint32_t set		: 1;
 	/* Sender ID to match a reply. The value is sender specific. */
 	uint32_t sender		: 8;
-	/* Size of the payload in bytes (0 – 511) */
+	/* Size of the payload in bytes (0 - 511) */
 	uint32_t size		: 9;
 	uint32_t reserved	: 7;
 	/*