/*
 * Copyright (c) 2000, 2003 Silicon Graphics, Inc.  All rights reserved.
 * Copyright (c) 2001 Intel Corp.
 * Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
 * Copyright (c) 2002 NEC Corp.
 * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
 * Copyright (c) 2004 Silicon Graphics, Inc
 *	Russ Anderson <rja@sgi.com>
 *	Jesse Barnes <jbarnes@sgi.com>
 *	Jack Steiner <steiner@sgi.com>
 */

/*
 * Platform initialization for Discontig Memory
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/nodemask.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/meminit.h>
#include <asm/numa.h>
#include <asm/sections.h>

/*
 * Track per-node information needed to set up the boot memory allocator,
 * the per-node areas, and the real VM.
 */
struct early_node_data {
        struct ia64_node_data *node_data;
        pg_data_t *pgdat;
        unsigned long pernode_addr;
        unsigned long pernode_size;
        struct bootmem_data bootmem_data;
        unsigned long num_physpages;
        unsigned long num_dma_physpages;
        unsigned long min_pfn;
        unsigned long max_pfn;
};

static struct early_node_data mem_data[MAX_NUMNODES] __initdata;

/**
 * reassign_cpu_only_nodes - called from find_memory to move CPU-only nodes to a memory node
 *
 * This function will move nodes with only CPUs (no memory)
 * to a node with memory which is at the minimum numa_slit distance.
 * Any reassignments will result in the compression of the nodes
 * and the renumbering of the nid values where appropriate.
 * The static declarations below avoid a large stack frame, at the
 * cost of making the code non-re-entrant.
 */
static void __init reassign_cpu_only_nodes(void)
{
        struct node_memblk_s *p;
        int i, j, k, nnode, nid, cpu, cpunid, pxm;
        u8 cslit, slit;
        static DECLARE_BITMAP(nodes_with_mem, MAX_NUMNODES) __initdata;
        static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata;
        static int node_flip[MAX_NUMNODES] __initdata;
        static int old_nid_map[NR_CPUS] __initdata;

        for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
                if (!test_bit(p->nid, (void *) nodes_with_mem)) {
                        set_bit(p->nid, (void *) nodes_with_mem);
                        nnode++;
                }

        /*
         * All nids with memory.
         */
        if (nnode == num_online_nodes())
                return;

        /*
         * Change nids and attempt to migrate CPU-only nodes
         * to the best numa_slit (closest neighbor) possible.
         * For reassigned CPU nodes, a final nid can't be chosen
         * until after this loop because the target nid's new
         * identity might not have been established yet.  So
         * new nid values are fabricated above num_online_nodes() and
         * mapped back to their true values later.
         */
        /* MCD - This code is a bit complicated, but may be unnecessary now.
         * We can now handle much more interesting node-numbering.
         * The old requirement that 0 <= nid <= numnodes <= MAX_NUMNODES
         * and that there be no holes in the numbering 0..numnodes
         * has become simply 0 <= nid < MAX_NUMNODES.
         */
        nid = 0;
        for_each_online_node(i) {
                if (test_bit(i, (void *) nodes_with_mem)) {
                        /*
                         * Save original nid value for numa_slit
                         * fixup and node_cpuid reassignments.
                         */
                        node_flip[nid] = i;

                        if (i == nid) {
                                nid++;
                                continue;
                        }

                        for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
                                if (p->nid == i)
                                        p->nid = nid;

                        cpunid = nid;
                        nid++;
                } else
                        cpunid = MAX_NUMNODES;

                for (cpu = 0; cpu < NR_CPUS; cpu++)
                        if (node_cpuid[cpu].nid == i) {
                                /*
                                 * For nodes not being reassigned just
                                 * fix the cpu's nid and reverse pxm map
                                 */
                                if (cpunid < MAX_NUMNODES) {
                                        pxm = nid_to_pxm_map[i];
                                        pxm_to_nid_map[pxm] =
                                                node_cpuid[cpu].nid = cpunid;
                                        continue;
                                }

                                /*
                                 * For nodes being reassigned, find best node by
                                 * numa_slit information and then make a temporary
                                 * nid value based on current nid and num_online_nodes().
                                 */
                                slit = 0xff;
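                                /*
                                 * k starts out as an out-of-range sentinel
                                 * (2*num_online_nodes()); if no memory node
                                 * is found below, the fixup loop further down
                                 * maps that sentinel to nnode - 1.
                                 */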
                                k = 2*num_online_nodes();
                                for_each_online_node(j) {
                                        if (i == j)
                                                continue;
                                        else if (test_bit(j, (void *) nodes_with_mem)) {
                                                cslit = numa_slit[i * num_online_nodes() + j];
                                                if (cslit < slit) {
                                                        k = num_online_nodes() + j;
                                                        slit = cslit;
                                                }
                                        }
                                }

                                /* save old nid map so we can update the pxm */
                                old_nid_map[cpu] = node_cpuid[cpu].nid;
                                node_cpuid[cpu].nid = k;
                        }
        }

        /*
         * Fixup temporary nid values for CPU-only nodes.
         */
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (node_cpuid[cpu].nid == (2*num_online_nodes())) {
                        pxm = nid_to_pxm_map[old_nid_map[cpu]];
                        pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1;
                } else {
                        for (i = 0; i < nnode; i++) {
                                if (node_flip[i] != (node_cpuid[cpu].nid - num_online_nodes()))
                                        continue;

                                pxm = nid_to_pxm_map[old_nid_map[cpu]];
                                pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i;
                                break;
                        }
                }

        /*
         * Fix numa_slit by compressing from larger
         * nid array to reduced nid array.
         */
        for (i = 0; i < nnode; i++)
                for (j = 0; j < nnode; j++)
                        numa_slit_fix[i * nnode + j] =
                                numa_slit[node_flip[i] * num_online_nodes() + node_flip[j]];

        memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit));

        nodes_clear(node_online_map);
        for (i = 0; i < nnode; i++)
                node_set_online(i);

        return;
}

/*
 * To prevent cache aliasing effects, align per-node structures so that they
 * start at addresses that are strided by node number.
 */
#define NODEDATA_ALIGN(addr, node) \
        ((((addr) + 1024*1024-1) & ~(1024*1024-1)) + (node)*PERCPU_PAGE_SIZE)
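/*
 * Illustrative example (assuming a 64KB PERCPU_PAGE_SIZE): an addr of
 * 0x340000 on node 2 is rounded up to the 1MB boundary at 0x400000 and
 * then offset to 0x420000, so successive nodes' structures land on
 * different cache-alias colors.
 */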

/**
 * build_node_maps - callback to setup bootmem structs for each node
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * We allocate a struct bootmem_data for each piece of memory that we wish to
 * treat as a virtually contiguous block (i.e. each node).  Each such block
 * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
 * if necessary.  Any non-existent pages will simply be part of the virtual
 * memmap.  We also update min_low_pfn and max_low_pfn here as we receive
 * memory ranges from the caller.
 */
static int __init build_node_maps(unsigned long start, unsigned long len,
                                  int node)
{
        unsigned long cstart, epfn, end = start + len;
        struct bootmem_data *bdp = &mem_data[node].bootmem_data;

        epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
        cstart = GRANULEROUNDDOWN(start);

        if (!bdp->node_low_pfn) {
                bdp->node_boot_start = cstart;
                bdp->node_low_pfn = epfn;
        } else {
                bdp->node_boot_start = min(cstart, bdp->node_boot_start);
                bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
        }

        min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);
        max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);

        return 0;
}

/**
 * early_nr_phys_cpus_node - return number of physical cpus on a given node
 * @node: node to check
 *
 * Count the number of physical cpus on @node.  These are cpus that actually
 * exist.  We can't use nr_cpus_node() yet because
 * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
 * called yet.
 */
static int early_nr_phys_cpus_node(int node)
{
        int cpu, n = 0;

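        /*
         * cpu 0 is the boot cpu and is always counted; any other slot
         * counts only if ACPI recorded a non-zero physical id for it,
         * since unused node_cpuid[] slots read as zero (which is also
         * why node 0 counts all non-existent cpus in early_nr_cpus_node()).
         */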
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (node == node_cpuid[cpu].nid)
                        if ((cpu == 0) || node_cpuid[cpu].phys_id)
                                n++;

        return n;
}


/**
 * early_nr_cpus_node - return number of cpus on a given node
 * @node: node to check
 *
 * Count the number of cpus on @node.  We can't use nr_cpus_node() yet because
 * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
 * called yet.  Note that node 0 will also count all non-existent cpus.
 */
static int early_nr_cpus_node(int node)
{
        int cpu, n = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (node == node_cpuid[cpu].nid)
                        n++;

        return n;
}

/**
 * find_pernode_space - allocate memory for memory map and per-node structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * This routine reserves space for the per-cpu data struct, the list of
 * pg_data_ts and the per-node data struct.  Each node will have something
 * like the following in the first chunk of address space large enough to
 * hold it.
 *
 *    ________________________
 *   |                        |
 *   |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
 *   |    PERCPU_PAGE_SIZE *       start and length big enough
 *   |      cpus_on_this_node      Node 0 will also have entries for all
 *   |                             non-existent cpus.
 *   |------------------------|
 *   |   local pg_data_t *    |
 *   |------------------------|
 *   |  local ia64_node_data  |
 *   |------------------------|
 *   |          ???           |
 *   |________________________|
 *
 * Once this space has been set aside, the bootmem maps are initialized.  We
 * could probably move the allocation of the per-cpu and ia64_node_data space
 * outside of this function and use alloc_bootmem_node(), but doing it here
 * is straightforward and we get the alignments we want so...
 */
static int __init find_pernode_space(unsigned long start, unsigned long len,
                                     int node)
{
        unsigned long epfn, cpu, cpus, phys_cpus;
        unsigned long pernodesize = 0, pernode, pages, mapsize;
        void *cpu_data;
        struct bootmem_data *bdp = &mem_data[node].bootmem_data;

        epfn = (start + len) >> PAGE_SHIFT;

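        /*
         * mapsize is the size of the bootmem bitmap (one bit per page)
         * needed to cover this node's page range.
         */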
        pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT);
        mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;

        /*
         * Make sure this memory falls within this node's usable memory
         * since we may have thrown some away in build_maps().
         */
        if (start < bdp->node_boot_start || epfn > bdp->node_low_pfn)
                return 0;

        /* Don't setup this node's local space twice... */
        if (mem_data[node].pernode_addr)
                return 0;

        /*
         * Calculate total size needed, incl. what's necessary
         * for good alignment and alias prevention.
         */
        cpus = early_nr_cpus_node(node);
        phys_cpus = early_nr_phys_cpus_node(node);
        pernodesize += PERCPU_PAGE_SIZE * cpus;
        pernodesize += node * L1_CACHE_BYTES;
        pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
        pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
        pernodesize = PAGE_ALIGN(pernodesize);
        pernode = NODEDATA_ALIGN(start, node);

        /* Is this range big enough for what we want to store here? */
        if (start + len > (pernode + pernodesize + mapsize)) {
                mem_data[node].pernode_addr = pernode;
                mem_data[node].pernode_size = pernodesize;
                memset(__va(pernode), 0, pernodesize);

                cpu_data = (void *)pernode;
                pernode += PERCPU_PAGE_SIZE * cpus;
                pernode += node * L1_CACHE_BYTES;

                mem_data[node].pgdat = __va(pernode);
                pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

                mem_data[node].node_data = __va(pernode);
                pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));

                mem_data[node].pgdat->bdata = bdp;
                pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

                /*
                 * Copy the static per-cpu data into the region we
                 * just set aside and then setup __per_cpu_offset
                 * for each CPU on this node.
                 */
                for (cpu = 0; cpu < NR_CPUS; cpu++) {
                        if (node == node_cpuid[cpu].nid) {
                                memcpy(__va(cpu_data), __phys_per_cpu_start,
                                       __per_cpu_end - __per_cpu_start);
                                __per_cpu_offset[cpu] = (char*)__va(cpu_data) -
                                        __per_cpu_start;
                                cpu_data += PERCPU_PAGE_SIZE;
                        }
                }
        }

        return 0;
}

/**
 * free_node_bootmem - free bootmem allocator memory for use
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Simply calls the bootmem allocator to free the specified range from
 * the given pg_data_t's bdata struct.  After this function has been called
 * for all the entries in the EFI memory map, the bootmem allocator will
 * be ready to service allocation requests.
 */
static int __init free_node_bootmem(unsigned long start, unsigned long len,
                                    int node)
{
        free_bootmem_node(mem_data[node].pgdat, start, len);

        return 0;
}

/**
 * reserve_pernode_space - reserve memory for per-node space
 *
 * Reserve the space used by the bootmem maps & per-node space in the boot
 * allocator so that when we actually create the real mem maps we don't
 * use their memory.
 */
static void __init reserve_pernode_space(void)
{
        unsigned long base, size, pages;
        struct bootmem_data *bdp;
        int node;

        for_each_online_node(node) {
                pg_data_t *pdp = mem_data[node].pgdat;

                bdp = pdp->bdata;

                /* First the bootmem_map itself */
                pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
                size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
                base = __pa(bdp->node_bootmem_map);
                reserve_bootmem_node(pdp, base, size);

                /* Now the per-node space */
                size = mem_data[node].pernode_size;
                base = __pa(mem_data[node].pernode_addr);
                reserve_bootmem_node(pdp, base, size);
        }
}

/**
 * initialize_pernode_data - fixup per-cpu & per-node pointers
 *
 * Each node's per-node area has a copy of the global pg_data_t list, so
 * we copy that to each node here, as well as setting the per-cpu pointer
 * to the local node data structure.  The active_cpus field of the per-node
 * structure gets set up by the platform_cpu_init() function later.
 */
static void __init initialize_pernode_data(void)
{
        int cpu, node;
        pg_data_t *pgdat_list[MAX_NUMNODES];

        for_each_online_node(node)
                pgdat_list[node] = mem_data[node].pgdat;

        /* Copy the pg_data_t list to each node and init the node field */
        for_each_online_node(node) {
                memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
                       sizeof(pgdat_list));
        }

        /* Set the node_data pointer for each per-cpu struct */
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                node = node_cpuid[cpu].nid;
                per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
        }
}

/**
 * find_memory - walk the EFI memory map and setup the bootmem allocator
 *
 * Called early in boot to setup the bootmem allocator, and to
 * allocate the per-cpu and per-node structures.
 */
void __init find_memory(void)
{
        int node;

        reserve_memory();

        if (num_online_nodes() == 0) {
                printk(KERN_ERR "node info missing!\n");
                node_set_online(0);
        }

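        /*
         * Seed with extreme values so the min()/max() updates in
         * build_node_maps() work on the first range.
         */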
        min_low_pfn = -1;
        max_low_pfn = 0;

        if (num_online_nodes() > 1)
                reassign_cpu_only_nodes();

        /* These actually end up getting called by call_pernode_memory() */
        efi_memmap_walk(filter_rsvd_memory, build_node_maps);
        efi_memmap_walk(filter_rsvd_memory, find_pernode_space);

        /*
         * Initialize the boot memory maps in reverse order since that's
         * what the bootmem allocator expects
         */
        for (node = MAX_NUMNODES - 1; node >= 0; node--) {
                unsigned long pernode, pernodesize, map;
                struct bootmem_data *bdp;

                if (!node_online(node))
                        continue;

                bdp = &mem_data[node].bootmem_data;
                pernode = mem_data[node].pernode_addr;
                pernodesize = mem_data[node].pernode_size;
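                /*
                 * Place the bootmem bitmap right after the per-node area;
                 * find_pernode_space() verified that it fits there.
                 */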
                map = pernode + pernodesize;

                /* Sanity check... */
                if (!pernode)
                        panic("pernode space for node %d "
                              "could not be allocated!", node);

                init_bootmem_node(mem_data[node].pgdat,
                                  map>>PAGE_SHIFT,
                                  bdp->node_boot_start>>PAGE_SHIFT,
                                  bdp->node_low_pfn);
        }

        efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);

        reserve_pernode_space();
        initialize_pernode_data();

        max_pfn = max_low_pfn;

        find_initrd();
}

/**
 * per_cpu_init - setup per-cpu variables
 *
 * find_pernode_space() does most of this already; we just need to set
 * local_per_cpu_offset
 */
void *per_cpu_init(void)
{
        int cpu;

        if (smp_processor_id() == 0) {
                for (cpu = 0; cpu < NR_CPUS; cpu++) {
                        per_cpu(local_per_cpu_offset, cpu) =
                                __per_cpu_offset[cpu];
                }
        }

        return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}

/**
 * show_mem - give short summary of memory stats
 *
 * Shows a simple page count of reserved and used pages in the system.
 * For discontig machines, it does this on a per-pgdat basis.
 */
void show_mem(void)
{
        int i, total_reserved = 0;
        int total_shared = 0, total_cached = 0;
        unsigned long total_present = 0;
        pg_data_t *pgdat;

        printk("Mem-info:\n");
        show_free_areas();
        printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_pgdat(pgdat) {
                unsigned long present = pgdat->node_present_pages;
                int shared = 0, cached = 0, reserved = 0;
                printk("Node ID: %d\n", pgdat->node_id);
                for (i = 0; i < pgdat->node_spanned_pages; i++) {
                        if (!ia64_pfn_valid(pgdat->node_start_pfn+i))
                                continue;
                        if (PageReserved(pgdat->node_mem_map+i))
                                reserved++;
                        else if (PageSwapCache(pgdat->node_mem_map+i))
                                cached++;
                        else if (page_count(pgdat->node_mem_map+i))
                                shared += page_count(pgdat->node_mem_map+i)-1;
                }
                total_present += present;
                total_reserved += reserved;
                total_cached += cached;
                total_shared += shared;
                printk("\t%ld pages of RAM\n", present);
                printk("\t%d reserved pages\n", reserved);
                printk("\t%d pages shared\n", shared);
                printk("\t%d pages swap cached\n", cached);
        }
        printk("%ld pages of RAM\n", total_present);
        printk("%d reserved pages\n", total_reserved);
        printk("%d pages shared\n", total_shared);
        printk("%d pages swap cached\n", total_cached);
        printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
        printk("%d free buffer pages\n", nr_free_buffer_pages());
}

/**
 * call_pernode_memory - use SRAT to call callback functions with node info
 * @start: physical start of range
 * @len: length of range
 * @arg: function to call for each range
 *
 * efi_memmap_walk() knows nothing about layout of memory across nodes.  Find
 * out to which node a block of memory belongs.  Ignore memory that we cannot
 * identify, and split blocks that run across multiple nodes.
 *
 * Take this opportunity to round the start address up and the end address
 * down to page boundaries.
 */
void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
{
        unsigned long rs, re, end = start + len;
        void (*func)(unsigned long, unsigned long, int);
        int i;

        start = PAGE_ALIGN(start);
        end &= PAGE_MASK;
        if (start >= end)
                return;

        func = arg;

        if (!num_node_memblks) {
                /* No SRAT table, so assume one node (node 0) */
                if (start < end)
                        (*func)(start, end - start, 0);
                return;
        }

        for (i = 0; i < num_node_memblks; i++) {
                rs = max(start, node_memblk[i].start_paddr);
                re = min(end, node_memblk[i].start_paddr +
                         node_memblk[i].size);

                if (rs < re)
                        (*func)(rs, re - rs, node_memblk[i].nid);

                if (re == end)
                        break;
        }
}

/**
 * count_node_pages - callback to build per-node memory info structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Each node has its own number of physical pages, DMAable pages, start, and
 * end page frame number.  This routine will be called by call_pernode_memory()
 * for each piece of usable memory and will setup these values for each node.
 * Very similar to build_maps().
 */
static __init int count_node_pages(unsigned long start, unsigned long len, int node)
{
        unsigned long end = start + len;

        mem_data[node].num_physpages += len >> PAGE_SHIFT;
        if (start <= __pa(MAX_DMA_ADDRESS))
                mem_data[node].num_dma_physpages +=
                        (min(end, __pa(MAX_DMA_ADDRESS)) - start) >> PAGE_SHIFT;
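        /*
         * Widen the range out to granule and MAX_ORDER boundaries so
         * the node's min_pfn/max_pfn span covers anything the buddy
         * allocator might merge across.
         */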
        start = GRANULEROUNDDOWN(start);
        start = ORDERROUNDDOWN(start);
        end = GRANULEROUNDUP(end);
        mem_data[node].max_pfn = max(mem_data[node].max_pfn,
                                     end >> PAGE_SHIFT);
        mem_data[node].min_pfn = min(mem_data[node].min_pfn,
                                     start >> PAGE_SHIFT);

        return 0;
}

/**
 * paging_init - setup page tables
 *
 * paging_init() sets up the page tables for each node of the system and frees
 * the bootmem allocator memory for general use.
 */
void __init paging_init(void)
{
        unsigned long max_dma;
        unsigned long zones_size[MAX_NR_ZONES];
        unsigned long zholes_size[MAX_NR_ZONES];
        unsigned long pfn_offset = 0;
        int node;

        max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;

        /* so min() will work in count_node_pages */
        for_each_online_node(node)
                mem_data[node].min_pfn = ~0UL;

        efi_memmap_walk(filter_rsvd_memory, count_node_pages);

        for_each_online_node(node) {
                memset(zones_size, 0, sizeof(zones_size));
                memset(zholes_size, 0, sizeof(zholes_size));

                num_physpages += mem_data[node].num_physpages;

                if (mem_data[node].min_pfn >= max_dma) {
                        /* All of this node's memory is above ZONE_DMA */
                        zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
                                mem_data[node].min_pfn;
                        zholes_size[ZONE_NORMAL] = mem_data[node].max_pfn -
                                mem_data[node].min_pfn -
                                mem_data[node].num_physpages;
                } else if (mem_data[node].max_pfn < max_dma) {
                        /* All of this node's memory is in ZONE_DMA */
                        zones_size[ZONE_DMA] = mem_data[node].max_pfn -
                                mem_data[node].min_pfn;
                        zholes_size[ZONE_DMA] = mem_data[node].max_pfn -
                                mem_data[node].min_pfn -
                                mem_data[node].num_dma_physpages;
                } else {
                        /* This node has memory in both zones */
                        zones_size[ZONE_DMA] = max_dma -
                                mem_data[node].min_pfn;
                        zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
                                mem_data[node].num_dma_physpages;
                        zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
                                max_dma;
                        zholes_size[ZONE_NORMAL] = zones_size[ZONE_NORMAL] -
                                (mem_data[node].num_physpages -
                                 mem_data[node].num_dma_physpages);
                }

                if (node == 0) {
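                        /*
                         * Carve space for the virtual mem_map out of the
                         * top of the vmalloc area; each node's mem_map
                         * below is a window into this one virtual array.
                         */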
                        vmalloc_end -=
                                PAGE_ALIGN(max_low_pfn * sizeof(struct page));
                        vmem_map = (struct page *) vmalloc_end;

                        efi_memmap_walk(create_mem_map_page_table, NULL);
                        printk("Virtual mem_map starts at 0x%p\n", vmem_map);
                }

                pfn_offset = mem_data[node].min_pfn;

                NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
                free_area_init_node(node, NODE_DATA(node), zones_size,
                                    pfn_offset, zholes_size);
        }

        zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}