From fd27bea340012412a5ad1476b0a40381d7407550 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Fri, 19 May 2023 14:07:45 -0500 Subject: x86/platform/uv: Add platform resolving #defines for misc GAM_MMIOH_REDIRECT* Upcoming changes will require use of new #defines UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK and UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK, which provide the appropriate values on different uv platforms. Also, fix typo that defined a couple of "*_CONFIG0_*" values twice where "*_CONFIG1_*" was intended. Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20230519190752.3297140-2-steve.wahl%40hpe.com --- arch/x86/include/asm/uv/uv_mmrs.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 57fa67373262..bb45812889dd 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -4199,6 +4199,13 @@ union uvh_rh_gam_mmioh_overlay_config1_u { #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL +/* UVH common defines */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK ( \ + is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + 0) + union uvh_rh_gam_mmioh_redirect_config0_u { unsigned long v; @@ -4247,8 +4254,8 @@ union uvh_rh_gam_mmioh_redirect_config0_u { 0) /* UV4A unique defines */ -#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 -#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000000fffUL +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000000fffUL /* UV4 unique defines */ #define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 @@ -4258,6 +4265,13 @@ union uvh_rh_gam_mmioh_redirect_config0_u { #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL +/* UVH common defines */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK ( \ + is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + 0) + union uvh_rh_gam_mmioh_redirect_config1_u { unsigned long v; -- cgit v1.2.3 From 8c646cee0ae3c0a19a7d9fdb847c45c729cae945 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Fri, 19 May 2023 14:07:46 -0500 Subject: x86/platform/uv: Introduce helper function uv_pnode_to_socket. Add and use uv_pnode_to_socket() function, which parallels other helper functions in here, and will enable avoiding duplicate code in an upcoming patch. Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20230519190752.3297140-3-steve.wahl%40hpe.com --- arch/x86/include/asm/uv/uv_hub.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index d3e3197917be..0acfd1734c8b 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -519,18 +519,24 @@ static inline int uv_socket_to_node(int socket) return _uv_socket_to_node(socket, uv_hub_info->socket_to_node); } +static inline int uv_pnode_to_socket(int pnode) +{ + unsigned short *p2s = uv_hub_info->pnode_to_socket; + + return p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode; +} + /* pnode, offset --> socket virtual */ static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) { unsigned int m_val = uv_hub_info->m_val; unsigned long base; - unsigned short sockid, node, *p2s; + unsigned short sockid, node; if (m_val) return __va(((unsigned long)pnode << m_val) | offset); - p2s = uv_hub_info->pnode_to_socket; - sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode; + sockid = uv_pnode_to_socket(pnode); node = uv_socket_to_node(sockid); /* limit address of previous socket is our base, except node 0 is 0 */ -- cgit v1.2.3 From e4860f03779cadff011a7a8685c7157ebc133bda Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Fri, 19 May 2023 14:07:47 -0500 Subject: x86/platform/uv: Fix printed information in calc_mmioh_map Fix incorrect mask names and values in calc_mmioh_map() that caused it to print wrong NASID information. And an unused blade position is not an error condition, but will yield an invalid NASID value, so change the invalid NASID message from an error to a debug message. Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20230519190752.3297140-4-steve.wahl%40hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 482855227964..6d2739eadb71 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1022,7 +1022,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, switch (index) { case UVY_MMIOH0: mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0; - nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK; + nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK; n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH; min_nasid = min_pnode; max_nasid = max_pnode; @@ -1030,7 +1030,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVY_MMIOH1: mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1; - nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK; + nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK; n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH; min_nasid = min_pnode; max_nasid = max_pnode; @@ -1038,7 +1038,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVX_MMIOH0: mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0; - nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK; n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH; min_nasid = min_pnode * 2; max_nasid = max_pnode * 2; @@ -1046,7 +1046,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVX_MMIOH1: mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1; - nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK; n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH; min_nasid = min_pnode * 2; max_nasid = max_pnode * 2; @@ -1072,8 +1072,9 @@ static void __init calc_mmioh_map(enum mmioh_arch index, /* Invalid NASID check */ if (nasid < min_nasid || max_nasid < nasid) { - pr_err("UV:%s:Invalid NASID:%x (range:%x..%x)\n", - __func__, index, min_nasid, max_nasid); + /* Not an error: unused table entries get "poison" values */ + pr_debug("UV:%s:Invalid NASID(%x):%x (range:%x..%x)\n", + __func__, index, nasid, min_nasid, max_nasid); nasid = -1; } -- cgit v1.2.3 From 35bd896ccc2fa8dfb20df705a44e9d26665f1085 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Fri, 19 May 2023 14:07:48 -0500 Subject: x86/platform/uv: When searching for minimums, start at INT_MAX not 99999 Using a starting value of INT_MAX rather than 999999 or 99999 means this algorithm won't fail should the numbers being compared ever exceed this value. Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20230519190752.3297140-5-steve.wahl%40hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 6d2739eadb71..2da84befabf0 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1349,7 +1349,7 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr; unsigned long lgre = 0, gend = 0; int index = 0; - int sock_min = 999999, pnode_min = 99999; + int sock_min = INT_MAX, pnode_min = INT_MAX; int sock_max = -1, pnode_max = -1; uv_gre_table = gre; @@ -1694,7 +1694,7 @@ static void __init uv_system_init_hub(void) { struct uv_hub_info_s hub_info = {0}; int bytes, cpu, nodeid; - unsigned short min_pnode = 9999, max_pnode = 0; + unsigned short min_pnode = USHRT_MAX, max_pnode = 0; char *hub = is_uv5_hub() ? "UV500" : is_uv4_hub() ? "UV400" : is_uv3_hub() ? "UV300" : -- cgit v1.2.3 From 45e9f9a99529a54a7ed195eea4aad102b9eadb23 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Fri, 19 May 2023 14:07:49 -0500 Subject: x86/platform/uv: Helper functions for allocating and freeing conversion tables Add alloc_conv_table() and FREE_1_TO_1_TABLE() to reduce duplicated code among the conversion tables we use. Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20230519190752.3297140-6-steve.wahl%40hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 97 +++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 44 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 2da84befabf0..1bd15b1f7712 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1491,16 +1491,50 @@ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info) pr_info("UV: number nodes/possible blades %d\n", uv_pb); } +static int __init alloc_conv_table(int num_elem, unsigned short **table) +{ + int i; + size_t bytes; + + bytes = num_elem * sizeof(*table[0]); + *table = kmalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!*table)) + return -ENOMEM; + for (i = 0; i < num_elem; i++) + ((unsigned short *)*table)[i] = SOCK_EMPTY; + return 0; +} + +/* Remove conversion table if it's 1:1 */ +#define FREE_1_TO_1_TABLE(tbl, min, max, max2) free_1_to_1_table(&tbl, #tbl, min, max, max2) + +static void __init free_1_to_1_table(unsigned short **tp, char *tname, int min, int max, int max2) +{ + int i; + unsigned short *table = *tp; + + if (table == NULL) + return; + if (max != max2) + return; + for (i = 0; i < max; i++) { + if (i != table[i]) + return; + } + kfree(table); + *tp = NULL; + pr_info("UV: %s is 1:1, conversion table removed\n", tname); +} + static void __init build_socket_tables(void) { struct uv_gam_range_entry *gre = uv_gre_table; - int num, nump; + int nums, numn, nump; int cpu, i, lnid; int minsock = _min_socket; int maxsock = _max_socket; int minpnode = _min_pnode; int maxpnode = _max_pnode; - size_t bytes; if (!gre) { if (is_uv2_hub() || is_uv3_hub()) { @@ -1511,22 +1545,20 @@ static void __init build_socket_tables(void) BUG(); } - /* Build socket id -> node id, pnode */ - num = maxsock - minsock + 1; - bytes = num * sizeof(_socket_to_node[0]); - _socket_to_node = kmalloc(bytes, GFP_KERNEL); - _socket_to_pnode = kmalloc(bytes, GFP_KERNEL); - + numn = num_possible_nodes(); nump = maxpnode - minpnode + 1; - bytes = nump * sizeof(_pnode_to_socket[0]); - _pnode_to_socket = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket); - - for (i = 0; i < num; i++) - _socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY; - - for (i = 0; i < nump; i++) - _pnode_to_socket[i] = SOCK_EMPTY; + nums = maxsock - minsock + 1; + + /* Allocate and clear tables */ + if ((alloc_conv_table(nump, &_pnode_to_socket) < 0) + || (alloc_conv_table(nums, &_socket_to_pnode) < 0) + || (alloc_conv_table(numn, &_node_to_pnode) < 0) + || (alloc_conv_table(nums, &_socket_to_node) < 0)) { + kfree(_pnode_to_socket); + kfree(_socket_to_pnode); + kfree(_node_to_pnode); + return; + } /* Fill in pnode/node/addr conversion list values: */ pr_info("UV: GAM Building socket/pnode conversion tables\n"); @@ -1565,10 +1597,6 @@ static void __init build_socket_tables(void) } /* Set up physical blade to pnode translation from GAM Range Table: */ - bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]); - _node_to_pnode = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!_node_to_pnode); - for (lnid = 0; lnid < num_possible_nodes(); lnid++) { unsigned short sockid; @@ -1585,31 +1613,12 @@ static void __init build_socket_tables(void) } /* - * If socket id == pnode or socket id == node for all nodes, + * If e.g. socket id == pnode for all pnodes, * system runs faster by removing corresponding conversion table. */ - pr_info("UV: Checking socket->node/pnode for identity maps\n"); - if (minsock == 0) { - for (i = 0; i < num; i++) - if (_socket_to_node[i] == SOCK_EMPTY || i != _socket_to_node[i]) - break; - if (i >= num) { - kfree(_socket_to_node); - _socket_to_node = NULL; - pr_info("UV: 1:1 socket_to_node table removed\n"); - } - } - if (minsock == minpnode) { - for (i = 0; i < num; i++) - if (_socket_to_pnode[i] != SOCK_EMPTY && - _socket_to_pnode[i] != i + minpnode) - break; - if (i >= num) { - kfree(_socket_to_pnode); - _socket_to_pnode = NULL; - pr_info("UV: 1:1 socket_to_pnode table removed\n"); - } - } + FREE_1_TO_1_TABLE(_socket_to_node, _min_socket, nums, numn); + FREE_1_TO_1_TABLE(_socket_to_pnode, _min_pnode, nums, nump); + FREE_1_TO_1_TABLE(_pnode_to_socket, _min_pnode, nums, nump); } /* Check which reboot to use */ -- cgit v1.2.3 From 8a50c58519271dd24ba760bb282875f6ad66ee71 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Fri, 19 May 2023 14:07:50 -0500 Subject: x86/platform/uv: UV support for sub-NUMA clustering Sub-NUMA clustering (SNC) invalidates previous assumptions of a 1:1 relationship between blades, sockets, and nodes. Fix these assumptions and build tables correctly when SNC is enabled. Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20230519190752.3297140-7-steve.wahl%40hpe.com --- arch/x86/include/asm/uv/uv_hub.h | 22 ++--- arch/x86/kernel/apic/x2apic_uv_x.c | 162 +++++++++++++++++++++---------------- 2 files changed, 107 insertions(+), 77 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 0acfd1734c8b..5fa76c2ced51 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -177,6 +177,7 @@ struct uv_hub_info_s { unsigned short nr_possible_cpus; unsigned short nr_online_cpus; short memory_nid; + unsigned short *node_to_socket; }; /* CPU specific info with a pointer to the hub common info struct */ @@ -531,19 +532,18 @@ static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) { unsigned int m_val = uv_hub_info->m_val; unsigned long base; - unsigned short sockid, node; + unsigned short sockid; if (m_val) return __va(((unsigned long)pnode << m_val) | offset); sockid = uv_pnode_to_socket(pnode); - node = uv_socket_to_node(sockid); /* limit address of previous socket is our base, except node 0 is 0 */ - if (!node) + if (sockid == 0) return __va((unsigned long)offset); - base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit); + base = (unsigned long)(uv_hub_info->gr_table[sockid - 1].limit); return __va(base << UV_GAM_RANGE_SHFT | offset); } @@ -650,7 +650,7 @@ static inline int uv_cpu_blade_processor_id(int cpu) /* Blade number to Node number (UV2..UV4 is 1:1) */ static inline int uv_blade_to_node(int blade) { - return blade; + return uv_socket_to_node(blade); } /* Blade number of current cpu. Numnbered 0 .. <#blades -1> */ @@ -662,23 +662,27 @@ static inline int uv_numa_blade_id(void) /* * Convert linux node number to the UV blade number. * .. Currently for UV2 thru UV4 the node and the blade are identical. - * .. If this changes then you MUST check references to this function! + * .. UV5 needs conversion when sub-numa clustering is enabled. */ static inline int uv_node_to_blade_id(int nid) { - return nid; + unsigned short *n2s = uv_hub_info->node_to_socket; + + return n2s ? n2s[nid] : nid; } /* Convert a CPU number to the UV blade number */ static inline int uv_cpu_to_blade_id(int cpu) { - return uv_node_to_blade_id(cpu_to_node(cpu)); + return uv_cpu_hub_info(cpu)->numa_blade_id; } /* Convert a blade id to the PNODE of the blade */ static inline int uv_blade_to_pnode(int bid) { - return uv_hub_info_list(uv_blade_to_node(bid))->pnode; + unsigned short *s2p = uv_hub_info->socket_to_pnode; + + return s2p ? s2p[bid] : bid; } /* Nid of memory node on blade. -1 if no blade-local memory */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1bd15b1f7712..10d3bdf874a0 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -546,7 +546,6 @@ unsigned long sn_rtc_cycles_per_second; EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* The following values are used for the per node hub info struct */ -static __initdata unsigned short *_node_to_pnode; static __initdata unsigned short _min_socket, _max_socket; static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len; static __initdata struct uv_gam_range_entry *uv_gre_table; @@ -554,6 +553,7 @@ static __initdata struct uv_gam_parameters *uv_gp_table; static __initdata unsigned short *_socket_to_node; static __initdata unsigned short *_socket_to_pnode; static __initdata unsigned short *_pnode_to_socket; +static __initdata unsigned short *_node_to_socket; static __initdata struct uv_gam_range_s *_gr_table; @@ -1293,6 +1293,7 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi) hi->nasid_shift = uv_cpuid.nasid_shift; hi->min_pnode = _min_pnode; hi->min_socket = _min_socket; + hi->node_to_socket = _node_to_socket; hi->pnode_to_socket = _pnode_to_socket; hi->socket_to_node = _socket_to_node; hi->socket_to_pnode = _socket_to_pnode; @@ -1526,6 +1527,11 @@ static void __init free_1_to_1_table(unsigned short **tp, char *tname, int min, pr_info("UV: %s is 1:1, conversion table removed\n", tname); } +/* + * Build Socket Tables + * If the number of nodes is >1 per socket, socket to node table will + * contain lowest node number on that socket. + */ static void __init build_socket_tables(void) { struct uv_gam_range_entry *gre = uv_gre_table; @@ -1552,27 +1558,25 @@ static void __init build_socket_tables(void) /* Allocate and clear tables */ if ((alloc_conv_table(nump, &_pnode_to_socket) < 0) || (alloc_conv_table(nums, &_socket_to_pnode) < 0) - || (alloc_conv_table(numn, &_node_to_pnode) < 0) + || (alloc_conv_table(numn, &_node_to_socket) < 0) || (alloc_conv_table(nums, &_socket_to_node) < 0)) { kfree(_pnode_to_socket); kfree(_socket_to_pnode); - kfree(_node_to_pnode); + kfree(_node_to_socket); return; } /* Fill in pnode/node/addr conversion list values: */ - pr_info("UV: GAM Building socket/pnode conversion tables\n"); for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { if (gre->type == UV_GAM_RANGE_TYPE_HOLE) continue; i = gre->sockid - minsock; - /* Duplicate: */ - if (_socket_to_pnode[i] != SOCK_EMPTY) - continue; - _socket_to_pnode[i] = gre->pnode; + if (_socket_to_pnode[i] == SOCK_EMPTY) + _socket_to_pnode[i] = gre->pnode; i = gre->pnode - minpnode; - _pnode_to_socket[i] = gre->sockid; + if (_pnode_to_socket[i] == SOCK_EMPTY) + _pnode_to_socket[i] = gre->sockid; pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n", gre->sockid, gre->type, gre->nasid, @@ -1582,34 +1586,29 @@ static void __init build_socket_tables(void) /* Set socket -> node values: */ lnid = NUMA_NO_NODE; - for_each_present_cpu(cpu) { + for_each_possible_cpu(cpu) { int nid = cpu_to_node(cpu); int apicid, sockid; if (lnid == nid) continue; lnid = nid; + apicid = per_cpu(x86_cpu_to_apicid, cpu); sockid = apicid >> uv_cpuid.socketid_shift; - _socket_to_node[sockid - minsock] = nid; - pr_info("UV: sid:%02x: apicid:%04x node:%2d\n", - sockid, apicid, nid); - } - /* Set up physical blade to pnode translation from GAM Range Table: */ - for (lnid = 0; lnid < num_possible_nodes(); lnid++) { - unsigned short sockid; + if (_socket_to_node[sockid - minsock] == SOCK_EMPTY) + _socket_to_node[sockid - minsock] = nid; - for (sockid = minsock; sockid <= maxsock; sockid++) { - if (lnid == _socket_to_node[sockid - minsock]) { - _node_to_pnode[lnid] = _socket_to_pnode[sockid - minsock]; - break; - } - } - if (sockid > maxsock) { - pr_err("UV: socket for node %d not found!\n", lnid); - BUG(); - } + if (_node_to_socket[nid] == SOCK_EMPTY) + _node_to_socket[nid] = sockid; + + pr_info("UV: sid:%02x: apicid:%04x socket:%02d node:%03x s2n:%03x\n", + sockid, + apicid, + _node_to_socket[nid], + nid, + _socket_to_node[sockid - minsock]); } /* @@ -1617,6 +1616,7 @@ static void __init build_socket_tables(void) * system runs faster by removing corresponding conversion table. */ FREE_1_TO_1_TABLE(_socket_to_node, _min_socket, nums, numn); + FREE_1_TO_1_TABLE(_node_to_socket, _min_socket, nums, numn); FREE_1_TO_1_TABLE(_socket_to_pnode, _min_pnode, nums, nump); FREE_1_TO_1_TABLE(_pnode_to_socket, _min_pnode, nums, nump); } @@ -1702,12 +1702,13 @@ static __init int uv_system_init_hubless(void) static void __init uv_system_init_hub(void) { struct uv_hub_info_s hub_info = {0}; - int bytes, cpu, nodeid; + int bytes, cpu, nodeid, bid; unsigned short min_pnode = USHRT_MAX, max_pnode = 0; char *hub = is_uv5_hub() ? "UV500" : is_uv4_hub() ? "UV400" : is_uv3_hub() ? "UV300" : is_uv2_hub() ? "UV2000/3000" : NULL; + struct uv_hub_info_s **uv_hub_info_list_blade; if (!hub) { pr_err("UV: Unknown/unsupported UV hub\n"); @@ -1730,9 +1731,12 @@ static void __init uv_system_init_hub(void) build_uv_gr_table(); set_block_size(); uv_init_hub_info(&hub_info); - uv_possible_blades = num_possible_nodes(); - if (!_node_to_pnode) + /* If UV2 or UV3 may need to get # blades from HW */ + if (is_uv(UV2|UV3) && !uv_gre_table) boot_init_possible_blades(&hub_info); + else + /* min/max sockets set in decode_gam_rng_tbl */ + uv_possible_blades = (_max_socket - _min_socket) + 1; /* uv_num_possible_blades() is really the hub count: */ pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus()); @@ -1741,79 +1745,98 @@ static void __init uv_system_init_hub(void) hub_info.coherency_domain_number = sn_coherency_id; uv_rtc_init(); + /* + * __uv_hub_info_list[] is indexed by node, but there is only + * one hub_info structure per blade. First, allocate one + * structure per blade. Further down we create a per-node + * table (__uv_hub_info_list[]) pointing to hub_info + * structures for the correct blade. + */ + bytes = sizeof(void *) * uv_num_possible_blades(); - __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); - BUG_ON(!__uv_hub_info_list); + uv_hub_info_list_blade = kzalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!uv_hub_info_list_blade)) + return; bytes = sizeof(struct uv_hub_info_s); - for_each_node(nodeid) { + for_each_possible_blade(bid) { struct uv_hub_info_s *new_hub; - if (__uv_hub_info_list[nodeid]) { - pr_err("UV: Node %d UV HUB already initialized!?\n", nodeid); - BUG(); + /* Allocate & fill new per hub info list */ + new_hub = (bid == 0) ? &uv_hub_info_node0 + : kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid)); + if (WARN_ON_ONCE(!new_hub)) { + /* do not kfree() bid 0, which is statically allocated */ + while (--bid > 0) + kfree(uv_hub_info_list_blade[bid]); + kfree(uv_hub_info_list_blade); + return; } - /* Allocate new per hub info list */ - new_hub = (nodeid == 0) ? &uv_hub_info_node0 : kzalloc_node(bytes, GFP_KERNEL, nodeid); - BUG_ON(!new_hub); - __uv_hub_info_list[nodeid] = new_hub; - new_hub = uv_hub_info_list(nodeid); - BUG_ON(!new_hub); + uv_hub_info_list_blade[bid] = new_hub; *new_hub = hub_info; /* Use information from GAM table if available: */ - if (_node_to_pnode) - new_hub->pnode = _node_to_pnode[nodeid]; + if (uv_gre_table) + new_hub->pnode = uv_blade_to_pnode(bid); else /* Or fill in during CPU loop: */ new_hub->pnode = 0xffff; - new_hub->numa_blade_id = uv_node_to_blade_id(nodeid); + new_hub->numa_blade_id = bid; new_hub->memory_nid = NUMA_NO_NODE; new_hub->nr_possible_cpus = 0; new_hub->nr_online_cpus = 0; } + /* + * Now populate __uv_hub_info_list[] for each node with the + * pointer to the struct for the blade it resides on. + */ + + bytes = sizeof(void *) * num_possible_nodes(); + __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!__uv_hub_info_list)) { + for_each_possible_blade(bid) + /* bid 0 is statically allocated */ + if (bid != 0) + kfree(uv_hub_info_list_blade[bid]); + kfree(uv_hub_info_list_blade); + return; + } + + for_each_node(nodeid) + __uv_hub_info_list[nodeid] = uv_hub_info_list_blade[uv_node_to_blade_id(nodeid)]; + /* Initialize per CPU info: */ for_each_possible_cpu(cpu) { - int apicid = per_cpu(x86_cpu_to_apicid, cpu); - int numa_node_id; + int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); + unsigned short bid; unsigned short pnode; - nodeid = cpu_to_node(cpu); - numa_node_id = numa_cpu_node(cpu); pnode = uv_apicid_to_pnode(apicid); + bid = uv_pnode_to_socket(pnode) - _min_socket; - uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid); + uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list_blade[bid]; uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++; if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE) uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu); - /* Init memoryless node: */ - if (nodeid != numa_node_id && - uv_hub_info_list(numa_node_id)->pnode == 0xffff) - uv_hub_info_list(numa_node_id)->pnode = pnode; - else if (uv_cpu_hub_info(cpu)->pnode == 0xffff) + if (uv_cpu_hub_info(cpu)->pnode == 0xffff) uv_cpu_hub_info(cpu)->pnode = pnode; } - for_each_node(nodeid) { - unsigned short pnode = uv_hub_info_list(nodeid)->pnode; + for_each_possible_blade(bid) { + unsigned short pnode = uv_hub_info_list_blade[bid]->pnode; - /* Add pnode info for pre-GAM list nodes without CPUs: */ - if (pnode == 0xffff) { - unsigned long paddr; + if (pnode == 0xffff) + continue; - paddr = node_start_pfn(nodeid) << PAGE_SHIFT; - pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); - uv_hub_info_list(nodeid)->pnode = pnode; - } min_pnode = min(pnode, min_pnode); max_pnode = max(pnode, max_pnode); - pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n", - nodeid, - uv_hub_info_list(nodeid)->pnode, - uv_hub_info_list(nodeid)->nr_possible_cpus); + pr_info("UV: HUB:%2d pn:%02x nrcpus:%d\n", + bid, + uv_hub_info_list_blade[bid]->pnode, + uv_hub_info_list_blade[bid]->nr_possible_cpus); } pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode); @@ -1821,6 +1844,9 @@ static void __init uv_system_init_hub(void) map_mmr_high(max_pnode); map_mmioh_high(min_pnode, max_pnode); + kfree(uv_hub_info_list_blade); + uv_hub_info_list_blade = NULL; + uv_nmi_setup(); uv_cpu_init(); uv_setup_proc_files(0); -- cgit v1.2.3 From 89827568a82d5856f3c8d329d3c2bc0f47385eb9 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Fri, 19 May 2023 14:07:51 -0500 Subject: x86/platform/uv: Remove remaining BUG_ON() and BUG() calls Replace BUG and BUG_ON with WARN_ON_ONCE and carry on as best as we can. Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20230519190752.3297140-8-steve.wahl%40hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 10d3bdf874a0..1836330ff7b3 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -617,7 +617,8 @@ static __init void build_uv_gr_table(void) bytes = _gr_table_len * sizeof(struct uv_gam_range_s); grt = kzalloc(bytes, GFP_KERNEL); - BUG_ON(!grt); + if (WARN_ON_ONCE(!grt)) + return; _gr_table = grt; for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { @@ -1548,7 +1549,8 @@ static void __init build_socket_tables(void) return; } pr_err("UV: Error: UVsystab address translations not available!\n"); - BUG(); + WARN_ON_ONCE(!gre); + return; } numn = num_possible_nodes(); -- cgit v1.2.3 From 73b3108dfd9d53ea565c0777a27007c1c621467d Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Fri, 19 May 2023 14:07:52 -0500 Subject: x86/platform/uv: Update UV[23] platform code for SNC Previous Sub-NUMA Clustering changes need not just a count of blades present, but a count that includes any missing ids for blades not present; in other words, the range from lowest to highest blade id. Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20230519190752.3297140-9-steve.wahl%40hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 42 ++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1836330ff7b3..d9384d5b4b8e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1462,11 +1462,37 @@ static int __init decode_uv_systab(void) return 0; } +/* + * Given a bitmask 'bits' representing presnt blades, numbered + * starting at 'base', masking off unused high bits of blade number + * with 'mask', update the minimum and maximum blade numbers that we + * have found. (Masking with 'mask' necessary because of BIOS + * treatment of system partitioning when creating this table we are + * interpreting.) + */ +static inline void blade_update_min_max(unsigned long bits, int base, int mask, int *min, int *max) +{ + int first, last; + + if (!bits) + return; + first = (base + __ffs(bits)) & mask; + last = (base + __fls(bits)) & mask; + + if (*min > first) + *min = first; + if (*max < last) + *max = last; +} + /* Set up physical blade translations from UVH_NODE_PRESENT_TABLE */ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info) { unsigned long np; int i, uv_pb = 0; + int sock_min = INT_MAX, sock_max = -1, s_mask; + + s_mask = (1 << uv_cpuid.n_skt) - 1; if (UVH_NODE_PRESENT_TABLE) { pr_info("UV: NODE_PRESENT_DEPTH = %d\n", @@ -1474,23 +1500,31 @@ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info) for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np); - uv_pb += hweight64(np); + blade_update_min_max(np, i * 64, s_mask, &sock_min, &sock_max); } } if (UVH_NODE_PRESENT_0) { np = uv_read_local_mmr(UVH_NODE_PRESENT_0); pr_info("UV: NODE_PRESENT_0 = 0x%016lx\n", np); - uv_pb += hweight64(np); + blade_update_min_max(np, 0, s_mask, &sock_min, &sock_max); } if (UVH_NODE_PRESENT_1) { np = uv_read_local_mmr(UVH_NODE_PRESENT_1); pr_info("UV: NODE_PRESENT_1 = 0x%016lx\n", np); - uv_pb += hweight64(np); + blade_update_min_max(np, 64, s_mask, &sock_min, &sock_max); + } + + /* Only update if we actually found some bits indicating blades present */ + if (sock_max >= sock_min) { + _min_socket = sock_min; + _max_socket = sock_max; + uv_pb = sock_max - sock_min + 1; } if (uv_possible_blades != uv_pb) uv_possible_blades = uv_pb; - pr_info("UV: number nodes/possible blades %d\n", uv_pb); + pr_info("UV: number nodes/possible blades %d (%d - %d)\n", + uv_pb, sock_min, sock_max); } static int __init alloc_conv_table(int num_elem, unsigned short **table) -- cgit v1.2.3