From cf4f40500a4ed62efe92d14d9a84691bc0a0361f Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Sat, 7 Dec 2019 16:47:13 -0600 Subject: [PATCH 1/3] amdgpu: Prepare DCN floating point macros for generic arch support Introduce DC_FP_START()/DC_FP_END() macros to help enable floating point kernel mode support across various architectures. v2: move copyright update to commit which adds the changes Signed-off-by: Timothy Pearson Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 24 +++++++++---------- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 5 ++-- drivers/gpu/drm/amd/display/dc/os_types.h | 3 +++ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 9b2cb57..cd54712 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -626,7 +626,7 @@ static bool dcn_bw_apply_registry_override(struct dc *dc) { bool updated = false; - kernel_fpu_begin(); + DC_FP_START(); if ((int)(dc->dcn_soc->sr_exit_time * 1000) != dc->debug.sr_exit_time_ns && dc->debug.sr_exit_time_ns) { updated = true; @@ -662,7 +662,7 @@ static bool dcn_bw_apply_registry_override(struct dc *dc) dc->dcn_soc->dram_clock_change_latency = dc->debug.dram_clock_change_latency_ns / 1000.0; } - kernel_fpu_end(); + DC_FP_END(); return updated; } @@ -742,7 +742,7 @@ bool dcn_validate_bandwidth( dcn_bw_sync_calcs_and_dml(dc); memset(v, 0, sizeof(*v)); - kernel_fpu_begin(); + DC_FP_START(); v->sr_exit_time = dc->dcn_soc->sr_exit_time; v->sr_enter_plus_exit_time = dc->dcn_soc->sr_enter_plus_exit_time; @@ -1275,7 +1275,7 @@ bool dcn_validate_bandwidth( bw_limit = dc->dcn_soc->percent_disp_bw_limit * v->fabric_and_dram_bandwidth_vmax0p9; bw_limit_pass = (v->total_data_read_bandwidth / 1000.0) < bw_limit; - kernel_fpu_end(); + DC_FP_END(); PERFORMANCE_TRACE_END(); BW_VAL_TRACE_FINISH(); @@ -1443,7 +1443,7 @@ void 
dcn_bw_update_from_pplib(struct dc *dc) res = dm_pp_get_clock_levels_by_type_with_voltage( ctx, DM_PP_CLOCK_TYPE_FCLK, &fclks); - kernel_fpu_begin(); + DC_FP_START(); if (res) res = verify_clock_values(&fclks); @@ -1463,12 +1463,12 @@ void dcn_bw_update_from_pplib(struct dc *dc) } else BREAK_TO_DEBUGGER(); - kernel_fpu_end(); + DC_FP_END(); res = dm_pp_get_clock_levels_by_type_with_voltage( ctx, DM_PP_CLOCK_TYPE_DCFCLK, &dcfclks); - kernel_fpu_begin(); + DC_FP_START(); if (res) res = verify_clock_values(&dcfclks); @@ -1481,7 +1481,7 @@ void dcn_bw_update_from_pplib(struct dc *dc) } else BREAK_TO_DEBUGGER(); - kernel_fpu_end(); + DC_FP_END(); } void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) @@ -1496,11 +1496,11 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) if (!pp || !pp->set_wm_ranges) return; - kernel_fpu_begin(); + DC_FP_START(); min_fclk_khz = dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 * 1000000 / 32; min_dcfclk_khz = dc->dcn_soc->dcfclkv_min0p65 * 1000; socclk_khz = dc->dcn_soc->socclk * 1000; - kernel_fpu_end(); + DC_FP_END(); /* Now notify PPLib/SMU about which Watermarks sets they should select * depending on DPM state they are in. 
And update BW MGR GFX Engine and @@ -1551,7 +1551,7 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) void dcn_bw_sync_calcs_and_dml(struct dc *dc) { - kernel_fpu_begin(); + DC_FP_START(); DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n" "sr_enter_plus_exit_time: %f ns\n" "urgent_latency: %f ns\n" @@ -1740,5 +1740,5 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc) dc->dml.ip.bug_forcing_LC_req_same_size_fixed = dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed == dcn_bw_yes; dc->dml.ip.dcfclk_cstate_latency = dc->dcn_ip->dcfclk_cstate_latency; - kernel_fpu_end(); + DC_FP_END(); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 6b2f2f1..b859b73 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1,5 +1,6 @@ /* * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2019 Raptor Engineering, LLC * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -3095,7 +3096,7 @@ static void update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb) { - kernel_fpu_begin(); + DC_FP_START(); if ((int)(bb->sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns && dc->bb_overrides.sr_exit_time_ns) { bb->sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; @@ -3119,7 +3120,7 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s bb->dram_clock_change_latency_us = dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; } - kernel_fpu_end(); + DC_FP_END(); } static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb( diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index 30ec80a..938735b 100644 --- 
a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -1,5 +1,6 @@ /* * Copyright 2012-16 Advanced Micro Devices, Inc. + * Copyright 2019 Raptor Engineering, LLC * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -50,6 +51,8 @@ #if defined(CONFIG_DRM_AMD_DC_DCN1_0) #include <asm/fpu/api.h> +#define DC_FP_START() kernel_fpu_begin() +#define DC_FP_END() kernel_fpu_end() #endif /* -- 2.24.0 From 0d92576da4f45d47938fecf8bac79f89a52a57d8 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Sat, 7 Dec 2019 16:47:46 -0600 Subject: [PATCH 2/3] amdgpu: Enable initial DCN support on POWER DCN requires floating point support to operate. Add the appropriate x86/ppc64 guards and FPU / AltiVec / VSX context switches to DCN. Note that the current DC20 code doesn't contain all required FPU wrappers on x86 or POWER, so this patch is insufficient to fully enable DC20 on POWER. v2: s/X86_64/X86/g to retain previous behavior. 
Signed-off-by: Timothy Pearson Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Kconfig | 8 ++--- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 9 ++++++ .../gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 1 + drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 8 +++++ drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 8 +++++ drivers/gpu/drm/amd/display/dc/dml/Makefile | 9 ++++++ drivers/gpu/drm/amd/display/dc/dsc/Makefile | 8 +++++ drivers/gpu/drm/amd/display/dc/os_types.h | 29 +++++++++++++++++++ 8 files changed, 76 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 71991a2..72e5a08 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,7 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN1_0 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN1_0 if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. 
This adds required support for Vega and @@ -20,7 +20,7 @@ config DRM_AMD_DC_DCN1_0 config DRM_AMD_DC_DCN2_0 bool "DCN 2.0 family" default y - depends on DRM_AMD_DC && X86 + depends on DRM_AMD_DC && (X86 || PPC64) depends on DRM_AMD_DC_DCN1_0 help Choose this option if you want to have @@ -28,7 +28,7 @@ config DRM_AMD_DC_DCN2_0 config DRM_AMD_DC_DCN2_1 bool "DCN 2.1 family" - depends on DRM_AMD_DC && X86 + depends on DRM_AMD_DC && (X86 || PPC64) depends on DRM_AMD_DC_DCN2_0 help Choose this option if you want to have @@ -37,7 +37,7 @@ config DRM_AMD_DC_DCN2_1 config DRM_AMD_DC_DSC_SUPPORT bool "DSC support" default y - depends on DRM_AMD_DC && X86 + depends on DRM_AMD_DC && (X86 || PPC64) depends on DRM_AMD_DC_DCN1_0 depends on DRM_AMD_DC_DCN2_0 help diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 26c6d73..9112076 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -1,5 +1,6 @@ # # Copyright 2017 Advanced Micro Devices, Inc. +# Copyright 2019 Raptor Engineering, LLC # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -24,7 +25,13 @@ # It calculates Bandwidth and Watermarks values for HW programming # +ifdef CONFIG_X86 calcs_ccflags := -mhard-float -msse +endif + +ifdef CONFIG_PPC64 +calcs_ccflags := -mhard-float -maltivec +endif ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) @@ -32,6 +39,7 @@ IS_OLD_GCC = 1 endif endif +ifdef CONFIG_X86 ifdef IS_OLD_GCC # Stack alignment mismatch, proceed with caution. 
# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 @@ -40,6 +48,7 @@ calcs_ccflags += -mpreferred-stack-boundary=4 else calcs_ccflags += -msse2 endif +endif CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calcs.o := $(calcs_ccflags) CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calc_auto.o := $(calcs_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index cd54712..e34ff2f 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -1,5 +1,6 @@ /* * Copyright 2017 Advanced Micro Devices, Inc. + * Copyright 2019 Raptor Engineering, LLC * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 63f3bdd..70011ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -10,7 +10,13 @@ ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT DCN20 += dcn20_dsc.o endif +ifdef CONFIG_X86 CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse +endif + +ifdef CONFIG_PPC64 +CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -maltivec +endif ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) @@ -18,6 +24,7 @@ IS_OLD_GCC = 1 endif endif +ifdef CONFIG_X86 ifdef IS_OLD_GCC # Stack alignment mismatch, proceed with caution. 
# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 @@ -26,6 +33,7 @@ CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4 else CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2 endif +endif AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index ff50ae7..2802998 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -3,7 +3,13 @@ DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o +ifdef CONFIG_X86 CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse +endif + +ifdef CONFIG_PPC64 +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -maltivec +endif ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) @@ -11,6 +17,7 @@ IS_OLD_GCC = 1 endif endif +ifdef CONFIG_X86 ifdef IS_OLD_GCC # Stack alignment mismatch, proceed with caution. # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 @@ -19,6 +26,7 @@ CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4 else CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2 endif +endif AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21)) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 8df2516..74d61ee 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -1,5 +1,6 @@ # # Copyright 2017 Advanced Micro Devices, Inc. +# Copyright 2019 Raptor Engineering, LLC # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -24,7 +25,13 @@ # It provides the general basic services required by other DAL # subcomponents. 
+ifdef CONFIG_X86 dml_ccflags := -mhard-float -msse +endif + +ifdef CONFIG_PPC64 +dml_ccflags := -mhard-float -maltivec +endif ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) @@ -32,6 +39,7 @@ IS_OLD_GCC = 1 endif endif +ifdef CONFIG_X86 ifdef IS_OLD_GCC # Stack alignment mismatch, proceed with caution. # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 @@ -40,6 +48,7 @@ dml_ccflags += -mpreferred-stack-boundary=4 else dml_ccflags += -msse2 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index 9707372..7415b8b 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -1,7 +1,13 @@ # # Makefile for the 'dsc' sub-component of DAL. +ifdef CONFIG_X86 dsc_ccflags := -mhard-float -msse +endif + +ifdef CONFIG_PPC64 +dsc_ccflags := -mhard-float -maltivec +endif ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) @@ -9,6 +15,7 @@ IS_OLD_GCC = 1 endif endif +ifdef CONFIG_X86 ifdef IS_OLD_GCC # Stack alignment mismatch, proceed with caution. # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 @@ -17,6 +24,7 @@ dsc_ccflags += -mpreferred-stack-boundary=4 else dsc_ccflags += -msse2 endif +endif CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags) CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc_dpi.o := $(dsc_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index 938735b..c687d64 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -50,9 +50,38 @@ #define dm_error(fmt, ...) 
DRM_ERROR(fmt, ##__VA_ARGS__) #if defined(CONFIG_DRM_AMD_DC_DCN1_0) +#if defined(CONFIG_X86) #include <asm/fpu/api.h> #define DC_FP_START() kernel_fpu_begin() #define DC_FP_END() kernel_fpu_end() +#elif defined(CONFIG_PPC64) +#include <asm/switch_to.h> +#include <asm/cputable.h> +#define DC_FP_START() { \ + if (cpu_has_feature(CPU_FTR_VSX_COMP)) { \ + preempt_disable(); \ + enable_kernel_vsx(); \ + } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) { \ + preempt_disable(); \ + enable_kernel_altivec(); \ + } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) { \ + preempt_disable(); \ + enable_kernel_fp(); \ + } \ +} +#define DC_FP_END() { \ + if (cpu_has_feature(CPU_FTR_VSX_COMP)) { \ + disable_kernel_vsx(); \ + preempt_enable(); \ + } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) { \ + disable_kernel_altivec(); \ + preempt_enable(); \ + } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) { \ + disable_kernel_fp(); \ + preempt_enable(); \ + } \ +} +#endif #endif /* -- 2.24.0 From 125b03f660dcb8277c7a034507afb728c3f4296a Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Sat, 7 Dec 2019 16:48:09 -0600 Subject: [PATCH 3/3] amdgpu: Wrap FPU dependent functions in dc20 dc20 contains several FPU-dependent functions without proper FPU kernel mode enable/disable wrappers. Add the required wrappers for both x86 and POWER. This enables Navi DC20 support for POWER systems. 
v2: fix compilation Signed-off-by: Timothy Pearson Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index b859b73..ece1cc7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2773,14 +2773,19 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { + DC_FP_START(); + bool voltage_supported = false; bool full_pstate_supported = false; bool dummy_pstate_supported = false; double p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us; - if (fast_validate) - return dcn20_validate_bandwidth_internal(dc, context, true); + if (fast_validate) { + voltage_supported = dcn20_validate_bandwidth_internal(dc, context, true); + DC_FP_END(); + return voltage_supported; + } // Best case, we support full UCLK switch latency voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); @@ -2810,6 +2815,7 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib)); context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us; + DC_FP_END(); return voltage_supported; } @@ -3320,6 +3326,8 @@ static bool construct( enum dml_project dml_project_version = get_dml_project_version(ctx->asic_id.hw_internal_rev); + DC_FP_START(); + ctx->dc_bios->regs = &bios_regs; pool->base.funcs = &dcn20_res_pool_funcs; @@ -3607,10 +3615,12 @@ static bool construct( dc->cap_funcs = cap_funcs; + DC_FP_END(); return true; create_fail: + DC_FP_END(); destruct(pool); return false; -- 2.24.0