diff mbox series

[v17,3/4] pwm: Add support for RZ/G2L GPT

Message ID 20231120113307.80710-4-biju.das.jz@bp.renesas.com (mailing list archive)
State Superseded
Delegated to: Geert Uytterhoeven
Headers show
Series Add support for RZ/G2L GPT | expand

Commit Message

Biju Das Nov. 20, 2023, 11:33 a.m. UTC
RZ/G2L General PWM Timer (GPT) composed of 8 channels with 32-bit timer
(GPT32E). It supports the following functions
 * 32 bits × 8 channels
 * Up-counting or down-counting (saw waves) or up/down-counting
   (triangle waves) for each counter.
 * Clock sources independently selectable for each channel
 * Two I/O pins per channel
 * Two output compare/input capture registers per channel
 * For the two output compare/input capture registers of each channel,
   four registers are provided as buffer registers and are capable of
   operating as comparison registers when buffering is not in use.
 * In output compare operation, buffer switching can be at crests or
   troughs, enabling the generation of laterally asymmetric PWM waveforms.
 * Registers for setting up frame cycles in each channel (with capability
   for generating interrupts at overflow or underflow)
 * Generation of dead times in PWM operation
 * Synchronous starting, stopping and clearing counters for arbitrary
   channels
 * Starting, stopping, clearing and up/down counters in response to input
   level comparison
 * Starting, clearing, stopping and up/down counters in response to a
   maximum of four external triggers
 * Output pin disable function by dead time error and detected
   short-circuits between output pins
 * A/D converter start triggers can be generated (GPT32E0 to GPT32E3)
 * Enables the noise filter for input capture and external trigger
   operation

Add basic pwm support for RZ/G2L GPT driver by creating separate
logical channels for each IOs.

Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
---
v16->v17:
* Added ret = dev_err_probe() to avoid return success in probe().
* Dropped unneeded MODULE_ALIAS().
* Dropped .owner from struct rzg2l_gpt_ops.
* Fixed build issue reported by kernel test robot <lkp@intel.com> by
  replacing DIV_ROUND_UP()->DIV64_U64_ROUND_UP() in
  rzg2l_gpt_calculate_pv_or_dc().
* Added max_val to struct rzg2l_gpt_chip to compute maximum period
  supported by the HW in probe() and limit its value in apply() to
  avoid 64-bit overflow with computation.
* Added helper function calculate_period_or_duty() to avoid losing
  precision for smaller period/duty cycle values
  ((2^32 * 10^9 << 2) < 2^64), by not processing the rounded values.
* Replaced mul_u64_u64_div_u64()->mul_u64_u32_div() as the former is
  giving warnings with CONFIG_PWM_DEBUG enabled for very high values. 
  eg:
	echo "###### Medium setting 11000 sec = 3hours ##"
	echo 11000000000000 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/period
	echo  5500000000000 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/duty_cycle
	dumpgptreg
	
	echo "###### High setting##"
	echo 43980465100800 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/period
	echo 23980465100800 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/duty_cycle
	dumpgptreg

	with mul_u64_u32_div():
	###### Medium setting 11000 sec = 3hours ##
	Read at address  0x10048464 (0xffffb9426464): 0x400746FE
	Read at address  0x1004844C (0xffff8fdfb44c): 0x2003A37F
	Read at address  0x1004842C (0xffff9855b42c): 0x05000001
	###### High setting##
	Read at address  0x10048464 (0xffff9101b464): 0xFFFFFFFF
	Read at address  0x1004844C (0xffffaee0544c): 0x8B95AD77
	Read at address  0x1004842C (0xffffbbc8a42c): 0x05000001

	with mul_u64_u64_div_u64():
	###### Medium setting 11000 sec = 3hours ##
	Read at address  0x10048464 (0xffffb3185464): 0x400746FE
	Read at address  0x1004844C (0xffff81ebb44c): 0x2003A37F
	Read at address  0x1004842C (0xffff904fd42c): 0x05000001
	######[  304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent (ena=1 pol=0 5500000000000/43980352512000) -> (ena=1 pol=0 5500000000000/43980239923200)
	 High setting##
	[  304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0 23980465100800/43980239923200)
	Read at address  0x10048464 (0xffffb5bb3464): 0xFFFFAA19
	Read at address  0x1004844C (0xffff99b8c44c): 0x8B95AD77
	Read at address  0x1004842C (0xffffbba2342c): 0x05000001
v15->v16:
* Replaced the macro DIV_ROUND_UP_ULL->DIV64_U64_ROUND_UP
* Added DIV_ROUND_UP in rzg2l_gpt_calculate_pv_or_dc() to avoid loss of
  precision.
* Replaced min->min_t() in rzg2l_gpt_calculate_pv_or_dc().
* Added a comment for rzg2l_gpt_config()
* Replaced mul_u64_u32_div()->mul_u64_u64_div_u64() in rzg2l_gpt_config()
* Fixed the logical condition related to counter stop in
  rzg2l_gpt_config().
* Dropped pm_runtime_resume_*() from rzg2l_gpt_config() as it is managed
  by rzg2l_gpt_apply().
* Moved pm_runtime_resume_*() from rzg2l_gpt_{en,dis}able() to
  rzg2l_gpt_apply().
v14->v15:
* Added enable_count and ch_en_bits variables to struct rzg2l_gpt_chip
  based on feedback for pwm_mtu3 driver.
* Updated copyright header and commit description by replacing "This patch
  adds"-> "Add"
* Replaced macro RZG2L_GET_CH_INDEX->RZG2L_GET_CH and replaced ch_index->ch
  throughout
* rzg2l_gpt_{enable,disable}() enables/disables PWM based on the
  enable_count.
* Replaced pm_runtime_get_sync->pm_runtime_resume_and_get and propogated
  the error in rzg2l_gpt_get_state() and rzg2l_gpt_config()
* Reduced variable scope in rzg2l_gpt_get_state() by moving most of variables
  inside the if statement.
* Updated rzg2l_gpt_get_state() by moving duty > period check
  inside the top if block.
* Added helper functions rzg2l_gpt_calculate_pv_or_dc()to simplify config. 
  Also Improved the logic in rzg2l_gpt_calculate_pv_or_dc() by using
  min(period_or_duty_cycle >> (2 * prescale), (u64)U32_MAX);
* Updated rzg2l_gpt_get_state() by moving duty > period check
  inside the top if block.
* Simplified rzg2l_gpt_config() for updating registers
* Dropped pm_runtime_get_sync() and used bitmap variable "ch_en_bits"
  to make balanced PM usage count in rzg2l_gpt_reset_assert_pm_disable()
  For case were unbind is called before apply where pwm is enabled by
  bootloader.
* Added error check for clk_rate_exclusive_get() and clk_get_rate() in
  probe().
* Dropped prescale from struct rzg2l_gpt_chip.
* Replaced of_match_ptr(rzg2l_gpt_of_table)->rzg2l_gpt_of_table in struct
  rzg2l_gpt_driver
v13->v14:
 * Removed parenthesis for RZG2L_MAX_HW_CHANNELS and RZG2L_CHANNELS_PER_IO
 * Removed duty_cycle variable from struct rzg2l_gpt_chip and added comment
   for cache for prescale variable.
 * Fixed a bug in rzg2l_gpt_cntr_need_stop().
 * Reordered rzg2l_gpt_config() just above apply()
 * Replaced pwm_is_enabled()->pwm->state.enabled in config
 * Replaced pm_runtime_resume_and_get with unconditional pm_runtime_get_sync()
   in config().
 * Restored duty_cycle > period check in rzg2l_gpt_get_state().
 * Added error check for clk_prepare_enable() in probe() and propagating error
   to the caller for pm_runtime_resume()
 * clk_get_rate() is called after enabling the clock and clk_rate_exclusive_get()
 * Simplified rzg2l_gpt_probe() by removing bitmap variables.
 * Added pm_runtime_idle() to suspend the device during probe.
 * Moved overflow condition check from config->probe().
 * Simplified rzg2l_gpt_reset_assert_pm_disable().
v12->v13:
 * Replaced Kconfig dependency from ARCH_RENESAS->ARCH_RZG2L
 * Sorted #include <linux/limits.h> alphabetically
 * Added a comment for mutex_lock to fix check patch warning
 * Replaced data type of duty_cycle from unsigned int->u32 as
   the maximum value stored is U32_MAX.
 * Improved rzg2l_gpt_config() by removing unwanted duty_cycle related code.
 * Improved rzg2l_gpt_get_state() by setting "val = rzg2l_gpt->duty_cycle[pwm->hwpwm];", 
   and factor "tmp = NSEC_PER_SEC * (u64)val;" out of the if-statement.
 * Started using DEFINE_RUNTIME_DEV_PM_OPS(), and dropped __maybe_unused
   from the callbacks.
v11->v12:
 * Added return code for get_state()
 * Cache duty cycle/prescale as the driver cannot read the current duty
   cycle/prescale from the hardware if the hardware is disabled. Cache the
   last programmed duty cycle/prescale value to return in that case.
 * Updated rzg2l_gpt_enable to enable the clocks.
 * Updated rzg2l_gpt_disable to disable the clocks.
 * Updated rzg2l_gpt_config() to cache duty cucle/prescale value
 * Updated rzg2l_gpt_get_state to use cached value of duty cycle/prescale,If the PWM
   is disabled.
 * Simplified rzg2l_gpt_apply()
 * Added comments in rzg2l_gpt_reset_assert_pm_disable()
v10->v11:
 * Used bitmap_zero for initializing bitmap varable.
 * Fixed clock imbalance during remove for the case bootloader turning
   on PWM and module unload is called just after the boot.
 * Fixed over flow condition in get_state() for a prescale value of 2 & more.
 * Improved rzg2l_gpt_cntr_need_stop() based on prescale as it is the
   only runtime variable.
 * Added array for Cache variables state_period and prescale
 * Probe caches the prescale value set by the bootloader.
 * Updated rzg2l_gpt_config() to make use of array variables.
v9->v10:
 * Updated the error handling in probe(), clk_disable_unprepare called
   on the error path.
 * Removed ch_en array and started using bitmask instead.
v8->v9:
 * deassert after devm_clk_get() to avoid reset stays deasserted,in case
   clk_get() fails.
 * Removed ch_offs from struct rzg2l_gpt_chip and use macro instead.
 * Updated error handling in probe()
v7->v8:
 * Modelled as single PWM device handling multiple channels
 * Replaced shared reset->devm_reset_control_get_exclusive()
 * Replaced iowrite32->writel and ioread32->readl
 * Updated prescale calculation
 * Added PM runtime callbacks
 * Updated PM handling and removed "pwm_enabled_by_bootloader" variable
 * Introduced rzg2l_gpt_is_ch_enabled for checking enable status on both
   IO's
 * Moved enable/disable output pins from config->enable/disable.
 * Added rzg2l_gpt_cntr_need_stop() for caching prescalar/mode values.
v6->v7:
 * Added the comment for cacheing rzg2l_gpt->state_period.
 * Fixed boundary values for pv and dc.
 * Added comment for modifying mode, prescaler, timer counter and buffer enable
   registers.
 * Fixed buffer overflow in get_state()
 * Removed unnecessary assignment of state->period value in get_state().
 * Fixed state->duty_cycle value in get_state().
 * Added a limitation for disabling the channels.
v5->v6:
 * Updated macros RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH and
   RZG2L_GTIOR_GTIOB_OUT_LO_END_TOGGLE_CMP_MATCH with computation
   involving FIELD_PREP macro.
 * Removed struct rzg2l_gpt_phase and started using RZG2L_GTCCR macro
   for duty_offset.
 * replaced misnomer real_period->state_period.
 * Added handling for values >= (1024 << 32) for both period
   and duty cycle.
 * Added comments for pwm {en,dis}abled by bootloader during probe.
v4->v5:
 * Added Hardware manual details
 * Replaced the comment GTCNT->Counter
 * Removed the macros RZG2L_GPT_IO_PER_CHANNEL and chip.npwm directly
   used in probe.
 * Removed the unsed macro RZG2L_GTPR_MAX_VALUE
 * Added driver prefix for the type name and the variable.
 * Initialization of per_channel data moved from request->probe.
 * Updated clr parameter for rzg2l_gpt_modify for Start count.
 * Started using mutex and usage_count for handling shared
   period and prescalar for the 2 channels.
 * Updated the comment cycle->period.
 * Removed clk_disable from rzg2l_gpt_reset_assert_pm_disable()
 * Replaced pc->rzg2l_gpt.
 * Updated prescale calculation.
 * Moved pm_runtime_{get_sync,put} from {request,free}->{enable,disable}
 * Removed platform_set_drvdata as it is unused
 * Removed the variable pwm_enabled_by_bootloader 
 * Added dev_err_probe in various error paths in probe.
 * Added an error message, if devm_pwmchip_add() fails.
v3->v4:
 * Changed the local variable type i from u16->u8 and prescaled_period_
   cycles from u64->u32 in calculate_prescale().
 * Replaced mul_u64_u64_div_u64()->mul_u64_u32_div()
 * Dropped the comma after the sentinel.
 * Add a variable to track pwm enabled by bootloader and added comments
   in probe().
 * Removed unnecessary rzg2l_gpt_reset_assert_pm_disable() from probe.
 * Replaced devm_clk_get()->devm_clk_get_prepared()
 * Removed devm_clk_get_optional_enabled()
v2->v3:
 * Updated limitation section
 * Added prefix "RZG2L_" for all macros
 * Modified prescale calculation
 * Removed pwm_set_chip_data
 * Updated comment related to modifying Mode and Prescaler
 * Updated setting of prescale value in rzg2l_gpt_config()
 * Removed else branch from rzg2l_gpt_get_state()
 * removed the err label from rzg2l_gpt_apply()
 * Added devm_clk_get_optional_enabled() to retain clk on status,
   in case bootloader turns on the clk of pwm.
 * Replaced devm_reset_control_get_exclusive->devm_reset_control_get_shared
   as single reset shared between 8 channels.
v1->v2:
 * Added Limitations section
 * dropped "_MASK" from the define names.
 * used named initializer for struct phase
 * Added gpt_pwm_device into a flexible array member in rzg2l_gpt_chip
 * Revised the logic for prescale
 * Added .get_state callback
 * Improved error handling in rzg2l_gpt_apply
 * Removed .remove callback
 * Tested driver with PWM_DEBUG enabled
RFC->V1:
 * Updated macros
 * replaced rzg2l_gpt_write_mask()->rzg2l_gpt_modify()
 * Added rzg2l_gpt_read()
---
 drivers/pwm/Kconfig         |  11 +
 drivers/pwm/Makefile        |   1 +
 drivers/pwm/pwm-rzg2l-gpt.c | 572 ++++++++++++++++++++++++++++++++++++
 3 files changed, 584 insertions(+)
 create mode 100644 drivers/pwm/pwm-rzg2l-gpt.c

Comments

Biju Das Dec. 4, 2023, 2:55 p.m. UTC | #1
Hi Uwe,

Gentle ping. Are you happy with this patch?

Please let me know.

Cheers,
Biju

> -----Original Message-----
> From: Biju Das <biju.das.jz@bp.renesas.com>
> Sent: Monday, November 20, 2023 11:33 AM
> Subject: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> 
> RZ/G2L General PWM Timer (GPT) composed of 8 channels with 32-bit timer
> (GPT32E). It supports the following functions
>  * 32 bits × 8 channels
>  * Up-counting or down-counting (saw waves) or up/down-counting
>    (triangle waves) for each counter.
>  * Clock sources independently selectable for each channel
>  * Two I/O pins per channel
>  * Two output compare/input capture registers per channel
>  * For the two output compare/input capture registers of each channel,
>    four registers are provided as buffer registers and are capable of
>    operating as comparison registers when buffering is not in use.
>  * In output compare operation, buffer switching can be at crests or
>    troughs, enabling the generation of laterally asymmetric PWM waveforms.
>  * Registers for setting up frame cycles in each channel (with capability
>    for generating interrupts at overflow or underflow)
>  * Generation of dead times in PWM operation
>  * Synchronous starting, stopping and clearing counters for arbitrary
>    channels
>  * Starting, stopping, clearing and up/down counters in response to input
>    level comparison
>  * Starting, clearing, stopping and up/down counters in response to a
>    maximum of four external triggers
>  * Output pin disable function by dead time error and detected
>    short-circuits between output pins
>  * A/D converter start triggers can be generated (GPT32E0 to GPT32E3)
>  * Enables the noise filter for input capture and external trigger
>    operation
> 
> Add basic pwm support for RZ/G2L GPT driver by creating separate logical
> channels for each IOs.
> 
> Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
> ---
> v16->v17:
> * Added ret = dev_err_probe() to avoid return success in probe().
> * Dropped unneeded MODULE_ALIAS().
> * Dropped .owner from struct rzg2l_gpt_ops.
> * Fixed build issue reported by kernel test robot <lkp@intel.com> by
>   replacing DIV_ROUND_UP()->DIV64_U64_ROUND_UP() in
>   rzg2l_gpt_calculate_pv_or_dc().
> * Added max_val to struct rzg2l_gpt_chip to compute maximum period
>   supported by the HW in probe() and limit its value in apply() to
>   avoid 64-bit overflow with computation.
> * Added helper function calculate_period_or_duty() to avoid losing
>   precision for smaller period/duty cycle values
>   ((2^32 * 10^9 << 2) < 2^64), by not processing the rounded values.
> * Replaced mul_u64_u64_div_u64()->mul_u64_u32_div() as the former is
>   giving warnings with CONFIG_PWM_DEBUG enabled for very high values.
>   eg:
> 	echo "###### Medium setting 11000 sec = 3hours ##"
> 	echo 11000000000000 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/period
> 	echo  5500000000000 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/duty_cycle
> 	dumpgptreg
> 
> 	echo "###### High setting##"
> 	echo 43980465100800 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/period
> 	echo 23980465100800 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/duty_cycle
> 	dumpgptreg
> 
> 	with mul_u64_u32_div():
> 	###### Medium setting 11000 sec = 3hours ##
> 	Read at address  0x10048464 (0xffffb9426464): 0x400746FE
> 	Read at address  0x1004844C (0xffff8fdfb44c): 0x2003A37F
> 	Read at address  0x1004842C (0xffff9855b42c): 0x05000001
> 	###### High setting##
> 	Read at address  0x10048464 (0xffff9101b464): 0xFFFFFFFF
> 	Read at address  0x1004844C (0xffffaee0544c): 0x8B95AD77
> 	Read at address  0x1004842C (0xffffbbc8a42c): 0x05000001
> 
> 	with mul_u64_u64_div_u64():
> 	###### Medium setting 11000 sec = 3hours ##
> 	Read at address  0x10048464 (0xffffb3185464): 0x400746FE
> 	Read at address  0x1004844C (0xffff81ebb44c): 0x2003A37F
> 	Read at address  0x1004842C (0xffff904fd42c): 0x05000001
> 	######[  304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is not
> idempotent (ena=1 pol=0 5500000000000/43980352512000) -> (ena=1 pol=0
> 5500000000000/43980239923200)
> 	 High setting##
> 	[  304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent
> (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0
> 23980465100800/43980239923200)
> 	Read at address  0x10048464 (0xffffb5bb3464): 0xFFFFAA19
> 	Read at address  0x1004844C (0xffff99b8c44c): 0x8B95AD77
> 	Read at address  0x1004842C (0xffffbba2342c): 0x05000001
> v15->v16:
> * Replaced the macro DIV_ROUND_UP_ULL->DIV64_U64_ROUND_UP
> * Added DIV_ROUND_UP in rzg2l_gpt_calculate_pv_or_dc() to avoid loss of
>   precision.
> * Replaced min->min_t() in rzg2l_gpt_calculate_pv_or_dc().
> * Added a comment for rzg2l_gpt_config()
> * Replaced mul_u64_u32_div()->mul_u64_u64_div_u64() in rzg2l_gpt_config()
> * Fixed the logical condition related to counter stop in
>   rzg2l_gpt_config().
> * Dropped pm_runtime_resume_*() from rzg2l_gpt_config() as it is managed
>   by rzg2l_gpt_apply().
> * Moved pm_runtime_resume_*() from rzg2l_gpt_{en,dis}able() to
>   rzg2l_gpt_apply().
> v14->v15:
> * Added enable_count and ch_en_bits variables to struct rzg2l_gpt_chip
>   based on feedback for pwm_mtu3 driver.
> * Updated copyright header and commit description by replacing "This patch
>   adds"-> "Add"
> * Replaced macro RZG2L_GET_CH_INDEX->RZG2L_GET_CH and replaced ch_index-
> >ch
>   throughout
> * rzg2l_gpt_{enable,disable}() enables/disables PWM based on the
>   enable_count.
> * Replaced pm_runtime_get_sync->pm_runtime_resume_and_get and propogated
>   the error in rzg2l_gpt_get_state() and rzg2l_gpt_config()
> * Reduced variable scope in rzg2l_gpt_get_state() by moving most of
> variables
>   inside the if statement.
> * Updated rzg2l_gpt_get_state() by moving duty > period check
>   inside the top if block.
> * Added helper functions rzg2l_gpt_calculate_pv_or_dc()to simplify config.
>   Also Improved the logic in rzg2l_gpt_calculate_pv_or_dc() by using
>   min(period_or_duty_cycle >> (2 * prescale), (u64)U32_MAX);
> * Updated rzg2l_gpt_get_state() by moving duty > period check
>   inside the top if block.
> * Simplified rzg2l_gpt_config() for updating registers
> * Dropped pm_runtime_get_sync() and used bitmap variable "ch_en_bits"
>   to make balanced PM usage count in rzg2l_gpt_reset_assert_pm_disable()
>   For case were unbind is called before apply where pwm is enabled by
>   bootloader.
> * Added error check for clk_rate_exclusive_get() and clk_get_rate() in
>   probe().
> * Dropped prescale from struct rzg2l_gpt_chip.
> * Replaced of_match_ptr(rzg2l_gpt_of_table)->rzg2l_gpt_of_table in struct
>   rzg2l_gpt_driver
> v13->v14:
>  * Removed parenthesis for RZG2L_MAX_HW_CHANNELS and RZG2L_CHANNELS_PER_IO
>  * Removed duty_cycle variable from struct rzg2l_gpt_chip and added
> comment
>    for cache for prescale variable.
>  * Fixed a bug in rzg2l_gpt_cntr_need_stop().
>  * Reordered rzg2l_gpt_config() just above apply()
>  * Replaced pwm_is_enabled()->pwm->state.enabled in config
>  * Replaced pm_runtime_resume_and_get with unconditional
> pm_runtime_get_sync()
>    in config().
>  * Restored duty_cycle > period check in rzg2l_gpt_get_state().
>  * Added error check for clk_prepare_enable() in probe() and propagating
> error
>    to the caller for pm_runtime_resume()
>  * clk_get_rate() is called after enabling the clock and
> clk_rate_exclusive_get()
>  * Simplified rzg2l_gpt_probe() by removing bitmap variables.
>  * Added pm_runtime_idle() to suspend the device during probe.
>  * Moved overflow condition check from config->probe().
>  * Simplified rzg2l_gpt_reset_assert_pm_disable().
> v12->v13:
>  * Replaced Kconfig dependency from ARCH_RENESAS->ARCH_RZG2L
>  * Sorted #include <linux/limits.h> alphabetically
>  * Added a comment for mutex_lock to fix check patch warning
>  * Replaced data type of duty_cycle from unsigned int->u32 as
>    the maximum value stored is U32_MAX.
>  * Improved rzg2l_gpt_config() by removing unwanted duty_cycle related
> code.
>  * Improved rzg2l_gpt_get_state() by setting "val = rzg2l_gpt-
> >duty_cycle[pwm->hwpwm];",
>    and factor "tmp = NSEC_PER_SEC * (u64)val;" out of the if-statement.
>  * Started using DEFINE_RUNTIME_DEV_PM_OPS(), and dropped __maybe_unused
>    from the callbacks.
> v11->v12:
>  * Added return code for get_state()
>  * Cache duty cycle/prescale as the driver cannot read the current duty
>    cycle/prescale from the hardware if the hardware is disabled. Cache the
>    last programmed duty cycle/prescale value to return in that case.
>  * Updated rzg2l_gpt_enable to enable the clocks.
>  * Updated rzg2l_gpt_disable to disable the clocks.
>  * Updated rzg2l_gpt_config() to cache duty cucle/prescale value
>  * Updated rzg2l_gpt_get_state to use cached value of duty
> cycle/prescale,If the PWM
>    is disabled.
>  * Simplified rzg2l_gpt_apply()
>  * Added comments in rzg2l_gpt_reset_assert_pm_disable()
> v10->v11:
>  * Used bitmap_zero for initializing bitmap varable.
>  * Fixed clock imbalance during remove for the case bootloader turning
>    on PWM and module unload is called just after the boot.
>  * Fixed over flow condition in get_state() for a prescale value of 2 &
> more.
>  * Improved rzg2l_gpt_cntr_need_stop() based on prescale as it is the
>    only runtime variable.
>  * Added array for Cache variables state_period and prescale
>  * Probe caches the prescale value set by the bootloader.
>  * Updated rzg2l_gpt_config() to make use of array variables.
> v9->v10:
>  * Updated the error handling in probe(), clk_disable_unprepare called
>    on the error path.
>  * Removed ch_en array and started using bitmask instead.
> v8->v9:
>  * deassert after devm_clk_get() to avoid reset stays deasserted,in case
>    clk_get() fails.
>  * Removed ch_offs from struct rzg2l_gpt_chip and use macro instead.
>  * Updated error handling in probe()
> v7->v8:
>  * Modelled as single PWM device handling multiple channels
>  * Replaced shared reset->devm_reset_control_get_exclusive()
>  * Replaced iowrite32->writel and ioread32->readl
>  * Updated prescale calculation
>  * Added PM runtime callbacks
>  * Updated PM handling and removed "pwm_enabled_by_bootloader" variable
>  * Introduced rzg2l_gpt_is_ch_enabled for checking enable status on both
>    IO's
>  * Moved enable/disable output pins from config->enable/disable.
>  * Added rzg2l_gpt_cntr_need_stop() for caching prescalar/mode values.
> v6->v7:
>  * Added the comment for cacheing rzg2l_gpt->state_period.
>  * Fixed boundary values for pv and dc.
>  * Added comment for modifying mode, prescaler, timer counter and buffer
> enable
>    registers.
>  * Fixed buffer overflow in get_state()
>  * Removed unnecessary assignment of state->period value in get_state().
>  * Fixed state->duty_cycle value in get_state().
>  * Added a limitation for disabling the channels.
> v5->v6:
>  * Updated macros RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH and
>    RZG2L_GTIOR_GTIOB_OUT_LO_END_TOGGLE_CMP_MATCH with computation
>    involving FIELD_PREP macro.
>  * Removed struct rzg2l_gpt_phase and started using RZG2L_GTCCR macro
>    for duty_offset.
>  * replaced misnomer real_period->state_period.
>  * Added handling for values >= (1024 << 32) for both period
>    and duty cycle.
>  * Added comments for pwm {en,dis}abled by bootloader during probe.
> v4->v5:
>  * Added Hardware manual details
>  * Replaced the comment GTCNT->Counter
>  * Removed the macros RZG2L_GPT_IO_PER_CHANNEL and chip.npwm directly
>    used in probe.
>  * Removed the unsed macro RZG2L_GTPR_MAX_VALUE
>  * Added driver prefix for the type name and the variable.
>  * Initialization of per_channel data moved from request->probe.
>  * Updated clr parameter for rzg2l_gpt_modify for Start count.
>  * Started using mutex and usage_count for handling shared
>    period and prescalar for the 2 channels.
>  * Updated the comment cycle->period.
>  * Removed clk_disable from rzg2l_gpt_reset_assert_pm_disable()
>  * Replaced pc->rzg2l_gpt.
>  * Updated prescale calculation.
>  * Moved pm_runtime_{get_sync,put} from {request,free}->{enable,disable}
>  * Removed platform_set_drvdata as it is unused
>  * Removed the variable pwm_enabled_by_bootloader
>  * Added dev_err_probe in various error paths in probe.
>  * Added an error message, if devm_pwmchip_add() fails.
> v3->v4:
>  * Changed the local variable type i from u16->u8 and prescaled_period_
>    cycles from u64->u32 in calculate_prescale().
>  * Replaced mul_u64_u64_div_u64()->mul_u64_u32_div()
>  * Dropped the comma after the sentinel.
>  * Add a variable to track pwm enabled by bootloader and added comments
>    in probe().
>  * Removed unnecessary rzg2l_gpt_reset_assert_pm_disable() from probe.
>  * Replaced devm_clk_get()->devm_clk_get_prepared()
>  * Removed devm_clk_get_optional_enabled()
> v2->v3:
>  * Updated limitation section
>  * Added prefix "RZG2L_" for all macros
>  * Modified prescale calculation
>  * Removed pwm_set_chip_data
>  * Updated comment related to modifying Mode and Prescaler
>  * Updated setting of prescale value in rzg2l_gpt_config()
>  * Removed else branch from rzg2l_gpt_get_state()
>  * removed the err label from rzg2l_gpt_apply()
>  * Added devm_clk_get_optional_enabled() to retain clk on status,
>    in case bootloader turns on the clk of pwm.
>  * Replaced devm_reset_control_get_exclusive-
> >devm_reset_control_get_shared
>    as single reset shared between 8 channels.
> v1->v2:
>  * Added Limitations section
>  * dropped "_MASK" from the define names.
>  * used named initializer for struct phase
>  * Added gpt_pwm_device into a flexible array member in rzg2l_gpt_chip
>  * Revised the logic for prescale
>  * Added .get_state callback
>  * Improved error handling in rzg2l_gpt_apply
>  * Removed .remove callback
>  * Tested driver with PWM_DEBUG enabled
> RFC->V1:
>  * Updated macros
>  * replaced rzg2l_gpt_write_mask()->rzg2l_gpt_modify()
>  * Added rzg2l_gpt_read()
> ---
>  drivers/pwm/Kconfig         |  11 +
>  drivers/pwm/Makefile        |   1 +
>  drivers/pwm/pwm-rzg2l-gpt.c | 572 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 584 insertions(+)
>  create mode 100644 drivers/pwm/pwm-rzg2l-gpt.c
> 
> diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index
> 4b956d661755..bf658bb472f5 100644
> --- a/drivers/pwm/Kconfig
> +++ b/drivers/pwm/Kconfig
> @@ -513,6 +513,17 @@ config PWM_ROCKCHIP
>  	  Generic PWM framework driver for the PWM controller found on
>  	  Rockchip SoCs.
> 
> +config PWM_RZG2L_GPT
> +	tristate "Renesas RZ/G2L General PWM Timer support"
> +	depends on ARCH_RZG2L || COMPILE_TEST
> +	depends on HAS_IOMEM
> +	help
> +	  This driver exposes the General PWM Timer controller found in
> Renesas
> +	  RZ/G2L like chips through the PWM API.
> +
> +	  To compile this driver as a module, choose M here: the module
> +	  will be called pwm-rzg2l-gpt.
> +
>  config PWM_RZ_MTU3
>  	tristate "Renesas RZ/G2L MTU3a PWM Timer support"
>  	depends on RZ_MTU3
> diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile index
> c5ec9e168ee7..50a6520363aa 100644
> --- a/drivers/pwm/Makefile
> +++ b/drivers/pwm/Makefile
> @@ -47,6 +47,7 @@ obj-$(CONFIG_PWM_RASPBERRYPI_POE)	+= pwm-raspberrypi-
> poe.o
>  obj-$(CONFIG_PWM_RCAR)		+= pwm-rcar.o
>  obj-$(CONFIG_PWM_RENESAS_TPU)	+= pwm-renesas-tpu.o
>  obj-$(CONFIG_PWM_ROCKCHIP)	+= pwm-rockchip.o
> +obj-$(CONFIG_PWM_RZG2L_GPT)	+= pwm-rzg2l-gpt.o
>  obj-$(CONFIG_PWM_RZ_MTU3)	+= pwm-rz-mtu3.o
>  obj-$(CONFIG_PWM_SAMSUNG)	+= pwm-samsung.o
>  obj-$(CONFIG_PWM_SIFIVE)	+= pwm-sifive.o
> diff --git a/drivers/pwm/pwm-rzg2l-gpt.c b/drivers/pwm/pwm-rzg2l-gpt.c new
> file mode 100644 index 000000000000..428e6e577db6
> --- /dev/null
> +++ b/drivers/pwm/pwm-rzg2l-gpt.c
> @@ -0,0 +1,572 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Renesas RZ/G2L General PWM Timer (GPT) driver
> + *
> + * Copyright (C) 2023 Renesas Electronics Corporation
> + *
> + * Hardware manual for this IP can be found here
> + *
> +https://www.renesas.com/eu/en/document/mah/rzg2l-group-rzg2lc-group-use
> +rs-manual-hardware-0?language=en
> + *
> + * Limitations:
> + * - Counter must be stopped before modifying Mode and Prescaler.
> + * - When PWM is disabled, the output is driven to inactive.
> + * - While the hardware supports both polarities, the driver (for now)
> + *   only handles normal polarity.
> + * - When both channels are used, disabling the channel on one stops the
> + *   other.
> + */
> +
> +#include <linux/bitfield.h>
> +#include <linux/clk.h>
> +#include <linux/io.h>
> +#include <linux/limits.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/platform_device.h>
> +#include <linux/pm_runtime.h>
> +#include <linux/pwm.h>
> +#include <linux/reset.h>
> +#include <linux/time.h>
> +
> +#define RZG2L_GTCR		0x2c
> +#define RZG2L_GTUDDTYC		0x30
> +#define RZG2L_GTIOR		0x34
> +#define RZG2L_GTBER		0x40
> +#define RZG2L_GTCNT		0x48
> +#define RZG2L_GTCCRA		0x4c
> +#define RZG2L_GTCCRB		0x50
> +#define RZG2L_GTPR		0x64
> +
> +#define RZG2L_GTCR_CST		BIT(0)
> +#define RZG2L_GTCR_MD		GENMASK(18, 16)
> +#define RZG2L_GTCR_TPCS		GENMASK(26, 24)
> +
> +#define RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE	FIELD_PREP(RZG2L_GTCR_MD, 0)
> +
> +#define RZG2L_GTUDDTYC_UP	BIT(0)
> +#define RZG2L_GTUDDTYC_UDF	BIT(1)
> +#define RZG2L_UP_COUNTING	(RZG2L_GTUDDTYC_UP | RZG2L_GTUDDTYC_UDF)
> +
> +#define RZG2L_GTIOR_GTIOA	GENMASK(4, 0)
> +#define RZG2L_GTIOR_GTIOB	GENMASK(20, 16)
> +#define RZG2L_GTIOR_OAE		BIT(8)
> +#define RZG2L_GTIOR_OBE		BIT(24)
> +
> +#define RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE	0x07
> +#define RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE	0x1b
> +
> +#define RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH \
> +	(RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE | RZG2L_GTIOR_OAE) #define
> +RZG2L_GTIOR_GTIOA_OUT_LO_END_TOGGLE_CMP_MATCH \
> +	(RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE | RZG2L_GTIOR_OAE) #define
> +RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH \
> +	(FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE)
> |
> +RZG2L_GTIOR_OBE) #define RZG2L_GTIOR_GTIOB_OUT_LO_END_TOGGLE_CMP_MATCH \
> +	(FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE)
> |
> +RZG2L_GTIOR_OBE)
> +
> +#define RZG2L_GTCCR(i) (0x4c + 4 * (i))
> +
> +#define RZG2L_MAX_HW_CHANNELS	8
> +#define RZG2L_CHANNELS_PER_IO	2
> +#define RZG2L_MAX_PWM_CHANNELS	(RZG2L_MAX_HW_CHANNELS *
> RZG2L_CHANNELS_PER_IO)
> +#define RZG2L_MAX_SCALE_FACTOR	1024
> +
> +#define RZG2L_IS_IOB(a)	((a) & 0x1)
> +#define RZG2L_GET_CH(a)	((a) / 2)
> +
> +#define RZG2L_GET_CH_OFFS(i) (0x100 * (i))
> +
> +struct rzg2l_gpt_chip {
> +	struct pwm_chip chip;
> +	void __iomem *mmio;
> +	struct reset_control *rstc;
> +	struct clk *clk;
> +	struct mutex lock; /* lock to protect shared channel resources */
> +	unsigned long rate;
> +	u64 max_val;
> +	u32 state_period[RZG2L_MAX_HW_CHANNELS];
> +	u32 user_count[RZG2L_MAX_HW_CHANNELS];
> +	u32 enable_count[RZG2L_MAX_HW_CHANNELS];
> +	DECLARE_BITMAP(ch_en_bits, RZG2L_MAX_PWM_CHANNELS); };
> +
> +static inline struct rzg2l_gpt_chip *to_rzg2l_gpt_chip(struct pwm_chip
> +*chip) {
> +	return container_of(chip, struct rzg2l_gpt_chip, chip); }
> +
> +static void rzg2l_gpt_write(struct rzg2l_gpt_chip *rzg2l_gpt, u32 reg,
> +u32 data) {
> +	writel(data, rzg2l_gpt->mmio + reg);
> +}
> +
> +static u32 rzg2l_gpt_read(struct rzg2l_gpt_chip *rzg2l_gpt, u32 reg) {
> +	return readl(rzg2l_gpt->mmio + reg);
> +}
> +
> +static void rzg2l_gpt_modify(struct rzg2l_gpt_chip *rzg2l_gpt, u32 reg,
> u32 clr,
> +			     u32 set)
> +{
> +	rzg2l_gpt_write(rzg2l_gpt, reg,
> +			(rzg2l_gpt_read(rzg2l_gpt, reg) & ~clr) | set); }
> +
> +static u8 rzg2l_gpt_calculate_prescale(struct rzg2l_gpt_chip *rzg2l_gpt,
> +				       u64 period_cycles)
> +{
> +	u32 prescaled_period_cycles;
> +	u8 prescale;
> +
> +	prescaled_period_cycles = period_cycles >> 32;
> +	if (prescaled_period_cycles >= 256)
> +		prescale = 5;
> +	else
> +		prescale = (fls(prescaled_period_cycles) + 1) / 2;
> +
> +	return prescale;
> +}
> +
> +static int rzg2l_gpt_request(struct pwm_chip *chip, struct pwm_device
> +*pwm) {
> +	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
> +	u32 ch = RZG2L_GET_CH(pwm->hwpwm);
> +
> +	mutex_lock(&rzg2l_gpt->lock);
> +	rzg2l_gpt->user_count[ch]++;
> +	mutex_unlock(&rzg2l_gpt->lock);
> +
> +	return 0;
> +}
> +
> +static void rzg2l_gpt_free(struct pwm_chip *chip, struct pwm_device
> +*pwm) {
> +	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
> +	u32 ch = RZG2L_GET_CH(pwm->hwpwm);
> +
> +	mutex_lock(&rzg2l_gpt->lock);
> +	rzg2l_gpt->user_count[ch]--;
> +	mutex_unlock(&rzg2l_gpt->lock);
> +}
> +
> +static bool rzg2l_gpt_is_ch_enabled(struct rzg2l_gpt_chip *rzg2l_gpt,
> +u8 hwpwm) {
> +	u8 ch = RZG2L_GET_CH(hwpwm);
> +	u32 offs = RZG2L_GET_CH_OFFS(ch);
> +	bool is_counter_running, is_output_en;
> +	u32 val;
> +
> +	val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTCR);
> +	is_counter_running = val & RZG2L_GTCR_CST;
> +
> +	val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTIOR);
> +	if (RZG2L_IS_IOB(hwpwm))
> +		is_output_en = val & RZG2L_GTIOR_OBE;
> +	else
> +		is_output_en = val & RZG2L_GTIOR_OAE;
> +
> +	return (is_counter_running && is_output_en); }
> +
> +static int rzg2l_gpt_enable(struct rzg2l_gpt_chip *rzg2l_gpt,
> +			    struct pwm_device *pwm)
> +{
> +	u8 ch = RZG2L_GET_CH(pwm->hwpwm);
> +	u32 offs = RZG2L_GET_CH_OFFS(ch);
> +
> +	/* Enable pin output */
> +	if (RZG2L_IS_IOB(pwm->hwpwm))
> +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR,
> +				 RZG2L_GTIOR_GTIOB | RZG2L_GTIOR_OBE,
> +				 RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH);
> +	else
> +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR,
> +				 RZG2L_GTIOR_GTIOA | RZG2L_GTIOR_OAE,
> +				 RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH);
> +
> +	mutex_lock(&rzg2l_gpt->lock);
> +	if (!rzg2l_gpt->enable_count[ch])
> +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, 0,
> RZG2L_GTCR_CST);
> +
> +	rzg2l_gpt->enable_count[ch]++;
> +	mutex_unlock(&rzg2l_gpt->lock);
> +
> +	return 0;
> +}
> +
> +static void rzg2l_gpt_disable(struct rzg2l_gpt_chip *rzg2l_gpt,
> +			      struct pwm_device *pwm)
> +{
> +	u8 ch = RZG2L_GET_CH(pwm->hwpwm);
> +	u32 offs = RZG2L_GET_CH_OFFS(ch);
> +
> +	/* Disable pin output */
> +	if (RZG2L_IS_IOB(pwm->hwpwm))
> +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR,
> RZG2L_GTIOR_OBE, 0);
> +	else
> +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR,
> RZG2L_GTIOR_OAE, 0);
> +
> +	/* Stop count, Output low on GTIOCx pin when counting stops */
> +	mutex_lock(&rzg2l_gpt->lock);
> +	rzg2l_gpt->enable_count[ch]--;
> +	if (!rzg2l_gpt->enable_count[ch])
> +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_CST,
> 0);
> +
> +	mutex_unlock(&rzg2l_gpt->lock);
> +
> +	/*
> +	 * Probe() set these bits, if pwm is enabled by bootloader. In such
> +	 * case, clearing the bits will avoid errors during unbind.
> +	 */
> +	if (test_bit(pwm->hwpwm, rzg2l_gpt->ch_en_bits))
> +		clear_bit(pwm->hwpwm, rzg2l_gpt->ch_en_bits); }
> +
> +static u64 calculate_period_or_duty(struct rzg2l_gpt_chip *rzg2l_gpt,
> +u32 val, u8 prescale) {
> +	u64 retval;
> +	u64 tmp;
> +
> +	tmp = NSEC_PER_SEC * (u64)val;
> +	/*
> +	 * To avoid losing precision for smaller period/duty cycle values
> +	 * ((2^32 * 10^9 << 2) < 2^64), do not process the rounded values.
> +	 */
> +	if (prescale < 2)
> +		retval = DIV64_U64_ROUND_UP(tmp << (2 * prescale), rzg2l_gpt-
> >rate);
> +	else
> +		retval = DIV64_U64_ROUND_UP(tmp, rzg2l_gpt->rate) << (2 *
> prescale);
> +
> +	return retval;
> +}
> +
> +static int rzg2l_gpt_get_state(struct pwm_chip *chip, struct pwm_device
> *pwm,
> +			       struct pwm_state *state)
> +{
> +	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
> +	int rc;
> +
> +	rc = pm_runtime_resume_and_get(chip->dev);
> +	if (rc)
> +		return rc;
> +
> +	state->enabled = rzg2l_gpt_is_ch_enabled(rzg2l_gpt, pwm->hwpwm);
> +	if (state->enabled) {
> +		u32 ch = RZG2L_GET_CH(pwm->hwpwm);
> +		u32 offs = RZG2L_GET_CH_OFFS(ch);
> +		u8 prescale;
> +		u32 val;
> +
> +		val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTCR);
> +		prescale = FIELD_GET(RZG2L_GTCR_TPCS, val);
> +
> +		val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTPR);
> +		state->period = calculate_period_or_duty(rzg2l_gpt, val,
> prescale);
> +
> +		val = rzg2l_gpt_read(rzg2l_gpt,
> +				     offs + RZG2L_GTCCR(RZG2L_IS_IOB(pwm-
> >hwpwm)));
> +		state->duty_cycle = calculate_period_or_duty(rzg2l_gpt, val,
> prescale);
> +		if (state->duty_cycle > state->period)
> +			state->duty_cycle = state->period;
> +	}
> +
> +	state->polarity = PWM_POLARITY_NORMAL;
> +	pm_runtime_put(chip->dev);
> +
> +	return 0;
> +}
> +
> +static u32 rzg2l_gpt_calculate_pv_or_dc(u64 period_or_duty_cycle, u8
> +prescale) {
> +	return min_t(u64, DIV64_U64_ROUND_UP(period_or_duty_cycle, 1 << (2 *
> prescale)),
> +		     (u64)U32_MAX);
> +}
> +
> +/* Caller holds the lock while calling rzg2l_gpt_config() */ static int
> +rzg2l_gpt_config(struct pwm_chip *chip, struct pwm_device *pwm,
> +			    const struct pwm_state *state)
> +{
> +	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
> +	u8 ch = RZG2L_GET_CH(pwm->hwpwm);
> +	u32 offs = RZG2L_GET_CH_OFFS(ch);
> +	unsigned long pv, dc;
> +	u64 period_cycles;
> +	u64 duty_cycles;
> +	u8 prescale;
> +
> +	/*
> +	 * GPT counter is shared by multiple channels, so prescale and
> period
> +	 * can NOT be modified when there are multiple channels in use with
> +	 * different settings.
> +	 */
> +	if (state->period != rzg2l_gpt->state_period[ch] && rzg2l_gpt-
> >user_count[ch] > 1)
> +		return -EBUSY;
> +
> +	/* Limit period/duty cycle to max value supported by the HW */
> +	if (state->period > rzg2l_gpt->max_val)
> +		period_cycles = rzg2l_gpt->max_val;
> +	else
> +		period_cycles = state->period;
> +
> +	period_cycles = mul_u64_u32_div(period_cycles, rzg2l_gpt->rate,
> NSEC_PER_SEC);
> +	prescale = rzg2l_gpt_calculate_prescale(rzg2l_gpt, period_cycles);
> +
> +	pv = rzg2l_gpt_calculate_pv_or_dc(period_cycles, prescale);
> +
> +	if (state->duty_cycle > rzg2l_gpt->max_val)
> +		duty_cycles = rzg2l_gpt->max_val;
> +	else
> +		duty_cycles = state->duty_cycle;
> +
> +	duty_cycles = mul_u64_u32_div(duty_cycles, rzg2l_gpt->rate,
> NSEC_PER_SEC);
> +	dc = rzg2l_gpt_calculate_pv_or_dc(duty_cycles, prescale);
> +
> +	/*
> +	 * GPT counter is shared by multiple channels, we cache the period
> value
> +	 * from the first enabled channel and use the same value for both
> +	 * channels.
> +	 */
> +	rzg2l_gpt->state_period[ch] = state->period;
> +
> +	/*
> +	 * Counter must be stopped before modifying mode, prescaler, timer
> +	 * counter and buffer enable registers. These registers are shared
> +	 * between both channels. So allow updating these registers only for
> the
> +	 * first enabled channel.
> +	 */
> +	if (rzg2l_gpt->enable_count[ch] <= 1)
> +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_CST,
> 0);
> +
> +	/* GPT set operating mode (saw-wave up-counting) */
> +	rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_MD,
> +			 RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE);
> +
> +	/* Set count direction */
> +	rzg2l_gpt_write(rzg2l_gpt, offs + RZG2L_GTUDDTYC,
> RZG2L_UP_COUNTING);
> +	/* Select count clock */
> +	rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_TPCS,
> +			 FIELD_PREP(RZG2L_GTCR_TPCS, prescale));
> +
> +	/* Set period */
> +	rzg2l_gpt_write(rzg2l_gpt, offs + RZG2L_GTPR, pv);
> +
> +	/* Set duty cycle */
> +	rzg2l_gpt_write(rzg2l_gpt, offs + RZG2L_GTCCR(RZG2L_IS_IOB(pwm-
> >hwpwm)),
> +			dc);
> +
> +	/* Set initial value for counter */
> +	rzg2l_gpt_write(rzg2l_gpt, offs + RZG2L_GTCNT, 0);
> +
> +	/* Set no buffer operation */
> +	rzg2l_gpt_write(rzg2l_gpt, offs + RZG2L_GTBER, 0);
> +
> +	/* Restart the counter after updating the registers */
> +	if (rzg2l_gpt->enable_count[ch] <= 1)
> +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR,
> +				 RZG2L_GTCR_CST, RZG2L_GTCR_CST);
> +
> +	return 0;
> +}
> +
> +static int rzg2l_gpt_apply(struct pwm_chip *chip, struct pwm_device *pwm,
> +			   const struct pwm_state *state)
> +{
> +	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
> +	bool enabled = pwm->state.enabled;
> +	int ret;
> +
> +	if (state->polarity != PWM_POLARITY_NORMAL)
> +		return -EINVAL;
> +
> +	if (!state->enabled) {
> +		if (enabled) {
> +			rzg2l_gpt_disable(rzg2l_gpt, pwm);
> +			pm_runtime_put_sync(rzg2l_gpt->chip.dev);
> +		}
> +
> +		return 0;
> +	}
> +
> +	if (!enabled) {
> +		ret = pm_runtime_resume_and_get(rzg2l_gpt->chip.dev);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	mutex_lock(&rzg2l_gpt->lock);
> +	ret = rzg2l_gpt_config(chip, pwm, state);
> +	mutex_unlock(&rzg2l_gpt->lock);
> +	if (ret)
> +		return ret;
> +
> +	if (!enabled)
> +		ret = rzg2l_gpt_enable(rzg2l_gpt, pwm);
> +
> +	return ret;
> +}
> +
> +static const struct pwm_ops rzg2l_gpt_ops = {
> +	.request = rzg2l_gpt_request,
> +	.free = rzg2l_gpt_free,
> +	.get_state = rzg2l_gpt_get_state,
> +	.apply = rzg2l_gpt_apply,
> +};
> +
> +static int rzg2l_gpt_pm_runtime_suspend(struct device *dev) {
> +	struct rzg2l_gpt_chip *rzg2l_gpt = dev_get_drvdata(dev);
> +
> +	clk_disable_unprepare(rzg2l_gpt->clk);
> +
> +	return 0;
> +}
> +
> +static int rzg2l_gpt_pm_runtime_resume(struct device *dev) {
> +	struct rzg2l_gpt_chip *rzg2l_gpt = dev_get_drvdata(dev);
> +
> +	return clk_prepare_enable(rzg2l_gpt->clk);
> +}
> +
> +static DEFINE_RUNTIME_DEV_PM_OPS(rzg2l_gpt_pm_ops,
> +				 rzg2l_gpt_pm_runtime_suspend,
> +				 rzg2l_gpt_pm_runtime_resume, NULL);
> +
> +static void rzg2l_gpt_reset_assert_pm_disable(void *data) {
> +	struct rzg2l_gpt_chip *rzg2l_gpt = data;
> +	u32 i;
> +
> +	clk_rate_exclusive_put(rzg2l_gpt->clk);
> +	/*
> +	 * The below check is for making balanced PM usage count
> +	 * eg: boot loader is turning on PWM and probe increments the PM
> usage
> +	 * count. Before apply, if there is unbind/remove callback we need
> to
> +	 * decrement the PM usage count.
> +	 */
> +	for (i = 0; i < RZG2L_MAX_PWM_CHANNELS; i++) {
> +		if (test_bit(i, rzg2l_gpt->ch_en_bits))
> +			pm_runtime_put(rzg2l_gpt->chip.dev);
> +	}
> +
> +	pm_runtime_disable(rzg2l_gpt->chip.dev);
> +	pm_runtime_set_suspended(rzg2l_gpt->chip.dev);
> +	reset_control_assert(rzg2l_gpt->rstc);
> +}
> +
> +static int rzg2l_gpt_probe(struct platform_device *pdev) {
> +	struct rzg2l_gpt_chip *rzg2l_gpt;
> +	int ret;
> +	u32 i;
> +
> +	rzg2l_gpt = devm_kzalloc(&pdev->dev, sizeof(*rzg2l_gpt),
> GFP_KERNEL);
> +	if (!rzg2l_gpt)
> +		return -ENOMEM;
> +
> +	rzg2l_gpt->mmio = devm_platform_ioremap_resource(pdev, 0);
> +	if (IS_ERR(rzg2l_gpt->mmio))
> +		return PTR_ERR(rzg2l_gpt->mmio);
> +
> +	rzg2l_gpt->rstc = devm_reset_control_get_exclusive(&pdev->dev,
> NULL);
> +	if (IS_ERR(rzg2l_gpt->rstc))
> +		return dev_err_probe(&pdev->dev, PTR_ERR(rzg2l_gpt->rstc),
> +				     "get reset failed\n");
> +
> +	rzg2l_gpt->clk = devm_clk_get(&pdev->dev, NULL);
> +	if (IS_ERR(rzg2l_gpt->clk))
> +		return dev_err_probe(&pdev->dev, PTR_ERR(rzg2l_gpt->clk),
> +				     "cannot get clock\n");
> +
> +	ret = reset_control_deassert(rzg2l_gpt->rstc);
> +	if (ret)
> +		return dev_err_probe(&pdev->dev, ret,
> +				     "cannot deassert reset control\n");
> +
> +	ret = clk_prepare_enable(rzg2l_gpt->clk);
> +	if (ret)
> +		goto err_reset;
> +
> +	ret = clk_rate_exclusive_get(rzg2l_gpt->clk);
> +	if (ret)
> +		goto err_clk_disable;
> +
> +	rzg2l_gpt->rate = clk_get_rate(rzg2l_gpt->clk);
> +	if (!rzg2l_gpt->rate) {
> +		ret = dev_err_probe(&pdev->dev, -EINVAL, "gpt clk rate is 0");
> +		goto err_clk_rate_put;
> +	}
> +
> +	/*
> +	 * Refuse clk rates > 1 GHz to prevent overflow later for computing
> +	 * period and duty cycle.
> +	 */
> +	if (rzg2l_gpt->rate > NSEC_PER_SEC) {
> +		ret = -EINVAL;
> +		goto err_clk_rate_put;
> +	}
> +
> +	rzg2l_gpt->max_val = mul_u64_u64_div_u64(U32_MAX, NSEC_PER_SEC,
> +						 rzg2l_gpt->rate) *
> RZG2L_MAX_SCALE_FACTOR;
> +
> +	/*
> +	 *  We need to keep the clock on, in case the bootloader has enabled
> the
> +	 *  PWM and is running during probe().
> +	 */
> +	for (i = 0; i < RZG2L_MAX_PWM_CHANNELS; i++) {
> +		if (rzg2l_gpt_is_ch_enabled(rzg2l_gpt, i)) {
> +			set_bit(i, rzg2l_gpt->ch_en_bits);
> +			pm_runtime_get_sync(&pdev->dev);
> +		}
> +	}
> +
> +	mutex_init(&rzg2l_gpt->lock);
> +	platform_set_drvdata(pdev, rzg2l_gpt);
> +	rzg2l_gpt->chip.dev = &pdev->dev;
> +
> +	pm_runtime_set_active(&pdev->dev);
> +	pm_runtime_enable(&pdev->dev);
> +	ret = devm_add_action_or_reset(&pdev->dev,
> +				       rzg2l_gpt_reset_assert_pm_disable,
> +				       rzg2l_gpt);
> +	if (ret < 0)
> +		return ret;
> +
> +	rzg2l_gpt->chip.ops = &rzg2l_gpt_ops;
> +	rzg2l_gpt->chip.npwm = RZG2L_MAX_PWM_CHANNELS;
> +	ret = devm_pwmchip_add(&pdev->dev, &rzg2l_gpt->chip);
> +	if (ret)
> +		return dev_err_probe(&pdev->dev, ret, "failed to add PWM
> chip\n");
> +
> +	pm_runtime_idle(&pdev->dev);
> +
> +	return 0;
> +
> +err_clk_rate_put:
> +	clk_rate_exclusive_put(rzg2l_gpt->clk);
> +err_clk_disable:
> +	clk_disable_unprepare(rzg2l_gpt->clk);
> +err_reset:
> +	reset_control_assert(rzg2l_gpt->rstc);
> +	return ret;
> +}
> +
> +static const struct of_device_id rzg2l_gpt_of_table[] = {
> +	{ .compatible = "renesas,rzg2l-gpt", },
> +	{ /* Sentinel */ }
> +};
> +MODULE_DEVICE_TABLE(of, rzg2l_gpt_of_table);
> +
> +static struct platform_driver rzg2l_gpt_driver = {
> +	.driver = {
> +		.name = "pwm-rzg2l-gpt",
> +		.pm = pm_ptr(&rzg2l_gpt_pm_ops),
> +		.of_match_table = rzg2l_gpt_of_table,
> +	},
> +	.probe = rzg2l_gpt_probe,
> +};
> +module_platform_driver(rzg2l_gpt_driver);
> +
> +MODULE_AUTHOR("Biju Das <biju.das.jz@bp.renesas.com>");
> +MODULE_DESCRIPTION("Renesas RZ/G2L General PWM Timer (GPT) Driver");
> +MODULE_LICENSE("GPL");
> --
> 2.25.1
Uwe Kleine-König Dec. 6, 2023, 6:38 p.m. UTC | #2
Hello Biju,

On Mon, Nov 20, 2023 at 11:33:06AM +0000, Biju Das wrote:
> diff --git a/drivers/pwm/pwm-rzg2l-gpt.c b/drivers/pwm/pwm-rzg2l-gpt.c
> new file mode 100644
> index 000000000000..428e6e577db6
> --- /dev/null
> +++ b/drivers/pwm/pwm-rzg2l-gpt.c
> @@ -0,0 +1,572 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Renesas RZ/G2L General PWM Timer (GPT) driver
> + *
> + * Copyright (C) 2023 Renesas Electronics Corporation
> + *
> + * Hardware manual for this IP can be found here
> + * https://www.renesas.com/eu/en/document/mah/rzg2l-group-rzg2lc-group-users-manual-hardware-0?language=en
> + *
> + * Limitations:
> + * - Counter must be stopped before modifying Mode and Prescaler.
> + * - When PWM is disabled, the output is driven to inactive.
> + * - While the hardware supports both polarities, the driver (for now)
> + *   only handles normal polarity.
> + * - When both channels are used, disabling the channel on one stops the
> + *   other.

Just for my understanding: The combination of the first and the last
item means you cannot update Moed and Prescaler for one channel without
affecting the other one, right?

> + */
> +
> +#include <linux/bitfield.h>
> +#include <linux/clk.h>
> +#include <linux/io.h>
> +#include <linux/limits.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/platform_device.h>
> +#include <linux/pm_runtime.h>
> +#include <linux/pwm.h>
> +#include <linux/reset.h>
> +#include <linux/time.h>
> +
> +#define RZG2L_GTCR		0x2c
> +#define RZG2L_GTUDDTYC		0x30
> +#define RZG2L_GTIOR		0x34
> +#define RZG2L_GTBER		0x40
> +#define RZG2L_GTCNT		0x48
> +#define RZG2L_GTCCRA		0x4c
> +#define RZG2L_GTCCRB		0x50
> +#define RZG2L_GTPR		0x64
> +
> +#define RZG2L_GTCR_CST		BIT(0)
> +#define RZG2L_GTCR_MD		GENMASK(18, 16)
> +#define RZG2L_GTCR_TPCS		GENMASK(26, 24)
> +
> +#define RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE	FIELD_PREP(RZG2L_GTCR_MD, 0)
> +
> +#define RZG2L_GTUDDTYC_UP	BIT(0)
> +#define RZG2L_GTUDDTYC_UDF	BIT(1)
> +#define RZG2L_UP_COUNTING	(RZG2L_GTUDDTYC_UP | RZG2L_GTUDDTYC_UDF)
> +
> +#define RZG2L_GTIOR_GTIOA	GENMASK(4, 0)
> +#define RZG2L_GTIOR_GTIOB	GENMASK(20, 16)
> +#define RZG2L_GTIOR_OAE		BIT(8)
> +#define RZG2L_GTIOR_OBE		BIT(24)
> +
> +#define RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE	0x07
> +#define RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE	0x1b
> +
> +#define RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH \
> +	(RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE | RZG2L_GTIOR_OAE)
> +#define RZG2L_GTIOR_GTIOA_OUT_LO_END_TOGGLE_CMP_MATCH \
> +	(RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE | RZG2L_GTIOR_OAE)
> +#define RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH \
> +	(FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE) | RZG2L_GTIOR_OBE)
> +#define RZG2L_GTIOR_GTIOB_OUT_LO_END_TOGGLE_CMP_MATCH \
> +	(FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE) | RZG2L_GTIOR_OBE)

These are not all used. Is it sensible to still keep them?

> +#define RZG2L_GTCCR(i) (0x4c + 4 * (i))

For i = 0 this is RZG2L_GTCCRA, for i = 1 it's RZG2L_GTCCRB. Als
RZG2L_GTCCRA and RZG2L_GTCCRB are unused. Maybe move the definition of
RZG2L_GTCCR to the list of registers above?

> +#define RZG2L_MAX_HW_CHANNELS	8
> +#define RZG2L_CHANNELS_PER_IO	2
> +#define RZG2L_MAX_PWM_CHANNELS	(RZG2L_MAX_HW_CHANNELS * RZG2L_CHANNELS_PER_IO)
> +#define RZG2L_MAX_SCALE_FACTOR	1024
> +
> +#define RZG2L_IS_IOB(a)	((a) & 0x1)
> +#define RZG2L_GET_CH(a)	((a) / 2)
> +
> +#define RZG2L_GET_CH_OFFS(i) (0x100 * (i))
> +
> +struct rzg2l_gpt_chip {
> +	struct pwm_chip chip;
> +	void __iomem *mmio;
> +	struct reset_control *rstc;
> +	struct clk *clk;
> +	struct mutex lock; /* lock to protect shared channel resources */
> +	unsigned long rate;
> +	u64 max_val;
> +	u32 state_period[RZG2L_MAX_HW_CHANNELS];
> +	u32 user_count[RZG2L_MAX_HW_CHANNELS];
> +	u32 enable_count[RZG2L_MAX_HW_CHANNELS];
> +	DECLARE_BITMAP(ch_en_bits, RZG2L_MAX_PWM_CHANNELS);
> +};
> +
> [...]
> +
> +static bool rzg2l_gpt_is_ch_enabled(struct rzg2l_gpt_chip *rzg2l_gpt, u8 hwpwm)
> +{
> +	u8 ch = RZG2L_GET_CH(hwpwm);
> +	u32 offs = RZG2L_GET_CH_OFFS(ch);
> +	bool is_counter_running, is_output_en;
> +	u32 val;
> +
> +	val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTCR);
> +	is_counter_running = val & RZG2L_GTCR_CST;
> +
> +	val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTIOR);
> +	if (RZG2L_IS_IOB(hwpwm))
> +		is_output_en = val & RZG2L_GTIOR_OBE;
> +	else
> +		is_output_en = val & RZG2L_GTIOR_OAE;

IIUC the i in RZG2L_GTCCR(i) stands for either A (0) or B (1), right?
Maybe define RZG2L_GTIOR_OxE(i) assuming this is about the same A and B
here? Then this could be simplified to:

	is_output_en = val & RZG2L_GTIOR_OxE(rzg2l_gpt_subchannel(hwpwm));

(maybe modulo better names).

> +	return (is_counter_running && is_output_en);

You could return early after knowing that is_counter_running is false
without determining is_output_en.

> +}
> +
> +static int rzg2l_gpt_enable(struct rzg2l_gpt_chip *rzg2l_gpt,
> +			    struct pwm_device *pwm)
> +{
> +	u8 ch = RZG2L_GET_CH(pwm->hwpwm);
> +	u32 offs = RZG2L_GET_CH_OFFS(ch);
> +
> +	/* Enable pin output */
> +	if (RZG2L_IS_IOB(pwm->hwpwm))
> +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR,
> +				 RZG2L_GTIOR_GTIOB | RZG2L_GTIOR_OBE,
> +				 RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH);
> +	else
> +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR,
> +				 RZG2L_GTIOR_GTIOA | RZG2L_GTIOR_OAE,
> +				 RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH);

Similar suggestion as above for A and B?!

> +	mutex_lock(&rzg2l_gpt->lock);
> +	if (!rzg2l_gpt->enable_count[ch])
> +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, 0, RZG2L_GTCR_CST);
> +
> +	rzg2l_gpt->enable_count[ch]++;
> +	mutex_unlock(&rzg2l_gpt->lock);
> +
> +	return 0;
> +}
> +
> [...]
> +
> +static u64 calculate_period_or_duty(struct rzg2l_gpt_chip *rzg2l_gpt, u32 val, u8 prescale)
> +{
> +	u64 retval;
> +	u64 tmp;
> +
> +	tmp = NSEC_PER_SEC * (u64)val;
> +	/*
> +	 * To avoid losing precision for smaller period/duty cycle values
> +	 * ((2^32 * 10^9 << 2) < 2^64), do not process the rounded values.
> +	 */
> +	if (prescale < 2)
> +		retval = DIV64_U64_ROUND_UP(tmp << (2 * prescale), rzg2l_gpt->rate);
> +	else
> +		retval = DIV64_U64_ROUND_UP(tmp, rzg2l_gpt->rate) << (2 * prescale);

Maybe introduce a mul_u64_u64_div64_roundup (similar to
mul_u64_u64_div64) to also be exact for prescale >= 2?

With prescale <= 5 and rzg2l_gpt->rate >= 1024 this shouldn't work just
fine.

> +	return retval;
> +}
> +
> [...]
> +static u32 rzg2l_gpt_calculate_pv_or_dc(u64 period_or_duty_cycle, u8 prescale)
> +{
> +	return min_t(u64, DIV64_U64_ROUND_UP(period_or_duty_cycle, 1 << (2 * prescale)),
> +		     (u64)U32_MAX);

DIV64_U64_ROUND_UP(period_or_duty_cycle, 1 << (2 * prescale)) can be
calculated a bit cheaper by using:

	(period_or_duty_cycle + 1 << (2 * prescale) - 1) >> (2 * prescale)

assuming the LHS doesn't overflow.

When using min_t, you can drop the cast for U32_MAX.

> +}
> +
> +/* Caller holds the lock while calling rzg2l_gpt_config() */
> +static int rzg2l_gpt_config(struct pwm_chip *chip, struct pwm_device *pwm,
> +			    const struct pwm_state *state)
> +{
> +	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
> +	u8 ch = RZG2L_GET_CH(pwm->hwpwm);
> +	u32 offs = RZG2L_GET_CH_OFFS(ch);
> +	unsigned long pv, dc;
> +	u64 period_cycles;
> +	u64 duty_cycles;
> +	u8 prescale;
> +
> +	/*
> +	 * GPT counter is shared by multiple channels, so prescale and period
> +	 * can NOT be modified when there are multiple channels in use with
> +	 * different settings.
> +	 */
> +	if (state->period != rzg2l_gpt->state_period[ch] && rzg2l_gpt->user_count[ch] > 1)
> +		return -EBUSY;

if (state->period < rzg2l_gpt->state_period[ch] && rzg2l_gpt->user_count[ch] > 1)

would be more forgiving and still correct.

> +	/* Limit period/duty cycle to max value supported by the HW */
> +	if (state->period > rzg2l_gpt->max_val)
> +		period_cycles = rzg2l_gpt->max_val;
> +	else
> +		period_cycles = state->period;
> +
> +	period_cycles = mul_u64_u32_div(period_cycles, rzg2l_gpt->rate, NSEC_PER_SEC);
> +	prescale = rzg2l_gpt_calculate_prescale(rzg2l_gpt, period_cycles);
> +
> +	pv = rzg2l_gpt_calculate_pv_or_dc(period_cycles, prescale);
> +
> +	if (state->duty_cycle > rzg2l_gpt->max_val)
> +		duty_cycles = rzg2l_gpt->max_val;
> +	else
> +		duty_cycles = state->duty_cycle;
> +
> +	duty_cycles = mul_u64_u32_div(duty_cycles, rzg2l_gpt->rate, NSEC_PER_SEC);
> +	dc = rzg2l_gpt_calculate_pv_or_dc(duty_cycles, prescale);
> +
> +	/*
> +	 * GPT counter is shared by multiple channels, we cache the period value
> +	 * from the first enabled channel and use the same value for both
> +	 * channels.
> +	 */
> +	rzg2l_gpt->state_period[ch] = state->period;

With the rounding that happens above, I think it would be more
approriate to track period_cycles here instead of period.

> [...]
> +	rzg2l_gpt->clk = devm_clk_get(&pdev->dev, NULL);
> +	if (IS_ERR(rzg2l_gpt->clk))
> +		return dev_err_probe(&pdev->dev, PTR_ERR(rzg2l_gpt->clk),
> +				     "cannot get clock\n");
> +
> +	ret = reset_control_deassert(rzg2l_gpt->rstc);
> +	if (ret)
> +		return dev_err_probe(&pdev->dev, ret,
> +				     "cannot deassert reset control\n");
> +
> +	ret = clk_prepare_enable(rzg2l_gpt->clk);
> +	if (ret)
> +		goto err_reset;

devm_clk_get_enabled()?

> [...]
> +	/*
> +	 * Refuse clk rates > 1 GHz to prevent overflow later for computing
> +	 * period and duty cycle.
> +	 */
> +	if (rzg2l_gpt->rate > NSEC_PER_SEC) {
> +		ret = -EINVAL;
> +		goto err_clk_rate_put;
> +	}
> +
> +	rzg2l_gpt->max_val = mul_u64_u64_div_u64(U32_MAX, NSEC_PER_SEC,
> +						 rzg2l_gpt->rate) * RZG2L_MAX_SCALE_FACTOR;

Is it clear that this won't overflow?

> +	/*
> +	 *  We need to keep the clock on, in case the bootloader has enabled the
> +	 *  PWM and is running during probe().
> +	 */

Best regards
Uwe
Biju Das Dec. 7, 2023, 6:26 p.m. UTC | #3
Hi Uwe,

Thanks for the feedback.

> -----Original Message-----
> From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> Sent: Wednesday, December 6, 2023 6:38 PM
> Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> 
> Hello Biju,
> 
> On Mon, Nov 20, 2023 at 11:33:06AM +0000, Biju Das wrote:
> > diff --git a/drivers/pwm/pwm-rzg2l-gpt.c b/drivers/pwm/pwm-rzg2l-gpt.c
> > new file mode 100644 index 000000000000..428e6e577db6
> > --- /dev/null
> > +++ b/drivers/pwm/pwm-rzg2l-gpt.c
> > @@ -0,0 +1,572 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Renesas RZ/G2L General PWM Timer (GPT) driver
> > + *
> > + * Copyright (C) 2023 Renesas Electronics Corporation
> > + *
> > + * Hardware manual for this IP can be found here
> > + *
> > +https://www.renesas.com/eu/en/document/mah/rzg2l-group-rzg2lc-group-u
> > +sers-manual-hardware-0?language=en
> > + *
> > + * Limitations:
> > + * - Counter must be stopped before modifying Mode and Prescaler.
> > + * - When PWM is disabled, the output is driven to inactive.
> > + * - While the hardware supports both polarities, the driver (for now)
> > + *   only handles normal polarity.
> > + * - When both channels are used, disabling the channel on one stops
> the
> > + *   other.
> 
> Just for my understanding: The combination of the first and the last item
> means you cannot update Moed and Prescaler for one channel without
> affecting the other one, right?

Yes that is correct.

> 
> > + */
> > +
> > +#include <linux/bitfield.h>
> > +#include <linux/clk.h>
> > +#include <linux/io.h>
> > +#include <linux/limits.h>
> > +#include <linux/module.h>
> > +#include <linux/of.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/pm_runtime.h>
> > +#include <linux/pwm.h>
> > +#include <linux/reset.h>
> > +#include <linux/time.h>
> > +
> > +#define RZG2L_GTCR		0x2c
> > +#define RZG2L_GTUDDTYC		0x30
> > +#define RZG2L_GTIOR		0x34
> > +#define RZG2L_GTBER		0x40
> > +#define RZG2L_GTCNT		0x48
> > +#define RZG2L_GTCCRA		0x4c
> > +#define RZG2L_GTCCRB		0x50
> > +#define RZG2L_GTPR		0x64
> > +
> > +#define RZG2L_GTCR_CST		BIT(0)
> > +#define RZG2L_GTCR_MD		GENMASK(18, 16)
> > +#define RZG2L_GTCR_TPCS		GENMASK(26, 24)
> > +
> > +#define RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE	FIELD_PREP(RZG2L_GTCR_MD,
> 0)
> > +
> > +#define RZG2L_GTUDDTYC_UP	BIT(0)
> > +#define RZG2L_GTUDDTYC_UDF	BIT(1)
> > +#define RZG2L_UP_COUNTING	(RZG2L_GTUDDTYC_UP | RZG2L_GTUDDTYC_UDF)
> > +
> > +#define RZG2L_GTIOR_GTIOA	GENMASK(4, 0)
> > +#define RZG2L_GTIOR_GTIOB	GENMASK(20, 16)
> > +#define RZG2L_GTIOR_OAE		BIT(8)
> > +#define RZG2L_GTIOR_OBE		BIT(24)
> > +
> > +#define RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE	0x07
> > +#define RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE	0x1b
> > +
> > +#define RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH \
> > +	(RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE | RZG2L_GTIOR_OAE) #define
> > +RZG2L_GTIOR_GTIOA_OUT_LO_END_TOGGLE_CMP_MATCH \
> > +	(RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE | RZG2L_GTIOR_OAE) #define
> > +RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH \
> > +	(FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE)
> > +| RZG2L_GTIOR_OBE) #define
> RZG2L_GTIOR_GTIOB_OUT_LO_END_TOGGLE_CMP_MATCH \
> > +	(FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE)
> > +| RZG2L_GTIOR_OBE)
> 
> These are not all used. Is it sensible to still keep them?

I will remove unused ones.

> 
> > +#define RZG2L_GTCCR(i) (0x4c + 4 * (i))
> 
> For i = 0 this is RZG2L_GTCCRA, for i = 1 it's RZG2L_GTCCRB. Als
> RZG2L_GTCCRA and RZG2L_GTCCRB are unused. Maybe move the definition of
> RZG2L_GTCCR to the list of registers above?

Agreed.

> 
> > +#define RZG2L_MAX_HW_CHANNELS	8
> > +#define RZG2L_CHANNELS_PER_IO	2
> > +#define RZG2L_MAX_PWM_CHANNELS	(RZG2L_MAX_HW_CHANNELS *
> RZG2L_CHANNELS_PER_IO)
> > +#define RZG2L_MAX_SCALE_FACTOR	1024
> > +
> > +#define RZG2L_IS_IOB(a)	((a) & 0x1)
> > +#define RZG2L_GET_CH(a)	((a) / 2)
> > +
> > +#define RZG2L_GET_CH_OFFS(i) (0x100 * (i))
> > +
> > +struct rzg2l_gpt_chip {
> > +	struct pwm_chip chip;
> > +	void __iomem *mmio;
> > +	struct reset_control *rstc;
> > +	struct clk *clk;
> > +	struct mutex lock; /* lock to protect shared channel resources */
> > +	unsigned long rate;
> > +	u64 max_val;
> > +	u32 state_period[RZG2L_MAX_HW_CHANNELS];
> > +	u32 user_count[RZG2L_MAX_HW_CHANNELS];
> > +	u32 enable_count[RZG2L_MAX_HW_CHANNELS];
> > +	DECLARE_BITMAP(ch_en_bits, RZG2L_MAX_PWM_CHANNELS); };
> > +
> > [...]
> > +
> > +static bool rzg2l_gpt_is_ch_enabled(struct rzg2l_gpt_chip *rzg2l_gpt,
> > +u8 hwpwm) {
> > +	u8 ch = RZG2L_GET_CH(hwpwm);
> > +	u32 offs = RZG2L_GET_CH_OFFS(ch);
> > +	bool is_counter_running, is_output_en;
> > +	u32 val;
> > +
> > +	val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTCR);
> > +	is_counter_running = val & RZG2L_GTCR_CST;
> > +
> > +	val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTIOR);
> > +	if (RZG2L_IS_IOB(hwpwm))
> > +		is_output_en = val & RZG2L_GTIOR_OBE;
> > +	else
> > +		is_output_en = val & RZG2L_GTIOR_OAE;
> 
> IIUC the i in RZG2L_GTCCR(i) stands for either A (0) or B (1), right?
> Maybe define RZG2L_GTIOR_OxE(i) assuming this is about the same A and B
> here? Then this could be simplified to:
> 
> 	is_output_en = val & RZG2L_GTIOR_OxE(rzg2l_gpt_subchannel(hwpwm));
> 
> (maybe modulo better names).

OK.

> 
> > +	return (is_counter_running && is_output_en);
> 
> You could return early after knowing that is_counter_running is false
> without determining is_output_en.

Agreed.

> 
> > +}
> > +
> > +static int rzg2l_gpt_enable(struct rzg2l_gpt_chip *rzg2l_gpt,
> > +			    struct pwm_device *pwm)
> > +{
> > +	u8 ch = RZG2L_GET_CH(pwm->hwpwm);
> > +	u32 offs = RZG2L_GET_CH_OFFS(ch);
> > +
> > +	/* Enable pin output */
> > +	if (RZG2L_IS_IOB(pwm->hwpwm))
> > +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR,
> > +				 RZG2L_GTIOR_GTIOB | RZG2L_GTIOR_OBE,
> > +				 RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH);
> > +	else
> > +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR,
> > +				 RZG2L_GTIOR_GTIOA | RZG2L_GTIOR_OAE,
> > +				 RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH);
> 
> Similar suggestion as above for A and B?!

OK.

> 
> > +	mutex_lock(&rzg2l_gpt->lock);
> > +	if (!rzg2l_gpt->enable_count[ch])
> > +		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, 0,
> RZG2L_GTCR_CST);
> > +
> > +	rzg2l_gpt->enable_count[ch]++;
> > +	mutex_unlock(&rzg2l_gpt->lock);
> > +
> > +	return 0;
> > +}
> > +
> > [...]
> > +
> > +static u64 calculate_period_or_duty(struct rzg2l_gpt_chip *rzg2l_gpt,
> > +u32 val, u8 prescale) {
> > +	u64 retval;
> > +	u64 tmp;
> > +
> > +	tmp = NSEC_PER_SEC * (u64)val;
> > +	/*
> > +	 * To avoid losing precision for smaller period/duty cycle values
> > +	 * ((2^32 * 10^9 << 2) < 2^64), do not process the rounded values.
> > +	 */
> > +	if (prescale < 2)
> > +		retval = DIV64_U64_ROUND_UP(tmp << (2 * prescale), rzg2l_gpt-
> >rate);
> > +	else
> > +		retval = DIV64_U64_ROUND_UP(tmp, rzg2l_gpt->rate) << (2 *
> > +prescale);
> 
> Maybe introduce a mul_u64_u64_div64_roundup (similar to
> mul_u64_u64_div64) to also be exact for prescale >= 2?

mul_u64_u64_div_u64()has a bug already and we are losing precision with very high values.
See the output with CONFIG_PWM_DEBUG enabled[1]. So we cannot reuse mul_u64_u64_div64()
for roundup operation.

Maybe we need to come with 128bit operations for mul_u64_u64_div64_roundup().
Do you have any specific algorithm in mind for doing mul_u64_u64_div64_roundup()?

Fabrizio already developed an algorithm for 128 bit operation, I could reuse once it
hits the mainline.

[1]
######[  304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent (ena=1 pol=0 5500000000000/43980352512000) -> (ena=1 pol=0 5500000000000/43980239923200)
	 High setting##
	[  304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0 23980465100800/43980239923200)


> 
> With prescale <= 5 and rzg2l_gpt->rate >= 1024 this shouldn't work just
> fine.

Practically this is not possible. Yes, from theoretical point, rate=1kHz 
compared to the practical case, 100MHz won't work.

For the practical case, the current logic is fine. If we need to achieve
theoretical example like you mentioned then we need to have 
mul_u64_u64_div64_roundup().

> 
> > +	return retval;
> > +}
> > +
> > [...]
> > +static u32 rzg2l_gpt_calculate_pv_or_dc(u64 period_or_duty_cycle, u8
> > +prescale) {
> > +	return min_t(u64, DIV64_U64_ROUND_UP(period_or_duty_cycle, 1 << (2 *
> prescale)),
> > +		     (u64)U32_MAX);
> 
> DIV64_U64_ROUND_UP(period_or_duty_cycle, 1 << (2 * prescale)) can be
> calculated a bit cheaper by using:
> 
> 	(period_or_duty_cycle + 1 << (2 * prescale) - 1) >> (2 * prescale)
> 
> assuming the LHS doesn't overflow.

Need to check this.

> 
> When using min_t, you can drop the cast for U32_MAX.

OK.

> 
> > +}
> > +
> > +/* Caller holds the lock while calling rzg2l_gpt_config() */ static
> > +int rzg2l_gpt_config(struct pwm_chip *chip, struct pwm_device *pwm,
> > +			    const struct pwm_state *state) {
> > +	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
> > +	u8 ch = RZG2L_GET_CH(pwm->hwpwm);
> > +	u32 offs = RZG2L_GET_CH_OFFS(ch);
> > +	unsigned long pv, dc;
> > +	u64 period_cycles;
> > +	u64 duty_cycles;
> > +	u8 prescale;
> > +
> > +	/*
> > +	 * GPT counter is shared by multiple channels, so prescale and
> period
> > +	 * can NOT be modified when there are multiple channels in use with
> > +	 * different settings.
> > +	 */
> > +	if (state->period != rzg2l_gpt->state_period[ch] && rzg2l_gpt-
> >user_count[ch] > 1)
> > +		return -EBUSY;
> 
> if (state->period < rzg2l_gpt->state_period[ch] && rzg2l_gpt-
> >user_count[ch] > 1)
> 
> would be more forgiving and still correct.

OK.

> 
> > +	/* Limit period/duty cycle to max value supported by the HW */
> > +	if (state->period > rzg2l_gpt->max_val)
> > +		period_cycles = rzg2l_gpt->max_val;
> > +	else
> > +		period_cycles = state->period;
> > +
> > +	period_cycles = mul_u64_u32_div(period_cycles, rzg2l_gpt->rate,
> NSEC_PER_SEC);
> > +	prescale = rzg2l_gpt_calculate_prescale(rzg2l_gpt, period_cycles);
> > +
> > +	pv = rzg2l_gpt_calculate_pv_or_dc(period_cycles, prescale);
> > +
> > +	if (state->duty_cycle > rzg2l_gpt->max_val)
> > +		duty_cycles = rzg2l_gpt->max_val;
> > +	else
> > +		duty_cycles = state->duty_cycle;
> > +
> > +	duty_cycles = mul_u64_u32_div(duty_cycles, rzg2l_gpt->rate,
> NSEC_PER_SEC);
> > +	dc = rzg2l_gpt_calculate_pv_or_dc(duty_cycles, prescale);
> > +
> > +	/*
> > +	 * GPT counter is shared by multiple channels, we cache the period
> value
> > +	 * from the first enabled channel and use the same value for both
> > +	 * channels.
> > +	 */
> > +	rzg2l_gpt->state_period[ch] = state->period;
> 
> With the rounding that happens above, I think it would be more approriate
> to track period_cycles here instead of period.

OK.

> 
> > [...]
> > +	rzg2l_gpt->clk = devm_clk_get(&pdev->dev, NULL);
> > +	if (IS_ERR(rzg2l_gpt->clk))
> > +		return dev_err_probe(&pdev->dev, PTR_ERR(rzg2l_gpt->clk),
> > +				     "cannot get clock\n");
> > +
> > +	ret = reset_control_deassert(rzg2l_gpt->rstc);
> > +	if (ret)
> > +		return dev_err_probe(&pdev->dev, ret,
> > +				     "cannot deassert reset control\n");
> > +
> > +	ret = clk_prepare_enable(rzg2l_gpt->clk);
> > +	if (ret)
> > +		goto err_reset;
> 
> devm_clk_get_enabled()?

OK.

> 
> > [...]
> > +	/*
> > +	 * Refuse clk rates > 1 GHz to prevent overflow later for computing
> > +	 * period and duty cycle.
> > +	 */
> > +	if (rzg2l_gpt->rate > NSEC_PER_SEC) {
> > +		ret = -EINVAL;
> > +		goto err_clk_rate_put;
> > +	}
> > +
> > +	rzg2l_gpt->max_val = mul_u64_u64_div_u64(U32_MAX, NSEC_PER_SEC,
> > +						 rzg2l_gpt->rate) *
> RZG2L_MAX_SCALE_FACTOR;
> 
> Is it clear that this won't overflow?

It won't over flow as [2] < [3]

(2^32 * 10 ^9 % 100 * 10^6) * 1024.

2^32 * 10 ^9 = 4,294,967,296,000,000,000 --[2]
2^64         = 18,446,744,073,709,551,616--[3]

Cheers,
Biju

> 
> > +	/*
> > +	 *  We need to keep the clock on, in case the bootloader has enabled
> the
> > +	 *  PWM and is running during probe().
> > +	 */
>
Uwe Kleine-König Dec. 7, 2023, 9:41 p.m. UTC | #4
Hello Biju,

On Thu, Dec 07, 2023 at 06:26:44PM +0000, Biju Das wrote:
> > -----Original Message-----
> > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > Sent: Wednesday, December 6, 2023 6:38 PM
> > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > On Mon, Nov 20, 2023 at 11:33:06AM +0000, Biju Das wrote:
> > > +static u64 calculate_period_or_duty(struct rzg2l_gpt_chip *rzg2l_gpt,
> > > +u32 val, u8 prescale) {
> > > +	u64 retval;
> > > +	u64 tmp;
> > > +
> > > +	tmp = NSEC_PER_SEC * (u64)val;
> > > +	/*
> > > +	 * To avoid losing precision for smaller period/duty cycle values
> > > +	 * ((2^32 * 10^9 << 2) < 2^64), do not process the rounded values.
> > > +	 */
> > > +	if (prescale < 2)
> > > +		retval = DIV64_U64_ROUND_UP(tmp << (2 * prescale), rzg2l_gpt-
> > >rate);
> > > +	else
> > > +		retval = DIV64_U64_ROUND_UP(tmp, rzg2l_gpt->rate) << (2 *
> > > +prescale);
> > 
> > Maybe introduce a mul_u64_u64_div64_roundup (similar to
> > mul_u64_u64_div64) to also be exact for prescale >= 2?
> 
> mul_u64_u64_div_u64()has a bug already and we are losing precision with very high values.
> See the output with CONFIG_PWM_DEBUG enabled[1]. So we cannot reuse mul_u64_u64_div64()
> for roundup operation.
> 
> Maybe we need to come with 128bit operations for mul_u64_u64_div64_roundup().
> Do you have any specific algorithm in mind for doing mul_u64_u64_div64_roundup()?
> 
> Fabrizio already developed an algorithm for 128 bit operation, I could reuse once it
> hits the mainline.
> 
> [1]
> ######[  304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent (ena=1 pol=0 5500000000000/43980352512000) -> (ena=1 pol=0 5500000000000/43980239923200)
> 	 High setting##
> 	[  304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0 23980465100800/43980239923200)

Have you tried to understand that? What is the clk rate when this
happens? You're not suggesting that mul_u64_u64_div_u64 is wrong, are
you?

> > With prescale <= 5 and rzg2l_gpt->rate >= 1024 this shouldn't work just
> > fine.

I meant here "this should work just fine", I guess you understood that
right.

> Practically this is not possible. Yes, from theoretical point, rate=1kHz 
> compared to the practical case, 100MHz won't work.
> 
> For the practical case, the current logic is fine. If we need to achieve
> theoretical example like you mentioned then we need to have 
> mul_u64_u64_div64_roundup().

That shouldn't be so hard to implement.

> > > +	rzg2l_gpt->max_val = mul_u64_u64_div_u64(U32_MAX, NSEC_PER_SEC,
> > > +						 rzg2l_gpt->rate) * RZG2L_MAX_SCALE_FACTOR;
> > 
> > Is it clear that this won't overflow?

U32_MAX * NSEC_PER_SEC doesn't even overflow an u64, so using a plain
u64 division would be more efficient.

You'd get a smaller rounding error with:

	rzg2l_gpt->max_val = mul_u64_u64_div_u64((u64)U32_MAX * NSEC_PER_SEC, RZG2L_MAX_SCALE_FACTOR, rzg2l_gpt->rate);

Best regards
Uwe
Biju Das Dec. 8, 2023, 10:34 a.m. UTC | #5
Hi Uwe Kleine-König,

Thanks for the feedback.

> -----Original Message-----
> From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> Sent: Thursday, December 7, 2023 9:42 PM
> Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> 
> Hello Biju,
> 
> On Thu, Dec 07, 2023 at 06:26:44PM +0000, Biju Das wrote:
> > > -----Original Message-----
> > > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > Sent: Wednesday, December 6, 2023 6:38 PM
> > > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT On Mon,
> > > Nov 20, 2023 at 11:33:06AM +0000, Biju Das wrote:
> > > > +static u64 calculate_period_or_duty(struct rzg2l_gpt_chip
> > > > +*rzg2l_gpt,
> > > > +u32 val, u8 prescale) {
> > > > +	u64 retval;
> > > > +	u64 tmp;
> > > > +
> > > > +	tmp = NSEC_PER_SEC * (u64)val;
> > > > +	/*
> > > > +	 * To avoid losing precision for smaller period/duty cycle
> values
> > > > +	 * ((2^32 * 10^9 << 2) < 2^64), do not process the rounded
> values.
> > > > +	 */
> > > > +	if (prescale < 2)
> > > > +		retval = DIV64_U64_ROUND_UP(tmp << (2 * prescale),
> rzg2l_gpt-
> > > >rate);
> > > > +	else
> > > > +		retval = DIV64_U64_ROUND_UP(tmp, rzg2l_gpt->rate) << (2
> *
> > > > +prescale);
> > >
> > > Maybe introduce a mul_u64_u64_div64_roundup (similar to
> > > mul_u64_u64_div64) to also be exact for prescale >= 2?
> >
> > mul_u64_u64_div_u64()has a bug already and we are losing precision with
> very high values.
> > See the output with CONFIG_PWM_DEBUG enabled[1]. So we cannot reuse
> > mul_u64_u64_div64() for roundup operation.
> >
> > Maybe we need to come with 128bit operations for
> mul_u64_u64_div64_roundup().
> > Do you have any specific algorithm in mind for doing
> mul_u64_u64_div64_roundup()?
> >
> > Fabrizio already developed an algorithm for 128 bit operation, I could
> > reuse once it hits the mainline.
> >
> > [1]
> > ######[  304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is not
> idempotent (ena=1 pol=0 5500000000000/43980352512000) -> (ena=1 pol=0
> 5500000000000/43980239923200)
> > 	 High setting##
> > 	[  304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent
> > (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0
> > 23980465100800/43980239923200)
> 
> Have you tried to understand that? What is the clk rate when this happens?
> You're not suggesting that mul_u64_u64_div_u64 is wrong, are you?

mul_u64_u64_div_u64() works for certain values. But for very high values we are losing precision
and is giving unexpected values.

See
[1] test program
[2] with mul_u64_u32_div()
[3] with mul_u64_u64_div_u64


[1]
test_prescale_1024() {
	echo "prescale 1024"
	echo "###### Low setting##"
	echo ${IO_1} > /sys/class/pwm/$PWMCHIP/export
	echo 10995116288000 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/period
	echo  6995116277760 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/duty_cycle
	echo 1 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/enable
	dumpgptreg
	
	echo "###### Medium setting 11000 sec = 3hours ##"
	echo 11000000000000 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/period
	echo  5500000000000 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/duty_cycle
	dumpgptreg
	
	echo "###### High setting##"
	echo 43980465100800 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/period
	echo 23980465100800 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/duty_cycle
	dumpgptreg
	echo 0 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/duty_cycle
	echo 10 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/period
	echo 0 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/enable
	echo ${IO_1} > /sys/class/pwm/$PWMCHIP/unexport
}

[2]
root@smarc-rzv2l:~# /pwm.sh
prescale 1024
###### Low setting##
Read at address  0x10048464 (0xffffa114c464): 0x40000001
Read at address  0x1004844C (0xffff8f06d44c): 0x28B78918
Read at address  0x1004842C (0xffffbf46e42c): 0x05000001
###### Medium setting 11000 sec = 3hours ##
Read at address  0x10048464 (0xffffb3491464): 0x400746FE
Read at address  0x1004844C (0xffffb0bbd44c): 0x2003A37F
Read at address  0x1004842C (0xffff86b2d42c): 0x05000001
###### High setting##
Read at address  0x10048464 (0xffffad78f464): 0xFFFFFFFF
Read at address  0x1004844C (0xffff8499344c): 0x8B95AD77
Read at address  0x1004842C (0xffffba33e42c): 0x05000001
[  502.409228] test end
root@smarc-rzv2l:~#

[3]
root@smarc-rzv2l:~# /pwm.sh
prescale 1024
###### Low setting##
Read at address  0x10048464 (0xffffb283e464): 0x40000001
Read at address  0x1004844C (0xffff8156044c): 0x28B78918
Read at address  0x1004842C (0xffffaf54242c): 0x05000001
###### Medium setting 11000 sec = 3hours ##
Read at address  0x10048464 (0xffff86d71464): 0x400746FE
Read at address  0x1004844C (0xffff8179244c): 0x2003A37F
Read at address  0x1004842C (0xffff8a58442c): 0x05000001
[ 1167.749392] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent (ena=1 pol=0 5500000000000/43980352512000) -> (ena=1 pol=0 5500000000000/43980239923200)
###### High setting##
[ 1167.765592] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0 23980465100800/43980239923200)
Read at address  0x10048464 (0xffff987cc464): 0xFFFFAA19
Read at address  0x1004844C (0xffff8fedd44c): 0x8B95AD77
Read at address  0x1004842C (0xffffb124642c): 0x05000001
[ 1169.949153] test end
root@smarc-rzv2l:~#


> 
> > > With prescale <= 5 and rzg2l_gpt->rate >= 1024 this shouldn't work
> > > just fine.
> 
> I meant here "this should work just fine", I guess you understood that
> right.

Got it.

> 
> > Practically this is not possible. Yes, from theoretical point,
> > rate=1kHz compared to the practical case, 100MHz won't work.
> >
> > For the practical case, the current logic is fine. If we need to
> > achieve theoretical example like you mentioned then we need to have
> > mul_u64_u64_div64_roundup().
> 
> That shouldn't be so hard to implement.

You need to implement 128-bit operation to support mul_u64_u64_div64_roundup().
Otherwise, we will lose precision and get unexpected results.

For that you need to produce an algorithm and then implement the algorithm
and testing various 32/64/128 build systems?



> 
> > > > +	rzg2l_gpt->max_val = mul_u64_u64_div_u64(U32_MAX,
> NSEC_PER_SEC,
> > > > +						 rzg2l_gpt->rate) *
> RZG2L_MAX_SCALE_FACTOR;
> > >
> > > Is it clear that this won't overflow?
> 
> U32_MAX * NSEC_PER_SEC doesn't even overflow an u64, so using a plain
> u64 division would be more efficient.

OK.

> 
> You'd get a smaller rounding error with:

> 
> 	rzg2l_gpt->max_val = mul_u64_u64_div_u64((u64)U32_MAX *
> NSEC_PER_SEC, RZG2L_MAX_SCALE_FACTOR, rzg2l_gpt->rate);
> 

I agree. With above logic, it will overflow and due to precision
adjustment there will be rounding error. 

2^32 * 10^9 * 1024 = 4,398,046,511,104,000,000,000 > 2^64.

Cheers,
Biju
Uwe Kleine-König Dec. 8, 2023, 2:07 p.m. UTC | #6
Hello Biju,

On Fri, Dec 08, 2023 at 10:34:55AM +0000, Biju Das wrote:
> > -----Original Message-----
> > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > Sent: Thursday, December 7, 2023 9:42 PM
> > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > 
> > Hello Biju,
> > 
> > On Thu, Dec 07, 2023 at 06:26:44PM +0000, Biju Das wrote:
> > > ######[  304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is not
> > > idempotent (ena=1 pol=0 5500000000000/43980352512000) -> (ena=1 pol=0
> > > 5500000000000/43980239923200)
> > > 	 High setting##
> > > 	[  304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent
> > > (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0
> > > 23980465100800/43980239923200)
> > 
> > Have you tried to understand that? What is the clk rate when this happens?
> > You're not suggesting that mul_u64_u64_div_u64 is wrong, are you?
> 
> mul_u64_u64_div_u64() works for certain values. But for very high
> values we are losing precision and is giving unexpected values.

Can you reduce the problem to a bogus result of mul_u64_u64_div_u64()?
I'd be very surprised if the problem was mul_u64_u64_div_u64() and not
how it's used in your pwm driver.

Best regards
Uwe
Biju Das Dec. 8, 2023, 2:11 p.m. UTC | #7
Hi Uwe Kleine-König,

> -----Original Message-----
> From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> Sent: Friday, December 8, 2023 2:07 PM
> Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> 
> Hello Biju,
> 
> On Fri, Dec 08, 2023 at 10:34:55AM +0000, Biju Das wrote:
> > > -----Original Message-----
> > > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > Sent: Thursday, December 7, 2023 9:42 PM
> > > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > >
> > > Hello Biju,
> > >
> > > On Thu, Dec 07, 2023 at 06:26:44PM +0000, Biju Das wrote:
> > > > ######[  304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is not
> > > > idempotent (ena=1 pol=0 5500000000000/43980352512000) -> (ena=1
> > > > pol=0
> > > > 5500000000000/43980239923200)
> > > > 	 High setting##
> > > > 	[  304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not
> > > > idempotent
> > > > (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0
> > > > 23980465100800/43980239923200)
> > >
> > > Have you tried to understand that? What is the clk rate when this
> happens?
> > > You're not suggesting that mul_u64_u64_div_u64 is wrong, are you?
> >
> > mul_u64_u64_div_u64() works for certain values. But for very high
> > values we are losing precision and is giving unexpected values.
> 
> Can you reduce the problem to a bogus result of mul_u64_u64_div_u64()?
> I'd be very surprised if the problem was mul_u64_u64_div_u64() and not how
> it's used in your pwm driver.

When I looked last time, it drops precision here[1]. I will recheck again.
On RZ/G2L family devices, the PWM rate is 100MHz.

[1]
https://elixir.bootlin.com/linux/v6.7-rc4/source/lib/math/div64.c#L214

Cheers,
Biju
Biju Das Dec. 13, 2023, 9:06 a.m. UTC | #8
Hi Uwe,

> -----Original Message-----
> From: Biju Das
> Sent: Friday, December 8, 2023 2:12 PM
> Subject: RE: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> 
> Hi Uwe Kleine-König,
> 
> > -----Original Message-----
> > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > Sent: Friday, December 8, 2023 2:07 PM
> > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> >
> > Hello Biju,
> >
> > On Fri, Dec 08, 2023 at 10:34:55AM +0000, Biju Das wrote:
> > > > -----Original Message-----
> > > > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > > Sent: Thursday, December 7, 2023 9:42 PM
> > > > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > > >
> > > > Hello Biju,
> > > >
> > > > On Thu, Dec 07, 2023 at 06:26:44PM +0000, Biju Das wrote:
> > > > > ######[  304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is not
> > > > > idempotent (ena=1 pol=0 5500000000000/43980352512000) -> (ena=1
> > > > > pol=0
> > > > > 5500000000000/43980239923200)
> > > > > 	 High setting##
> > > > > 	[  304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not
> > > > > idempotent
> > > > > (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0
> > > > > 23980465100800/43980239923200)
> > > >
> > > > Have you tried to understand that? What is the clk rate when this
> > happens?
> > > > You're not suggesting that mul_u64_u64_div_u64 is wrong, are you?
> > >
> > > mul_u64_u64_div_u64() works for certain values. But for very high
> > > values we are losing precision and is giving unexpected values.
> >
> > Can you reduce the problem to a bogus result of mul_u64_u64_div_u64()?
> > I'd be very surprised if the problem was mul_u64_u64_div_u64() and not
> > how it's used in your pwm driver.
> 
> When I looked last time, it drops precision here[1]. I will recheck again.
> On RZ/G2L family devices, the PWM rate is 100MHz.
> 
 [1]
https://elixir.bootlin.com/linux/v6.7-rc4/source/lib/math/div64.c#L214


Please find the bug details in mul_u64_u64_div_u64() compared to mul_u64_u32_div()

Theoretical calculation:

Period = 43980465100800 nsec
Duty_cycle = 23980465100800 nsec
PWM rate = 100MHz

period_cycles(tmp) = 43980465100800 * (100 * 10 ^ 6) / (10 ^ 9) = 4398046510080
prescale = ((43980465100800 >> 32) >= 256) = 5
period_cycles = min (round_up(4398046510080,( 1 << (2 * 5 )), U32_MAX) = min (4295162607, U32_MAX) = U32_MAX = 0xFFFFFFFF
duty_cycles = min (2398046510080, ,( 1 << (2 * 5 )), U32_MAX) =  min (2341842295, U32_MAX) = 0x8B95AD77


with mul_u64_u64_div_u64 (ARM64):
[   54.551612] ##### period_cycles_norm=43980465100800
[   54.305923] ##### period_cycles_tmp=4398035251080 ---> This is the bug.
[   54.311828] ##### pv=ffffd50c
[   54.386047] ##### duty_cycles_tmp=2398046510080
[   54.390903] ##### dc=8b95ad77
[   54.395574] ##### period_cycles_norm=43980352512000
[   54.398715] ##### period_cycles_tmp=4398023992229
[   54.403717] ##### pv=ffffaa19
[   54.408594] ##### duty_cycles_norm=23980465100800
[   54.411673] ##### duty_cycles_tmp=2398046510080
[   54.416557] ##### dc=8b95ad77

with mul_u64_u32_div(ARM64):

[   46.725909] ##### period_cycles_norm=43980465100800
[   46.732468] ##### period_cycles_tmp=4398046510080
[   46.740484] ##### pv=ffffffff
[   46.748457] ##### duty_cycles_norm=23980465100800
[   46.751510] ##### duty_cycles_tmp=2398046510080
[   46.760462] ##### dc=8b95ad77
[   46.765256] ##### period_cycles_norm=43980465100800
[   46.768282] ##### period_cycles_tmp=4398046510080
[   46.776590] ##### pv=ffffffff
[   46.782762] ##### duty_cycles_norm=23980465100800
[   46.802617] ##### dc=8b95ad77

Cheers,
Biju
Uwe Kleine-König Dec. 13, 2023, 11:40 a.m. UTC | #9
On Wed, Dec 13, 2023 at 09:06:56AM +0000, Biju Das wrote:
> Hi Uwe,
> 
> > -----Original Message-----
> > From: Biju Das
> > Sent: Friday, December 8, 2023 2:12 PM
> > Subject: RE: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > 
> > Hi Uwe Kleine-König,
> > 
> > > -----Original Message-----
> > > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > Sent: Friday, December 8, 2023 2:07 PM
> > > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > >
> > > Hello Biju,
> > >
> > > On Fri, Dec 08, 2023 at 10:34:55AM +0000, Biju Das wrote:
> > > > > -----Original Message-----
> > > > > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > > > Sent: Thursday, December 7, 2023 9:42 PM
> > > > > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > > > >
> > > > > Hello Biju,
> > > > >
> > > > > On Thu, Dec 07, 2023 at 06:26:44PM +0000, Biju Das wrote:
> > > > > > ######[  304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is not
> > > > > > idempotent (ena=1 pol=0 5500000000000/43980352512000) -> (ena=1
> > > > > > pol=0
> > > > > > 5500000000000/43980239923200)
> > > > > > 	 High setting##
> > > > > > 	[  304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not
> > > > > > idempotent
> > > > > > (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0
> > > > > > 23980465100800/43980239923200)
> > > > >
> > > > > Have you tried to understand that? What is the clk rate when this
> > > happens?
> > > > > You're not suggesting that mul_u64_u64_div_u64 is wrong, are you?
> > > >
> > > > mul_u64_u64_div_u64() works for certain values. But for very high
> > > > values we are losing precision and is giving unexpected values.
> > >
> > > Can you reduce the problem to a bogus result of mul_u64_u64_div_u64()?
> > > I'd be very surprised if the problem was mul_u64_u64_div_u64() and not
> > > how it's used in your pwm driver.
> > 
> > When I looked last time, it drops precision here[1]. I will recheck again.
> > On RZ/G2L family devices, the PWM rate is 100MHz.
> > 
>  [1]
> https://elixir.bootlin.com/linux/v6.7-rc4/source/lib/math/div64.c#L214
> 
> 
> Please find the bug details in mul_u64_u64_div_u64() compared to mul_u64_u32_div()
> 
> Theoretical calculation:
> 
> Period = 43980465100800 nsec
> Duty_cycle = 23980465100800 nsec
> PWM rate = 100MHz
> 
> period_cycles(tmp) = 43980465100800 * (100 * 10 ^ 6) / (10 ^ 9) = 4398046510080
> prescale = ((43980465100800 >> 32) >= 256) = 5
> period_cycles = min (round_up(4398046510080,( 1 << (2 * 5 )), U32_MAX) = min (4295162607, U32_MAX) = U32_MAX = 0xFFFFFFFF
> duty_cycles = min (2398046510080, ,( 1 << (2 * 5 )), U32_MAX) =  min (2341842295, U32_MAX) = 0x8B95AD77
> 
> 
> with mul_u64_u64_div_u64 (ARM64):
> [   54.551612] ##### period_cycles_norm=43980465100800
> [   54.305923] ##### period_cycles_tmp=4398035251080 ---> This is the bug.

It took me a while to read from your mail that 

	mul_u64_u64_div_u64(43980465100800, 100000000, 1000000000)

yields 4398035251080 on your machine (which isn't the exact result).

I came to the same conclusion, damn, I thought mul_u64_u64_div_u64() was
exact. I wonder if it's worth to improve that. One fun fact is that
while mul_u64_u64_div_u64(43980465100800, 100000000, 1000000000) yields
4398035251080 (which is off by 11259000), swapping the parameters (and
thus using mul_u64_u64_div_u64(100000000, 43980465100800, 1000000000))
yields 4398046510080 which is the exact result.

So this exact issue can be improved by:

diff --git a/lib/math/div64.c b/lib/math/div64.c
index 55a81782e271..9523c3cd37f7 100644
--- a/lib/math/div64.c
+++ b/lib/math/div64.c
@@ -188,6 +188,9 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
 	u64 res = 0, div, rem;
 	int shift;
 
+	if (a > b)
+		return mul_u64_u64_div_u64(b, a, c);
+
 	/* can a * b overflow ? */
 	if (ilog2(a) + ilog2(b) > 62) {
 		/*

but the issue stays in principle. I'll think about that for a while.

Best regards
Uwe
Biju Das Feb. 9, 2024, 1:39 p.m. UTC | #10
Hi Uwe,

Thanks for the feedback

> -----Original Message-----
> From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> Sent: Wednesday, December 13, 2023 11:40 AM
> Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> 
> On Wed, Dec 13, 2023 at 09:06:56AM +0000, Biju Das wrote:
> > Hi Uwe,
> >
> > > -----Original Message-----
> > > From: Biju Das
> > > Sent: Friday, December 8, 2023 2:12 PM
> > > Subject: RE: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > >
> > > Hi Uwe Kleine-König,
> > >
> > > > -----Original Message-----
> > > > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > > Sent: Friday, December 8, 2023 2:07 PM
> > > > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > > >
> > > > Hello Biju,
> > > >
> > > > On Fri, Dec 08, 2023 at 10:34:55AM +0000, Biju Das wrote:
> > > > > > -----Original Message-----
> > > > > > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > > > > Sent: Thursday, December 7, 2023 9:42 PM
> > > > > > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > > > > >
> > > > > > Hello Biju,
> > > > > >
> > > > > > On Thu, Dec 07, 2023 at 06:26:44PM +0000, Biju Das wrote:
> > > > > > > ######[  304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is
> > > > > > > not idempotent (ena=1 pol=0 5500000000000/43980352512000) ->
> > > > > > > (ena=1
> > > > > > > pol=0
> > > > > > > 5500000000000/43980239923200)
> > > > > > > 	 High setting##
> > > > > > > 	[  304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not
> > > > > > > idempotent
> > > > > > > (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0
> > > > > > > 23980465100800/43980239923200)
> > > > > >
> > > > > > Have you tried to understand that? What is the clk rate when
> > > > > > this
> > > > happens?
> > > > > > You're not suggesting that mul_u64_u64_div_u64 is wrong, are
> you?
> > > > >
> > > > > mul_u64_u64_div_u64() works for certain values. But for very
> > > > > high values we are losing precision and is giving unexpected
> values.
> > > >
> > > > Can you reduce the problem to a bogus result of
> mul_u64_u64_div_u64()?
> > > > I'd be very surprised if the problem was mul_u64_u64_div_u64() and
> > > > not how it's used in your pwm driver.
> > >
> > > When I looked last time, it drops precision here[1]. I will recheck
> again.
> > > On RZ/G2L family devices, the PWM rate is 100MHz.
> > >
> >  [1]
> > https://elixir.bootlin.com/linux/v6.7-rc4/source/lib/math/div64.c#L214
> >
> >
> > Please find the bug details in mul_u64_u64_div_u64() compared to
> > mul_u64_u32_div()
> >
> > Theoretical calculation:
> >
> > Period = 43980465100800 nsec
> > Duty_cycle = 23980465100800 nsec
> > PWM rate = 100MHz
> >
> > period_cycles(tmp) = 43980465100800 * (100 * 10 ^ 6) / (10 ^ 9) =
> > 4398046510080 prescale = ((43980465100800 >> 32) >= 256) = 5
> > period_cycles = min (round_up(4398046510080,( 1 << (2 * 5 )), U32_MAX)
> > = min (4295162607, U32_MAX) = U32_MAX = 0xFFFFFFFF duty_cycles = min
> > (2398046510080, ,( 1 << (2 * 5 )), U32_MAX) =  min (2341842295,
> > U32_MAX) = 0x8B95AD77
> >
> >
> > with mul_u64_u64_div_u64 (ARM64):
> > [   54.551612] ##### period_cycles_norm=43980465100800
> > [   54.305923] ##### period_cycles_tmp=4398035251080 ---> This is the
> bug.
> 
> It took me a while to read from your mail that
> 
> 	mul_u64_u64_div_u64(43980465100800, 100000000, 1000000000)
> 
> yields 4398035251080 on your machine (which isn't the exact result).
> 
> I came to the same conclusion, damn, I thought mul_u64_u64_div_u64() was
> exact. I wonder if it's worth to improve that. One fun fact is that while
> mul_u64_u64_div_u64(43980465100800, 100000000, 1000000000) yields
> 4398035251080 (which is off by 11259000), swapping the parameters (and
> thus using mul_u64_u64_div_u64(100000000, 43980465100800, 1000000000))
> yields 4398046510080 which is the exact result.
> 
> So this exact issue can be improved by:
> 
> diff --git a/lib/math/div64.c b/lib/math/div64.c index
> 55a81782e271..9523c3cd37f7 100644
> --- a/lib/math/div64.c
> +++ b/lib/math/div64.c
> @@ -188,6 +188,9 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
>  	u64 res = 0, div, rem;
>  	int shift;
> 
> +	if (a > b)
> +		return mul_u64_u64_div_u64(b, a, c);
> +
>  	/* can a * b overflow ? */
>  	if (ilog2(a) + ilog2(b) > 62) {
>  		/*
> 
> but the issue stays in principle. I'll think about that for a while.

OK, I found a way to fix this issue

static inline u64 rzg2l_gpt_mul_u64_u64_div_u64_roundup(u64 a, u64 b, u64 c)
{
	u64 retval;

	if (a > b)
		retval = mul_u64_u64_div_u64(b, a, c / 2);
	else
		retval = mul_u64_u64_div_u64(a, b, c / 2);

	return DIV64_U64_ROUND_UP(retval, 2);
}

In my case divisor is multiple of 2 as it is clk frequency.

a = 43980465100800, b= 100000000, c = 1000000000, expected result after rounding up = 4398046510080

with using above api,

43980465100800 * 100000000 / 500000000 = 8796093020160. roundup (8796093020160, 2) = 4398046510080

I am planning to send v18 with these changes.

Please let me know if you have any comments.

Cheers,
Biju
Biju Das Feb. 20, 2024, 4:04 p.m. UTC | #11
Hi Uwe,

> -----Original Message-----
> From: Biju Das
> Sent: Friday, February 9, 2024 1:39 PM
> To: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> Cc: Thierry Reding <thierry.reding@gmail.com>; Philipp Zabel
> <p.zabel@pengutronix.de>; Geert Uytterhoeven <geert+renesas@glider.be>;
> Fabrizio Castro <fabrizio.castro.jz@renesas.com>; Magnus Damm
> <magnus.damm@gmail.com>; linux-pwm@vger.kernel.org; linux-renesas-
> soc@vger.kernel.org; Prabhakar Mahadev Lad <prabhakar.mahadev-
> lad.rj@bp.renesas.com>
> Subject: RE: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> 
> Hi Uwe,
> 
> Thanks for the feedback
> 
> > -----Original Message-----
> > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > Sent: Wednesday, December 13, 2023 11:40 AM
> > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> >
> > On Wed, Dec 13, 2023 at 09:06:56AM +0000, Biju Das wrote:
> > > Hi Uwe,
> > >
> > > > -----Original Message-----
> > > > From: Biju Das
> > > > Sent: Friday, December 8, 2023 2:12 PM
> > > > Subject: RE: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > > >
> > > > Hi Uwe Kleine-König,
> > > >
> > > > > -----Original Message-----
> > > > > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > > > Sent: Friday, December 8, 2023 2:07 PM
> > > > > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > > > >
> > > > > Hello Biju,
> > > > >
> > > > > On Fri, Dec 08, 2023 at 10:34:55AM +0000, Biju Das wrote:
> > > > > > > -----Original Message-----
> > > > > > > From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > > > > > > Sent: Thursday, December 7, 2023 9:42 PM
> > > > > > > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT
> > > > > > >
> > > > > > > Hello Biju,
> > > > > > >
> > > > > > > On Thu, Dec 07, 2023 at 06:26:44PM +0000, Biju Das wrote:
> > > > > > > > ######[  304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is
> > > > > > > > not idempotent (ena=1 pol=0 5500000000000/43980352512000)
> > > > > > > > ->
> > > > > > > > (ena=1
> > > > > > > > pol=0
> > > > > > > > 5500000000000/43980239923200)
> > > > > > > > 	 High setting##
> > > > > > > > 	[  304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not
> > > > > > > > idempotent
> > > > > > > > (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1
> > > > > > > > pol=0
> > > > > > > > 23980465100800/43980239923200)
> > > > > > >
> > > > > > > Have you tried to understand that? What is the clk rate when
> > > > > > > this
> > > > > happens?
> > > > > > > You're not suggesting that mul_u64_u64_div_u64 is wrong, are
> > you?
> > > > > >
> > > > > > mul_u64_u64_div_u64() works for certain values. But for very
> > > > > > high values we are losing precision and is giving unexpected
> > values.
> > > > >
> > > > > Can you reduce the problem to a bogus result of
> > mul_u64_u64_div_u64()?
> > > > > I'd be very surprised if the problem was mul_u64_u64_div_u64()
> > > > > and not how it's used in your pwm driver.
> > > >
> > > > When I looked last time, it drops precision here[1]. I will
> > > > recheck
> > again.
> > > > On RZ/G2L family devices, the PWM rate is 100MHz.
> > > >
> > >  [1]
> > > https://elixir.bootlin.com/linux/v6.7-rc4/source/lib/math/div64.c#L2
> > > 14
> > >
> > >
> > > Please find the bug details in mul_u64_u64_div_u64() compared to
> > > mul_u64_u32_div()
> > >
> > > Theoretical calculation:
> > >
> > > Period = 43980465100800 nsec
> > > Duty_cycle = 23980465100800 nsec
> > > PWM rate = 100MHz
> > >
> > > period_cycles(tmp) = 43980465100800 * (100 * 10 ^ 6) / (10 ^ 9) =
> > > 4398046510080 prescale = ((43980465100800 >> 32) >= 256) = 5
> > > period_cycles = min (round_up(4398046510080,( 1 << (2 * 5 )),
> > > U32_MAX) = min (4295162607, U32_MAX) = U32_MAX = 0xFFFFFFFF
> > > duty_cycles = min (2398046510080, ,( 1 << (2 * 5 )), U32_MAX) =  min
> > > (2341842295,
> > > U32_MAX) = 0x8B95AD77
> > >
> > >
> > > with mul_u64_u64_div_u64 (ARM64):
> > > [   54.551612] ##### period_cycles_norm=43980465100800
> > > [   54.305923] ##### period_cycles_tmp=4398035251080 ---> This is the
> > bug.
> >
> > It took me a while to read from your mail that
> >
> > 	mul_u64_u64_div_u64(43980465100800, 100000000, 1000000000)
> >
> > yields 4398035251080 on your machine (which isn't the exact result).
> >
> > I came to the same conclusion, damn, I thought mul_u64_u64_div_u64()
> > was exact. I wonder if it's worth to improve that. One fun fact is
> > that while mul_u64_u64_div_u64(43980465100800, 100000000, 1000000000)
> > yields
> > 4398035251080 (which is off by 11259000), swapping the parameters (and
> > thus using mul_u64_u64_div_u64(100000000, 43980465100800, 1000000000))
> > yields 4398046510080 which is the exact result.
> >
> > So this exact issue can be improved by:
> >
> > diff --git a/lib/math/div64.c b/lib/math/div64.c index
> > 55a81782e271..9523c3cd37f7 100644
> > --- a/lib/math/div64.c
> > +++ b/lib/math/div64.c
> > @@ -188,6 +188,9 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
> >  	u64 res = 0, div, rem;
> >  	int shift;
> >
> > +	if (a > b)
> > +		return mul_u64_u64_div_u64(b, a, c);
> > +
> >  	/* can a * b overflow ? */
> >  	if (ilog2(a) + ilog2(b) > 62) {
> >  		/*
> >
> > but the issue stays in principle. I'll think about that for a while.
> 
> OK, I found a way to fix this issue
> 
> static inline u64 rzg2l_gpt_mul_u64_u64_div_u64_roundup(u64 a, u64 b, u64
> c) {
> 	u64 retval;
> 
> 	if (a > b)
> 		retval = mul_u64_u64_div_u64(b, a, c / 2);
> 	else
> 		retval = mul_u64_u64_div_u64(a, b, c / 2);
> 
> 	return DIV64_U64_ROUND_UP(retval, 2);
> }
> 
> In my case divisor is multiple of 2 as it is clk frequency.
> 
> a = 43980465100800, b= 100000000, c = 1000000000, expected result after
> rounding up = 4398046510080
> 
> with using above api,
> 
> 43980465100800 * 100000000 / 500000000 = 8796093020160. roundup
> (8796093020160, 2) = 4398046510080
> 
> I am planning to send v18 with these changes.
> 
> Please let me know if you have any comments.


I found another way to avoid overflow and also we are not losing any precision.

+	 * Rate is in MHz and is always integer for peripheral clk
+	 * 2^32(val) * 2^10 (prescalar) * 10^9 > 2^64
+	 * 2^32(val) * 2^10 (prescalar) * 10^6 < 2^64
+	 * Multiply val with prescalar first, if the result is less than
+	 * 2^34, then multiply by 10^9. Otherwise divide nr and dr by 10^3
+	 * so that it will never overflow.
 	 */

Here I can useDIV64_U64_ROUND_UP() instead. I will send v18 based on this.

Cheers,
Biju
diff mbox series

Patch

diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 4b956d661755..bf658bb472f5 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -513,6 +513,17 @@  config PWM_ROCKCHIP
 	  Generic PWM framework driver for the PWM controller found on
 	  Rockchip SoCs.
 
+config PWM_RZG2L_GPT
+	tristate "Renesas RZ/G2L General PWM Timer support"
+	depends on ARCH_RZG2L || COMPILE_TEST
+	depends on HAS_IOMEM
+	help
+	  This driver exposes the General PWM Timer controller found in Renesas
+	  RZ/G2L like chips through the PWM API.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pwm-rzg2l-gpt.
+
 config PWM_RZ_MTU3
 	tristate "Renesas RZ/G2L MTU3a PWM Timer support"
 	depends on RZ_MTU3
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index c5ec9e168ee7..50a6520363aa 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -47,6 +47,7 @@  obj-$(CONFIG_PWM_RASPBERRYPI_POE)	+= pwm-raspberrypi-poe.o
 obj-$(CONFIG_PWM_RCAR)		+= pwm-rcar.o
 obj-$(CONFIG_PWM_RENESAS_TPU)	+= pwm-renesas-tpu.o
 obj-$(CONFIG_PWM_ROCKCHIP)	+= pwm-rockchip.o
+obj-$(CONFIG_PWM_RZG2L_GPT)	+= pwm-rzg2l-gpt.o
 obj-$(CONFIG_PWM_RZ_MTU3)	+= pwm-rz-mtu3.o
 obj-$(CONFIG_PWM_SAMSUNG)	+= pwm-samsung.o
 obj-$(CONFIG_PWM_SIFIVE)	+= pwm-sifive.o
diff --git a/drivers/pwm/pwm-rzg2l-gpt.c b/drivers/pwm/pwm-rzg2l-gpt.c
new file mode 100644
index 000000000000..428e6e577db6
--- /dev/null
+++ b/drivers/pwm/pwm-rzg2l-gpt.c
@@ -0,0 +1,572 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas RZ/G2L General PWM Timer (GPT) driver
+ *
+ * Copyright (C) 2023 Renesas Electronics Corporation
+ *
+ * Hardware manual for this IP can be found here
+ * https://www.renesas.com/eu/en/document/mah/rzg2l-group-rzg2lc-group-users-manual-hardware-0?language=en
+ *
+ * Limitations:
+ * - Counter must be stopped before modifying Mode and Prescaler.
+ * - When PWM is disabled, the output is driven to inactive.
+ * - While the hardware supports both polarities, the driver (for now)
+ *   only handles normal polarity.
+ * - When both channels are used, disabling the channel on one stops the
+ *   other.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/pwm.h>
+#include <linux/reset.h>
+#include <linux/time.h>
+
+#define RZG2L_GTCR		0x2c
+#define RZG2L_GTUDDTYC		0x30
+#define RZG2L_GTIOR		0x34
+#define RZG2L_GTBER		0x40
+#define RZG2L_GTCNT		0x48
+#define RZG2L_GTCCRA		0x4c
+#define RZG2L_GTCCRB		0x50
+#define RZG2L_GTPR		0x64
+
+#define RZG2L_GTCR_CST		BIT(0)
+#define RZG2L_GTCR_MD		GENMASK(18, 16)
+#define RZG2L_GTCR_TPCS		GENMASK(26, 24)
+
+#define RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE	FIELD_PREP(RZG2L_GTCR_MD, 0)
+
+#define RZG2L_GTUDDTYC_UP	BIT(0)
+#define RZG2L_GTUDDTYC_UDF	BIT(1)
+#define RZG2L_UP_COUNTING	(RZG2L_GTUDDTYC_UP | RZG2L_GTUDDTYC_UDF)
+
+#define RZG2L_GTIOR_GTIOA	GENMASK(4, 0)
+#define RZG2L_GTIOR_GTIOB	GENMASK(20, 16)
+#define RZG2L_GTIOR_OAE		BIT(8)
+#define RZG2L_GTIOR_OBE		BIT(24)
+
+#define RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE	0x07
+#define RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE	0x1b
+
+#define RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH \
+	(RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE | RZG2L_GTIOR_OAE)
+#define RZG2L_GTIOR_GTIOA_OUT_LO_END_TOGGLE_CMP_MATCH \
+	(RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE | RZG2L_GTIOR_OAE)
+#define RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH \
+	(FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE) | RZG2L_GTIOR_OBE)
+#define RZG2L_GTIOR_GTIOB_OUT_LO_END_TOGGLE_CMP_MATCH \
+	(FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE) | RZG2L_GTIOR_OBE)
+
+#define RZG2L_GTCCR(i) (0x4c + 4 * (i))
+
+#define RZG2L_MAX_HW_CHANNELS	8
+#define RZG2L_CHANNELS_PER_IO	2
+#define RZG2L_MAX_PWM_CHANNELS	(RZG2L_MAX_HW_CHANNELS * RZG2L_CHANNELS_PER_IO)
+#define RZG2L_MAX_SCALE_FACTOR	1024
+
+#define RZG2L_IS_IOB(a)	((a) & 0x1)
+#define RZG2L_GET_CH(a)	((a) / 2)
+
+#define RZG2L_GET_CH_OFFS(i) (0x100 * (i))
+
+struct rzg2l_gpt_chip {
+	struct pwm_chip chip;
+	void __iomem *mmio;
+	struct reset_control *rstc;
+	struct clk *clk;
+	struct mutex lock; /* lock to protect shared channel resources */
+	unsigned long rate;
+	u64 max_val;
+	u32 state_period[RZG2L_MAX_HW_CHANNELS];
+	u32 user_count[RZG2L_MAX_HW_CHANNELS];
+	u32 enable_count[RZG2L_MAX_HW_CHANNELS];
+	DECLARE_BITMAP(ch_en_bits, RZG2L_MAX_PWM_CHANNELS);
+};
+
+static inline struct rzg2l_gpt_chip *to_rzg2l_gpt_chip(struct pwm_chip *chip)
+{
+	return container_of(chip, struct rzg2l_gpt_chip, chip);
+}
+
+static void rzg2l_gpt_write(struct rzg2l_gpt_chip *rzg2l_gpt, u32 reg, u32 data)
+{
+	writel(data, rzg2l_gpt->mmio + reg);
+}
+
+static u32 rzg2l_gpt_read(struct rzg2l_gpt_chip *rzg2l_gpt, u32 reg)
+{
+	return readl(rzg2l_gpt->mmio + reg);
+}
+
+static void rzg2l_gpt_modify(struct rzg2l_gpt_chip *rzg2l_gpt, u32 reg, u32 clr,
+			     u32 set)
+{
+	rzg2l_gpt_write(rzg2l_gpt, reg,
+			(rzg2l_gpt_read(rzg2l_gpt, reg) & ~clr) | set);
+}
+
+static u8 rzg2l_gpt_calculate_prescale(struct rzg2l_gpt_chip *rzg2l_gpt,
+				       u64 period_cycles)
+{
+	u32 prescaled_period_cycles;
+	u8 prescale;
+
+	prescaled_period_cycles = period_cycles >> 32;
+	if (prescaled_period_cycles >= 256)
+		prescale = 5;
+	else
+		prescale = (fls(prescaled_period_cycles) + 1) / 2;
+
+	return prescale;
+}
+
+static int rzg2l_gpt_request(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
+	u32 ch = RZG2L_GET_CH(pwm->hwpwm);
+
+	mutex_lock(&rzg2l_gpt->lock);
+	rzg2l_gpt->user_count[ch]++;
+	mutex_unlock(&rzg2l_gpt->lock);
+
+	return 0;
+}
+
+static void rzg2l_gpt_free(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
+	u32 ch = RZG2L_GET_CH(pwm->hwpwm);
+
+	mutex_lock(&rzg2l_gpt->lock);
+	rzg2l_gpt->user_count[ch]--;
+	mutex_unlock(&rzg2l_gpt->lock);
+}
+
+static bool rzg2l_gpt_is_ch_enabled(struct rzg2l_gpt_chip *rzg2l_gpt, u8 hwpwm)
+{
+	u8 ch = RZG2L_GET_CH(hwpwm);
+	u32 offs = RZG2L_GET_CH_OFFS(ch);
+	bool is_counter_running, is_output_en;
+	u32 val;
+
+	val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTCR);
+	is_counter_running = val & RZG2L_GTCR_CST;
+
+	val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTIOR);
+	if (RZG2L_IS_IOB(hwpwm))
+		is_output_en = val & RZG2L_GTIOR_OBE;
+	else
+		is_output_en = val & RZG2L_GTIOR_OAE;
+
+	return (is_counter_running && is_output_en);
+}
+
+static int rzg2l_gpt_enable(struct rzg2l_gpt_chip *rzg2l_gpt,
+			    struct pwm_device *pwm)
+{
+	u8 ch = RZG2L_GET_CH(pwm->hwpwm);
+	u32 offs = RZG2L_GET_CH_OFFS(ch);
+
+	/* Enable pin output */
+	if (RZG2L_IS_IOB(pwm->hwpwm))
+		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR,
+				 RZG2L_GTIOR_GTIOB | RZG2L_GTIOR_OBE,
+				 RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH);
+	else
+		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR,
+				 RZG2L_GTIOR_GTIOA | RZG2L_GTIOR_OAE,
+				 RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH);
+
+	mutex_lock(&rzg2l_gpt->lock);
+	if (!rzg2l_gpt->enable_count[ch])
+		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, 0, RZG2L_GTCR_CST);
+
+	rzg2l_gpt->enable_count[ch]++;
+	mutex_unlock(&rzg2l_gpt->lock);
+
+	return 0;
+}
+
+static void rzg2l_gpt_disable(struct rzg2l_gpt_chip *rzg2l_gpt,
+			      struct pwm_device *pwm)
+{
+	u8 ch = RZG2L_GET_CH(pwm->hwpwm);
+	u32 offs = RZG2L_GET_CH_OFFS(ch);
+
+	/* Disable pin output */
+	if (RZG2L_IS_IOB(pwm->hwpwm))
+		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR, RZG2L_GTIOR_OBE, 0);
+	else
+		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR, RZG2L_GTIOR_OAE, 0);
+
+	/* Stop count, Output low on GTIOCx pin when counting stops */
+	mutex_lock(&rzg2l_gpt->lock);
+	rzg2l_gpt->enable_count[ch]--;
+	if (!rzg2l_gpt->enable_count[ch])
+		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_CST, 0);
+
+	mutex_unlock(&rzg2l_gpt->lock);
+
+	/*
+	 * Probe() set these bits, if pwm is enabled by bootloader. In such
+	 * case, clearing the bits will avoid errors during unbind.
+	 */
+	if (test_bit(pwm->hwpwm, rzg2l_gpt->ch_en_bits))
+		clear_bit(pwm->hwpwm, rzg2l_gpt->ch_en_bits);
+}
+
+static u64 calculate_period_or_duty(struct rzg2l_gpt_chip *rzg2l_gpt, u32 val, u8 prescale)
+{
+	u64 retval;
+	u64 tmp;
+
+	tmp = NSEC_PER_SEC * (u64)val;
+	/*
+	 * To avoid losing precision for smaller period/duty cycle values
+	 * ((2^32 * 10^9 << 2) < 2^64), do not process the rounded values.
+	 */
+	if (prescale < 2)
+		retval = DIV64_U64_ROUND_UP(tmp << (2 * prescale), rzg2l_gpt->rate);
+	else
+		retval = DIV64_U64_ROUND_UP(tmp, rzg2l_gpt->rate) << (2 * prescale);
+
+	return retval;
+}
+
+static int rzg2l_gpt_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+			       struct pwm_state *state)
+{
+	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
+	int rc;
+
+	rc = pm_runtime_resume_and_get(chip->dev);
+	if (rc)
+		return rc;
+
+	state->enabled = rzg2l_gpt_is_ch_enabled(rzg2l_gpt, pwm->hwpwm);
+	if (state->enabled) {
+		u32 ch = RZG2L_GET_CH(pwm->hwpwm);
+		u32 offs = RZG2L_GET_CH_OFFS(ch);
+		u8 prescale;
+		u32 val;
+
+		val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTCR);
+		prescale = FIELD_GET(RZG2L_GTCR_TPCS, val);
+
+		val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTPR);
+		state->period = calculate_period_or_duty(rzg2l_gpt, val, prescale);
+
+		val = rzg2l_gpt_read(rzg2l_gpt,
+				     offs + RZG2L_GTCCR(RZG2L_IS_IOB(pwm->hwpwm)));
+		state->duty_cycle = calculate_period_or_duty(rzg2l_gpt, val, prescale);
+		if (state->duty_cycle > state->period)
+			state->duty_cycle = state->period;
+	}
+
+	state->polarity = PWM_POLARITY_NORMAL;
+	pm_runtime_put(chip->dev);
+
+	return 0;
+}
+
+static u32 rzg2l_gpt_calculate_pv_or_dc(u64 period_or_duty_cycle, u8 prescale)
+{
+	return min_t(u64, DIV64_U64_ROUND_UP(period_or_duty_cycle, 1 << (2 * prescale)),
+		     (u64)U32_MAX);
+}
+
+/* Caller holds the lock while calling rzg2l_gpt_config() */
+static int rzg2l_gpt_config(struct pwm_chip *chip, struct pwm_device *pwm,
+			    const struct pwm_state *state)
+{
+	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
+	u8 ch = RZG2L_GET_CH(pwm->hwpwm);
+	u32 offs = RZG2L_GET_CH_OFFS(ch);
+	unsigned long pv, dc;
+	u64 period_cycles;
+	u64 duty_cycles;
+	u8 prescale;
+
+	/*
+	 * GPT counter is shared by multiple channels, so prescale and period
+	 * can NOT be modified when there are multiple channels in use with
+	 * different settings.
+	 */
+	if (state->period != rzg2l_gpt->state_period[ch] && rzg2l_gpt->user_count[ch] > 1)
+		return -EBUSY;
+
+	/* Limit period/duty cycle to max value supported by the HW */
+	if (state->period > rzg2l_gpt->max_val)
+		period_cycles = rzg2l_gpt->max_val;
+	else
+		period_cycles = state->period;
+
+	period_cycles = mul_u64_u32_div(period_cycles, rzg2l_gpt->rate, NSEC_PER_SEC);
+	prescale = rzg2l_gpt_calculate_prescale(rzg2l_gpt, period_cycles);
+
+	pv = rzg2l_gpt_calculate_pv_or_dc(period_cycles, prescale);
+
+	if (state->duty_cycle > rzg2l_gpt->max_val)
+		duty_cycles = rzg2l_gpt->max_val;
+	else
+		duty_cycles = state->duty_cycle;
+
+	duty_cycles = mul_u64_u32_div(duty_cycles, rzg2l_gpt->rate, NSEC_PER_SEC);
+	dc = rzg2l_gpt_calculate_pv_or_dc(duty_cycles, prescale);
+
+	/*
+	 * GPT counter is shared by multiple channels, we cache the period value
+	 * from the first enabled channel and use the same value for both
+	 * channels.
+	 */
+	rzg2l_gpt->state_period[ch] = state->period;
+
+	/*
+	 * Counter must be stopped before modifying mode, prescaler, timer
+	 * counter and buffer enable registers. These registers are shared
+	 * between both channels. So allow updating these registers only for the
+	 * first enabled channel.
+	 */
+	if (rzg2l_gpt->enable_count[ch] <= 1)
+		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_CST, 0);
+
+	/* GPT set operating mode (saw-wave up-counting) */
+	rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_MD,
+			 RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE);
+
+	/* Set count direction */
+	rzg2l_gpt_write(rzg2l_gpt, offs + RZG2L_GTUDDTYC, RZG2L_UP_COUNTING);
+	/* Select count clock */
+	rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_TPCS,
+			 FIELD_PREP(RZG2L_GTCR_TPCS, prescale));
+
+	/* Set period */
+	rzg2l_gpt_write(rzg2l_gpt, offs + RZG2L_GTPR, pv);
+
+	/* Set duty cycle */
+	rzg2l_gpt_write(rzg2l_gpt, offs + RZG2L_GTCCR(RZG2L_IS_IOB(pwm->hwpwm)),
+			dc);
+
+	/* Set initial value for counter */
+	rzg2l_gpt_write(rzg2l_gpt, offs + RZG2L_GTCNT, 0);
+
+	/* Set no buffer operation */
+	rzg2l_gpt_write(rzg2l_gpt, offs + RZG2L_GTBER, 0);
+
+	/* Restart the counter after updating the registers */
+	if (rzg2l_gpt->enable_count[ch] <= 1)
+		rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR,
+				 RZG2L_GTCR_CST, RZG2L_GTCR_CST);
+
+	return 0;
+}
+
+static int rzg2l_gpt_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+			   const struct pwm_state *state)
+{
+	struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip);
+	bool enabled = pwm->state.enabled;
+	int ret;
+
+	if (state->polarity != PWM_POLARITY_NORMAL)
+		return -EINVAL;
+
+	if (!state->enabled) {
+		if (enabled) {
+			rzg2l_gpt_disable(rzg2l_gpt, pwm);
+			pm_runtime_put_sync(rzg2l_gpt->chip.dev);
+		}
+
+		return 0;
+	}
+
+	if (!enabled) {
+		ret = pm_runtime_resume_and_get(rzg2l_gpt->chip.dev);
+		if (ret)
+			return ret;
+	}
+
+	mutex_lock(&rzg2l_gpt->lock);
+	ret = rzg2l_gpt_config(chip, pwm, state);
+	mutex_unlock(&rzg2l_gpt->lock);
+	if (ret)
+		return ret;
+
+	if (!enabled)
+		ret = rzg2l_gpt_enable(rzg2l_gpt, pwm);
+
+	return ret;
+}
+
+static const struct pwm_ops rzg2l_gpt_ops = {
+	.request = rzg2l_gpt_request,
+	.free = rzg2l_gpt_free,
+	.get_state = rzg2l_gpt_get_state,
+	.apply = rzg2l_gpt_apply,
+};
+
+static int rzg2l_gpt_pm_runtime_suspend(struct device *dev)
+{
+	struct rzg2l_gpt_chip *rzg2l_gpt = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(rzg2l_gpt->clk);
+
+	return 0;
+}
+
+static int rzg2l_gpt_pm_runtime_resume(struct device *dev)
+{
+	struct rzg2l_gpt_chip *rzg2l_gpt = dev_get_drvdata(dev);
+
+	return clk_prepare_enable(rzg2l_gpt->clk);
+}
+
+static DEFINE_RUNTIME_DEV_PM_OPS(rzg2l_gpt_pm_ops,
+				 rzg2l_gpt_pm_runtime_suspend,
+				 rzg2l_gpt_pm_runtime_resume, NULL);
+
+static void rzg2l_gpt_reset_assert_pm_disable(void *data)
+{
+	struct rzg2l_gpt_chip *rzg2l_gpt = data;
+	u32 i;
+
+	clk_rate_exclusive_put(rzg2l_gpt->clk);
+	/*
+	 * The below check is for making balanced PM usage count
+	 * eg: boot loader is turning on PWM and probe increments the PM usage
+	 * count. Before apply, if there is unbind/remove callback we need to
+	 * decrement the PM usage count.
+	 */
+	for (i = 0; i < RZG2L_MAX_PWM_CHANNELS; i++) {
+		if (test_bit(i, rzg2l_gpt->ch_en_bits))
+			pm_runtime_put(rzg2l_gpt->chip.dev);
+	}
+
+	pm_runtime_disable(rzg2l_gpt->chip.dev);
+	pm_runtime_set_suspended(rzg2l_gpt->chip.dev);
+	reset_control_assert(rzg2l_gpt->rstc);
+}
+
+static int rzg2l_gpt_probe(struct platform_device *pdev)
+{
+	struct rzg2l_gpt_chip *rzg2l_gpt;
+	int ret;
+	u32 i;
+
+	rzg2l_gpt = devm_kzalloc(&pdev->dev, sizeof(*rzg2l_gpt), GFP_KERNEL);
+	if (!rzg2l_gpt)
+		return -ENOMEM;
+
+	rzg2l_gpt->mmio = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(rzg2l_gpt->mmio))
+		return PTR_ERR(rzg2l_gpt->mmio);
+
+	rzg2l_gpt->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+	if (IS_ERR(rzg2l_gpt->rstc))
+		return dev_err_probe(&pdev->dev, PTR_ERR(rzg2l_gpt->rstc),
+				     "get reset failed\n");
+
+	rzg2l_gpt->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(rzg2l_gpt->clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(rzg2l_gpt->clk),
+				     "cannot get clock\n");
+
+	ret = reset_control_deassert(rzg2l_gpt->rstc);
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret,
+				     "cannot deassert reset control\n");
+
+	ret = clk_prepare_enable(rzg2l_gpt->clk);
+	if (ret)
+		goto err_reset;
+
+	ret = clk_rate_exclusive_get(rzg2l_gpt->clk);
+	if (ret)
+		goto err_clk_disable;
+
+	rzg2l_gpt->rate = clk_get_rate(rzg2l_gpt->clk);
+	if (!rzg2l_gpt->rate) {
+		ret = dev_err_probe(&pdev->dev, -EINVAL, "gpt clk rate is 0");
+		goto err_clk_rate_put;
+	}
+
+	/*
+	 * Refuse clk rates > 1 GHz to prevent overflow later for computing
+	 * period and duty cycle.
+	 */
+	if (rzg2l_gpt->rate > NSEC_PER_SEC) {
+		ret = -EINVAL;
+		goto err_clk_rate_put;
+	}
+
+	rzg2l_gpt->max_val = mul_u64_u64_div_u64(U32_MAX, NSEC_PER_SEC,
+						 rzg2l_gpt->rate) * RZG2L_MAX_SCALE_FACTOR;
+
+	/*
+	 *  We need to keep the clock on, in case the bootloader has enabled the
+	 *  PWM and is running during probe().
+	 */
+	for (i = 0; i < RZG2L_MAX_PWM_CHANNELS; i++) {
+		if (rzg2l_gpt_is_ch_enabled(rzg2l_gpt, i)) {
+			set_bit(i, rzg2l_gpt->ch_en_bits);
+			pm_runtime_get_sync(&pdev->dev);
+		}
+	}
+
+	mutex_init(&rzg2l_gpt->lock);
+	platform_set_drvdata(pdev, rzg2l_gpt);
+	rzg2l_gpt->chip.dev = &pdev->dev;
+
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	ret = devm_add_action_or_reset(&pdev->dev,
+				       rzg2l_gpt_reset_assert_pm_disable,
+				       rzg2l_gpt);
+	if (ret < 0)
+		return ret;
+
+	rzg2l_gpt->chip.ops = &rzg2l_gpt_ops;
+	rzg2l_gpt->chip.npwm = RZG2L_MAX_PWM_CHANNELS;
+	ret = devm_pwmchip_add(&pdev->dev, &rzg2l_gpt->chip);
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
+
+	pm_runtime_idle(&pdev->dev);
+
+	return 0;
+
+err_clk_rate_put:
+	clk_rate_exclusive_put(rzg2l_gpt->clk);
+err_clk_disable:
+	clk_disable_unprepare(rzg2l_gpt->clk);
+err_reset:
+	reset_control_assert(rzg2l_gpt->rstc);
+	return ret;
+}
+
+static const struct of_device_id rzg2l_gpt_of_table[] = {
+	{ .compatible = "renesas,rzg2l-gpt", },
+	{ /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rzg2l_gpt_of_table);
+
+static struct platform_driver rzg2l_gpt_driver = {
+	.driver = {
+		.name = "pwm-rzg2l-gpt",
+		.pm = pm_ptr(&rzg2l_gpt_pm_ops),
+		.of_match_table = rzg2l_gpt_of_table,
+	},
+	.probe = rzg2l_gpt_probe,
+};
+module_platform_driver(rzg2l_gpt_driver);
+
+MODULE_AUTHOR("Biju Das <biju.das.jz@bp.renesas.com>");
+MODULE_DESCRIPTION("Renesas RZ/G2L General PWM Timer (GPT) Driver");
+MODULE_LICENSE("GPL");