diff mbox series

[-next,v20,20/26] riscv: Add prctl controls for userspace vector management

Message ID 20230518161949.11203-21-andy.chiu@sifive.com (mailing list archive)
State Superseded
Headers show
Series riscv: Add vector ISA support | expand

Checks

Context Check Description
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next at HEAD ac9a78681b92
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 6 and now 6
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/build_rv64_clang_allmodconfig fail Errors and warnings before: 2849 this patch: 2850
conchuod/module_param success Was 0 now: 0
conchuod/build_rv64_gcc_allmodconfig success Errors and warnings before: 16381 this patch: 16381
conchuod/build_rv32_defconfig success Build OK
conchuod/dtb_warn_rv64 success Errors and warnings before: 3 this patch: 3
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch warning CHECK: extern prototypes should be avoided in .h files
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK

Commit Message

Andy Chiu May 18, 2023, 4:19 p.m. UTC
This patch add two riscv-specific prctls, to allow usespace control the
use of vector unit:

 * PR_RISCV_V_SET_CONTROL: control the permission to use Vector at next,
   or all following execve for a thread. Turning off a thread's Vector
   live is not possible since libraries may have registered ifunc that
   may execute Vector instructions.
 * PR_RISCV_V_GET_CONTROL: get the same permission setting for the
   current thread, and the setting for following execve(s).

Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
Reviewed-by: Greentime Hu <greentime.hu@sifive.com>
Reviewed-by: Vincent Chen <vincent.chen@sifive.com>
---
Changelog v20:
 - address build issue when KVM is compile as a module (Heiko)
 - s/RISCV_V_DISABLE/RISCV_ISA_V_DEFAULT_ENABLE/ (Conor)
 - change function names to have better scoping
 - check has_vector() before accessing vstate_ctrl
 - use proper return type for prctl calls (long instead of uint)
---
 arch/riscv/include/asm/processor.h |  13 ++++
 arch/riscv/include/asm/vector.h    |   4 +
 arch/riscv/kernel/process.c        |   1 +
 arch/riscv/kernel/vector.c         | 118 +++++++++++++++++++++++++++++
 arch/riscv/kvm/vcpu.c              |   2 +
 include/uapi/linux/prctl.h         |  11 +++
 kernel/sys.c                       |  12 +++
 7 files changed, 161 insertions(+)

Comments

kernel test robot May 20, 2023, 2:11 p.m. UTC | #1
Hi Andy,

kernel test robot noticed the following build errors:

[auto build test ERROR on next-20230518]

url:    https://github.com/intel-lab-lkp/linux/commits/Andy-Chiu/riscv-Rename-__switch_to_aux-fpu/20230519-005938
base:   next-20230518
patch link:    https://lore.kernel.org/r/20230518161949.11203-21-andy.chiu%40sifive.com
patch subject: [PATCH -next v20 20/26] riscv: Add prctl controls for userspace vector management
config: microblaze-randconfig-m031-20230517
compiler: microblaze-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/eef6095228f3323db8f2bddd5bde768976888558
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Andy-Chiu/riscv-Rename-__switch_to_aux-fpu/20230519-005938
        git checkout eef6095228f3323db8f2bddd5bde768976888558
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=microblaze olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=microblaze SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202305202157.62W33UKT-lkp@intel.com/

All errors (new ones prefixed by >>):

   kernel/sys.c: In function '__do_sys_prctl':
>> kernel/sys.c:2718:25: error: implicit declaration of function 'RISCV_V_SET_CONTROL'; did you mean 'PR_RISCV_V_SET_CONTROL'? [-Werror=implicit-function-declaration]
    2718 |                 error = RISCV_V_SET_CONTROL(arg2);
         |                         ^~~~~~~~~~~~~~~~~~~
         |                         PR_RISCV_V_SET_CONTROL
>> kernel/sys.c:2721:25: error: implicit declaration of function 'RISCV_V_GET_CONTROL'; did you mean 'PR_RISCV_V_GET_CONTROL'? [-Werror=implicit-function-declaration]
    2721 |                 error = RISCV_V_GET_CONTROL();
         |                         ^~~~~~~~~~~~~~~~~~~
         |                         PR_RISCV_V_GET_CONTROL
   cc1: some warnings being treated as errors


vim +2718 kernel/sys.c

  2407	
  2408	SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
  2409			unsigned long, arg4, unsigned long, arg5)
  2410	{
  2411		struct task_struct *me = current;
  2412		unsigned char comm[sizeof(me->comm)];
  2413		long error;
  2414	
  2415		error = security_task_prctl(option, arg2, arg3, arg4, arg5);
  2416		if (error != -ENOSYS)
  2417			return error;
  2418	
  2419		error = 0;
  2420		switch (option) {
  2421		case PR_SET_PDEATHSIG:
  2422			if (!valid_signal(arg2)) {
  2423				error = -EINVAL;
  2424				break;
  2425			}
  2426			me->pdeath_signal = arg2;
  2427			break;
  2428		case PR_GET_PDEATHSIG:
  2429			error = put_user(me->pdeath_signal, (int __user *)arg2);
  2430			break;
  2431		case PR_GET_DUMPABLE:
  2432			error = get_dumpable(me->mm);
  2433			break;
  2434		case PR_SET_DUMPABLE:
  2435			if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
  2436				error = -EINVAL;
  2437				break;
  2438			}
  2439			set_dumpable(me->mm, arg2);
  2440			break;
  2441	
  2442		case PR_SET_UNALIGN:
  2443			error = SET_UNALIGN_CTL(me, arg2);
  2444			break;
  2445		case PR_GET_UNALIGN:
  2446			error = GET_UNALIGN_CTL(me, arg2);
  2447			break;
  2448		case PR_SET_FPEMU:
  2449			error = SET_FPEMU_CTL(me, arg2);
  2450			break;
  2451		case PR_GET_FPEMU:
  2452			error = GET_FPEMU_CTL(me, arg2);
  2453			break;
  2454		case PR_SET_FPEXC:
  2455			error = SET_FPEXC_CTL(me, arg2);
  2456			break;
  2457		case PR_GET_FPEXC:
  2458			error = GET_FPEXC_CTL(me, arg2);
  2459			break;
  2460		case PR_GET_TIMING:
  2461			error = PR_TIMING_STATISTICAL;
  2462			break;
  2463		case PR_SET_TIMING:
  2464			if (arg2 != PR_TIMING_STATISTICAL)
  2465				error = -EINVAL;
  2466			break;
  2467		case PR_SET_NAME:
  2468			comm[sizeof(me->comm) - 1] = 0;
  2469			if (strncpy_from_user(comm, (char __user *)arg2,
  2470					      sizeof(me->comm) - 1) < 0)
  2471				return -EFAULT;
  2472			set_task_comm(me, comm);
  2473			proc_comm_connector(me);
  2474			break;
  2475		case PR_GET_NAME:
  2476			get_task_comm(comm, me);
  2477			if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
  2478				return -EFAULT;
  2479			break;
  2480		case PR_GET_ENDIAN:
  2481			error = GET_ENDIAN(me, arg2);
  2482			break;
  2483		case PR_SET_ENDIAN:
  2484			error = SET_ENDIAN(me, arg2);
  2485			break;
  2486		case PR_GET_SECCOMP:
  2487			error = prctl_get_seccomp();
  2488			break;
  2489		case PR_SET_SECCOMP:
  2490			error = prctl_set_seccomp(arg2, (char __user *)arg3);
  2491			break;
  2492		case PR_GET_TSC:
  2493			error = GET_TSC_CTL(arg2);
  2494			break;
  2495		case PR_SET_TSC:
  2496			error = SET_TSC_CTL(arg2);
  2497			break;
  2498		case PR_TASK_PERF_EVENTS_DISABLE:
  2499			error = perf_event_task_disable();
  2500			break;
  2501		case PR_TASK_PERF_EVENTS_ENABLE:
  2502			error = perf_event_task_enable();
  2503			break;
  2504		case PR_GET_TIMERSLACK:
  2505			if (current->timer_slack_ns > ULONG_MAX)
  2506				error = ULONG_MAX;
  2507			else
  2508				error = current->timer_slack_ns;
  2509			break;
  2510		case PR_SET_TIMERSLACK:
  2511			if (arg2 <= 0)
  2512				current->timer_slack_ns =
  2513						current->default_timer_slack_ns;
  2514			else
  2515				current->timer_slack_ns = arg2;
  2516			break;
  2517		case PR_MCE_KILL:
  2518			if (arg4 | arg5)
  2519				return -EINVAL;
  2520			switch (arg2) {
  2521			case PR_MCE_KILL_CLEAR:
  2522				if (arg3 != 0)
  2523					return -EINVAL;
  2524				current->flags &= ~PF_MCE_PROCESS;
  2525				break;
  2526			case PR_MCE_KILL_SET:
  2527				current->flags |= PF_MCE_PROCESS;
  2528				if (arg3 == PR_MCE_KILL_EARLY)
  2529					current->flags |= PF_MCE_EARLY;
  2530				else if (arg3 == PR_MCE_KILL_LATE)
  2531					current->flags &= ~PF_MCE_EARLY;
  2532				else if (arg3 == PR_MCE_KILL_DEFAULT)
  2533					current->flags &=
  2534							~(PF_MCE_EARLY|PF_MCE_PROCESS);
  2535				else
  2536					return -EINVAL;
  2537				break;
  2538		case PR_GET_AUXV:
  2539			if (arg4 || arg5)
  2540				return -EINVAL;
  2541			error = prctl_get_auxv((void __user *)arg2, arg3);
  2542			break;
  2543			default:
  2544				return -EINVAL;
  2545			}
  2546			break;
  2547		case PR_MCE_KILL_GET:
  2548			if (arg2 | arg3 | arg4 | arg5)
  2549				return -EINVAL;
  2550			if (current->flags & PF_MCE_PROCESS)
  2551				error = (current->flags & PF_MCE_EARLY) ?
  2552					PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
  2553			else
  2554				error = PR_MCE_KILL_DEFAULT;
  2555			break;
  2556		case PR_SET_MM:
  2557			error = prctl_set_mm(arg2, arg3, arg4, arg5);
  2558			break;
  2559		case PR_GET_TID_ADDRESS:
  2560			error = prctl_get_tid_address(me, (int __user * __user *)arg2);
  2561			break;
  2562		case PR_SET_CHILD_SUBREAPER:
  2563			me->signal->is_child_subreaper = !!arg2;
  2564			if (!arg2)
  2565				break;
  2566	
  2567			walk_process_tree(me, propagate_has_child_subreaper, NULL);
  2568			break;
  2569		case PR_GET_CHILD_SUBREAPER:
  2570			error = put_user(me->signal->is_child_subreaper,
  2571					 (int __user *)arg2);
  2572			break;
  2573		case PR_SET_NO_NEW_PRIVS:
  2574			if (arg2 != 1 || arg3 || arg4 || arg5)
  2575				return -EINVAL;
  2576	
  2577			task_set_no_new_privs(current);
  2578			break;
  2579		case PR_GET_NO_NEW_PRIVS:
  2580			if (arg2 || arg3 || arg4 || arg5)
  2581				return -EINVAL;
  2582			return task_no_new_privs(current) ? 1 : 0;
  2583		case PR_GET_THP_DISABLE:
  2584			if (arg2 || arg3 || arg4 || arg5)
  2585				return -EINVAL;
  2586			error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags);
  2587			break;
  2588		case PR_SET_THP_DISABLE:
  2589			if (arg3 || arg4 || arg5)
  2590				return -EINVAL;
  2591			if (mmap_write_lock_killable(me->mm))
  2592				return -EINTR;
  2593			if (arg2)
  2594				set_bit(MMF_DISABLE_THP, &me->mm->flags);
  2595			else
  2596				clear_bit(MMF_DISABLE_THP, &me->mm->flags);
  2597			mmap_write_unlock(me->mm);
  2598			break;
  2599		case PR_MPX_ENABLE_MANAGEMENT:
  2600		case PR_MPX_DISABLE_MANAGEMENT:
  2601			/* No longer implemented: */
  2602			return -EINVAL;
  2603		case PR_SET_FP_MODE:
  2604			error = SET_FP_MODE(me, arg2);
  2605			break;
  2606		case PR_GET_FP_MODE:
  2607			error = GET_FP_MODE(me);
  2608			break;
  2609		case PR_SVE_SET_VL:
  2610			error = SVE_SET_VL(arg2);
  2611			break;
  2612		case PR_SVE_GET_VL:
  2613			error = SVE_GET_VL();
  2614			break;
  2615		case PR_SME_SET_VL:
  2616			error = SME_SET_VL(arg2);
  2617			break;
  2618		case PR_SME_GET_VL:
  2619			error = SME_GET_VL();
  2620			break;
  2621		case PR_GET_SPECULATION_CTRL:
  2622			if (arg3 || arg4 || arg5)
  2623				return -EINVAL;
  2624			error = arch_prctl_spec_ctrl_get(me, arg2);
  2625			break;
  2626		case PR_SET_SPECULATION_CTRL:
  2627			if (arg4 || arg5)
  2628				return -EINVAL;
  2629			error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
  2630			break;
  2631		case PR_PAC_RESET_KEYS:
  2632			if (arg3 || arg4 || arg5)
  2633				return -EINVAL;
  2634			error = PAC_RESET_KEYS(me, arg2);
  2635			break;
  2636		case PR_PAC_SET_ENABLED_KEYS:
  2637			if (arg4 || arg5)
  2638				return -EINVAL;
  2639			error = PAC_SET_ENABLED_KEYS(me, arg2, arg3);
  2640			break;
  2641		case PR_PAC_GET_ENABLED_KEYS:
  2642			if (arg2 || arg3 || arg4 || arg5)
  2643				return -EINVAL;
  2644			error = PAC_GET_ENABLED_KEYS(me);
  2645			break;
  2646		case PR_SET_TAGGED_ADDR_CTRL:
  2647			if (arg3 || arg4 || arg5)
  2648				return -EINVAL;
  2649			error = SET_TAGGED_ADDR_CTRL(arg2);
  2650			break;
  2651		case PR_GET_TAGGED_ADDR_CTRL:
  2652			if (arg2 || arg3 || arg4 || arg5)
  2653				return -EINVAL;
  2654			error = GET_TAGGED_ADDR_CTRL();
  2655			break;
  2656		case PR_SET_IO_FLUSHER:
  2657			if (!capable(CAP_SYS_RESOURCE))
  2658				return -EPERM;
  2659	
  2660			if (arg3 || arg4 || arg5)
  2661				return -EINVAL;
  2662	
  2663			if (arg2 == 1)
  2664				current->flags |= PR_IO_FLUSHER;
  2665			else if (!arg2)
  2666				current->flags &= ~PR_IO_FLUSHER;
  2667			else
  2668				return -EINVAL;
  2669			break;
  2670		case PR_GET_IO_FLUSHER:
  2671			if (!capable(CAP_SYS_RESOURCE))
  2672				return -EPERM;
  2673	
  2674			if (arg2 || arg3 || arg4 || arg5)
  2675				return -EINVAL;
  2676	
  2677			error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER;
  2678			break;
  2679		case PR_SET_SYSCALL_USER_DISPATCH:
  2680			error = set_syscall_user_dispatch(arg2, arg3, arg4,
  2681							  (char __user *) arg5);
  2682			break;
  2683	#ifdef CONFIG_SCHED_CORE
  2684		case PR_SCHED_CORE:
  2685			error = sched_core_share_pid(arg2, arg3, arg4, arg5);
  2686			break;
  2687	#endif
  2688		case PR_SET_MDWE:
  2689			error = prctl_set_mdwe(arg2, arg3, arg4, arg5);
  2690			break;
  2691		case PR_GET_MDWE:
  2692			error = prctl_get_mdwe(arg2, arg3, arg4, arg5);
  2693			break;
  2694		case PR_SET_VMA:
  2695			error = prctl_set_vma(arg2, arg3, arg4, arg5);
  2696			break;
  2697	#ifdef CONFIG_KSM
  2698		case PR_SET_MEMORY_MERGE:
  2699			if (arg3 || arg4 || arg5)
  2700				return -EINVAL;
  2701			if (mmap_write_lock_killable(me->mm))
  2702				return -EINTR;
  2703	
  2704			if (arg2)
  2705				error = ksm_enable_merge_any(me->mm);
  2706			else
  2707				error = ksm_disable_merge_any(me->mm);
  2708			mmap_write_unlock(me->mm);
  2709			break;
  2710		case PR_GET_MEMORY_MERGE:
  2711			if (arg2 || arg3 || arg4 || arg5)
  2712				return -EINVAL;
  2713	
  2714			error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
  2715			break;
  2716	#endif
  2717		case PR_RISCV_V_SET_CONTROL:
> 2718			error = RISCV_V_SET_CONTROL(arg2);
  2719			break;
  2720		case PR_RISCV_V_GET_CONTROL:
> 2721			error = RISCV_V_GET_CONTROL();
  2722			break;
  2723		default:
  2724			error = -EINVAL;
  2725			break;
  2726		}
  2727		return error;
  2728	}
  2729
kernel test robot May 21, 2023, 1:50 a.m. UTC | #2
Hi Andy,

kernel test robot noticed the following build errors:

[auto build test ERROR on next-20230518]

url:    https://github.com/intel-lab-lkp/linux/commits/Andy-Chiu/riscv-Rename-__switch_to_aux-fpu/20230519-005938
base:   next-20230518
patch link:    https://lore.kernel.org/r/20230518161949.11203-21-andy.chiu%40sifive.com
patch subject: [PATCH -next v20 20/26] riscv: Add prctl controls for userspace vector management
config: arm-sp7021_defconfig
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project b0fb98227c90adf2536c9ad644a74d5e92961111)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm cross compiling tool for clang build
        # apt-get install binutils-arm-linux-gnueabi
        # https://github.com/intel-lab-lkp/linux/commit/eef6095228f3323db8f2bddd5bde768976888558
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Andy-Chiu/riscv-Rename-__switch_to_aux-fpu/20230519-005938
        git checkout eef6095228f3323db8f2bddd5bde768976888558
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202305210917.aS7cWlKv-lkp@intel.com/

All errors (new ones prefixed by >>):

>> kernel/sys.c:2718:11: error: call to undeclared function 'RISCV_V_SET_CONTROL'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
                   error = RISCV_V_SET_CONTROL(arg2);
                           ^
>> kernel/sys.c:2721:11: error: call to undeclared function 'RISCV_V_GET_CONTROL'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
                   error = RISCV_V_GET_CONTROL();
                           ^
   2 errors generated.


vim +/RISCV_V_SET_CONTROL +2718 kernel/sys.c

  2407	
  2408	SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
  2409			unsigned long, arg4, unsigned long, arg5)
  2410	{
  2411		struct task_struct *me = current;
  2412		unsigned char comm[sizeof(me->comm)];
  2413		long error;
  2414	
  2415		error = security_task_prctl(option, arg2, arg3, arg4, arg5);
  2416		if (error != -ENOSYS)
  2417			return error;
  2418	
  2419		error = 0;
  2420		switch (option) {
  2421		case PR_SET_PDEATHSIG:
  2422			if (!valid_signal(arg2)) {
  2423				error = -EINVAL;
  2424				break;
  2425			}
  2426			me->pdeath_signal = arg2;
  2427			break;
  2428		case PR_GET_PDEATHSIG:
  2429			error = put_user(me->pdeath_signal, (int __user *)arg2);
  2430			break;
  2431		case PR_GET_DUMPABLE:
  2432			error = get_dumpable(me->mm);
  2433			break;
  2434		case PR_SET_DUMPABLE:
  2435			if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
  2436				error = -EINVAL;
  2437				break;
  2438			}
  2439			set_dumpable(me->mm, arg2);
  2440			break;
  2441	
  2442		case PR_SET_UNALIGN:
  2443			error = SET_UNALIGN_CTL(me, arg2);
  2444			break;
  2445		case PR_GET_UNALIGN:
  2446			error = GET_UNALIGN_CTL(me, arg2);
  2447			break;
  2448		case PR_SET_FPEMU:
  2449			error = SET_FPEMU_CTL(me, arg2);
  2450			break;
  2451		case PR_GET_FPEMU:
  2452			error = GET_FPEMU_CTL(me, arg2);
  2453			break;
  2454		case PR_SET_FPEXC:
  2455			error = SET_FPEXC_CTL(me, arg2);
  2456			break;
  2457		case PR_GET_FPEXC:
  2458			error = GET_FPEXC_CTL(me, arg2);
  2459			break;
  2460		case PR_GET_TIMING:
  2461			error = PR_TIMING_STATISTICAL;
  2462			break;
  2463		case PR_SET_TIMING:
  2464			if (arg2 != PR_TIMING_STATISTICAL)
  2465				error = -EINVAL;
  2466			break;
  2467		case PR_SET_NAME:
  2468			comm[sizeof(me->comm) - 1] = 0;
  2469			if (strncpy_from_user(comm, (char __user *)arg2,
  2470					      sizeof(me->comm) - 1) < 0)
  2471				return -EFAULT;
  2472			set_task_comm(me, comm);
  2473			proc_comm_connector(me);
  2474			break;
  2475		case PR_GET_NAME:
  2476			get_task_comm(comm, me);
  2477			if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
  2478				return -EFAULT;
  2479			break;
  2480		case PR_GET_ENDIAN:
  2481			error = GET_ENDIAN(me, arg2);
  2482			break;
  2483		case PR_SET_ENDIAN:
  2484			error = SET_ENDIAN(me, arg2);
  2485			break;
  2486		case PR_GET_SECCOMP:
  2487			error = prctl_get_seccomp();
  2488			break;
  2489		case PR_SET_SECCOMP:
  2490			error = prctl_set_seccomp(arg2, (char __user *)arg3);
  2491			break;
  2492		case PR_GET_TSC:
  2493			error = GET_TSC_CTL(arg2);
  2494			break;
  2495		case PR_SET_TSC:
  2496			error = SET_TSC_CTL(arg2);
  2497			break;
  2498		case PR_TASK_PERF_EVENTS_DISABLE:
  2499			error = perf_event_task_disable();
  2500			break;
  2501		case PR_TASK_PERF_EVENTS_ENABLE:
  2502			error = perf_event_task_enable();
  2503			break;
  2504		case PR_GET_TIMERSLACK:
  2505			if (current->timer_slack_ns > ULONG_MAX)
  2506				error = ULONG_MAX;
  2507			else
  2508				error = current->timer_slack_ns;
  2509			break;
  2510		case PR_SET_TIMERSLACK:
  2511			if (arg2 <= 0)
  2512				current->timer_slack_ns =
  2513						current->default_timer_slack_ns;
  2514			else
  2515				current->timer_slack_ns = arg2;
  2516			break;
  2517		case PR_MCE_KILL:
  2518			if (arg4 | arg5)
  2519				return -EINVAL;
  2520			switch (arg2) {
  2521			case PR_MCE_KILL_CLEAR:
  2522				if (arg3 != 0)
  2523					return -EINVAL;
  2524				current->flags &= ~PF_MCE_PROCESS;
  2525				break;
  2526			case PR_MCE_KILL_SET:
  2527				current->flags |= PF_MCE_PROCESS;
  2528				if (arg3 == PR_MCE_KILL_EARLY)
  2529					current->flags |= PF_MCE_EARLY;
  2530				else if (arg3 == PR_MCE_KILL_LATE)
  2531					current->flags &= ~PF_MCE_EARLY;
  2532				else if (arg3 == PR_MCE_KILL_DEFAULT)
  2533					current->flags &=
  2534							~(PF_MCE_EARLY|PF_MCE_PROCESS);
  2535				else
  2536					return -EINVAL;
  2537				break;
  2538		case PR_GET_AUXV:
  2539			if (arg4 || arg5)
  2540				return -EINVAL;
  2541			error = prctl_get_auxv((void __user *)arg2, arg3);
  2542			break;
  2543			default:
  2544				return -EINVAL;
  2545			}
  2546			break;
  2547		case PR_MCE_KILL_GET:
  2548			if (arg2 | arg3 | arg4 | arg5)
  2549				return -EINVAL;
  2550			if (current->flags & PF_MCE_PROCESS)
  2551				error = (current->flags & PF_MCE_EARLY) ?
  2552					PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
  2553			else
  2554				error = PR_MCE_KILL_DEFAULT;
  2555			break;
  2556		case PR_SET_MM:
  2557			error = prctl_set_mm(arg2, arg3, arg4, arg5);
  2558			break;
  2559		case PR_GET_TID_ADDRESS:
  2560			error = prctl_get_tid_address(me, (int __user * __user *)arg2);
  2561			break;
  2562		case PR_SET_CHILD_SUBREAPER:
  2563			me->signal->is_child_subreaper = !!arg2;
  2564			if (!arg2)
  2565				break;
  2566	
  2567			walk_process_tree(me, propagate_has_child_subreaper, NULL);
  2568			break;
  2569		case PR_GET_CHILD_SUBREAPER:
  2570			error = put_user(me->signal->is_child_subreaper,
  2571					 (int __user *)arg2);
  2572			break;
  2573		case PR_SET_NO_NEW_PRIVS:
  2574			if (arg2 != 1 || arg3 || arg4 || arg5)
  2575				return -EINVAL;
  2576	
  2577			task_set_no_new_privs(current);
  2578			break;
  2579		case PR_GET_NO_NEW_PRIVS:
  2580			if (arg2 || arg3 || arg4 || arg5)
  2581				return -EINVAL;
  2582			return task_no_new_privs(current) ? 1 : 0;
  2583		case PR_GET_THP_DISABLE:
  2584			if (arg2 || arg3 || arg4 || arg5)
  2585				return -EINVAL;
  2586			error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags);
  2587			break;
  2588		case PR_SET_THP_DISABLE:
  2589			if (arg3 || arg4 || arg5)
  2590				return -EINVAL;
  2591			if (mmap_write_lock_killable(me->mm))
  2592				return -EINTR;
  2593			if (arg2)
  2594				set_bit(MMF_DISABLE_THP, &me->mm->flags);
  2595			else
  2596				clear_bit(MMF_DISABLE_THP, &me->mm->flags);
  2597			mmap_write_unlock(me->mm);
  2598			break;
  2599		case PR_MPX_ENABLE_MANAGEMENT:
  2600		case PR_MPX_DISABLE_MANAGEMENT:
  2601			/* No longer implemented: */
  2602			return -EINVAL;
  2603		case PR_SET_FP_MODE:
  2604			error = SET_FP_MODE(me, arg2);
  2605			break;
  2606		case PR_GET_FP_MODE:
  2607			error = GET_FP_MODE(me);
  2608			break;
  2609		case PR_SVE_SET_VL:
  2610			error = SVE_SET_VL(arg2);
  2611			break;
  2612		case PR_SVE_GET_VL:
  2613			error = SVE_GET_VL();
  2614			break;
  2615		case PR_SME_SET_VL:
  2616			error = SME_SET_VL(arg2);
  2617			break;
  2618		case PR_SME_GET_VL:
  2619			error = SME_GET_VL();
  2620			break;
  2621		case PR_GET_SPECULATION_CTRL:
  2622			if (arg3 || arg4 || arg5)
  2623				return -EINVAL;
  2624			error = arch_prctl_spec_ctrl_get(me, arg2);
  2625			break;
  2626		case PR_SET_SPECULATION_CTRL:
  2627			if (arg4 || arg5)
  2628				return -EINVAL;
  2629			error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
  2630			break;
  2631		case PR_PAC_RESET_KEYS:
  2632			if (arg3 || arg4 || arg5)
  2633				return -EINVAL;
  2634			error = PAC_RESET_KEYS(me, arg2);
  2635			break;
  2636		case PR_PAC_SET_ENABLED_KEYS:
  2637			if (arg4 || arg5)
  2638				return -EINVAL;
  2639			error = PAC_SET_ENABLED_KEYS(me, arg2, arg3);
  2640			break;
  2641		case PR_PAC_GET_ENABLED_KEYS:
  2642			if (arg2 || arg3 || arg4 || arg5)
  2643				return -EINVAL;
  2644			error = PAC_GET_ENABLED_KEYS(me);
  2645			break;
  2646		case PR_SET_TAGGED_ADDR_CTRL:
  2647			if (arg3 || arg4 || arg5)
  2648				return -EINVAL;
  2649			error = SET_TAGGED_ADDR_CTRL(arg2);
  2650			break;
  2651		case PR_GET_TAGGED_ADDR_CTRL:
  2652			if (arg2 || arg3 || arg4 || arg5)
  2653				return -EINVAL;
  2654			error = GET_TAGGED_ADDR_CTRL();
  2655			break;
  2656		case PR_SET_IO_FLUSHER:
  2657			if (!capable(CAP_SYS_RESOURCE))
  2658				return -EPERM;
  2659	
  2660			if (arg3 || arg4 || arg5)
  2661				return -EINVAL;
  2662	
  2663			if (arg2 == 1)
  2664				current->flags |= PR_IO_FLUSHER;
  2665			else if (!arg2)
  2666				current->flags &= ~PR_IO_FLUSHER;
  2667			else
  2668				return -EINVAL;
  2669			break;
  2670		case PR_GET_IO_FLUSHER:
  2671			if (!capable(CAP_SYS_RESOURCE))
  2672				return -EPERM;
  2673	
  2674			if (arg2 || arg3 || arg4 || arg5)
  2675				return -EINVAL;
  2676	
  2677			error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER;
  2678			break;
  2679		case PR_SET_SYSCALL_USER_DISPATCH:
  2680			error = set_syscall_user_dispatch(arg2, arg3, arg4,
  2681							  (char __user *) arg5);
  2682			break;
  2683	#ifdef CONFIG_SCHED_CORE
  2684		case PR_SCHED_CORE:
  2685			error = sched_core_share_pid(arg2, arg3, arg4, arg5);
  2686			break;
  2687	#endif
  2688		case PR_SET_MDWE:
  2689			error = prctl_set_mdwe(arg2, arg3, arg4, arg5);
  2690			break;
  2691		case PR_GET_MDWE:
  2692			error = prctl_get_mdwe(arg2, arg3, arg4, arg5);
  2693			break;
  2694		case PR_SET_VMA:
  2695			error = prctl_set_vma(arg2, arg3, arg4, arg5);
  2696			break;
  2697	#ifdef CONFIG_KSM
  2698		case PR_SET_MEMORY_MERGE:
  2699			if (arg3 || arg4 || arg5)
  2700				return -EINVAL;
  2701			if (mmap_write_lock_killable(me->mm))
  2702				return -EINTR;
  2703	
  2704			if (arg2)
  2705				error = ksm_enable_merge_any(me->mm);
  2706			else
  2707				error = ksm_disable_merge_any(me->mm);
  2708			mmap_write_unlock(me->mm);
  2709			break;
  2710		case PR_GET_MEMORY_MERGE:
  2711			if (arg2 || arg3 || arg4 || arg5)
  2712				return -EINVAL;
  2713	
  2714			error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
  2715			break;
  2716	#endif
  2717		case PR_RISCV_V_SET_CONTROL:
> 2718			error = RISCV_V_SET_CONTROL(arg2);
  2719			break;
  2720		case PR_RISCV_V_GET_CONTROL:
> 2721			error = RISCV_V_GET_CONTROL();
  2722			break;
  2723		default:
  2724			error = -EINVAL;
  2725			break;
  2726		}
  2727		return error;
  2728	}
  2729
Rémi Denis-Courmont May 21, 2023, 5:38 a.m. UTC | #3
Hi all,

Le torstaina 18. toukokuuta 2023 19.19.43 EEST, vous avez écrit :
> This patch add two riscv-specific prctls, to allow usespace control the
> use of vector unit:
> 
>  * PR_RISCV_V_SET_CONTROL: control the permission to use Vector at next,
>    or all following execve for a thread. Turning off a thread's Vector
>    live is not possible since libraries may have registered ifunc that
>    may execute Vector instructions.
>  * PR_RISCV_V_GET_CONTROL: get the same permission setting for the
>    current thread, and the setting for following execve(s).

So far the story was that if the nth bit in the ELF HWCAP auxillary vector was 
set, then the nth single lettered extension was supported. There is already 
userspace code out there that expects this of the V bit. (I know I have 
written such code, and I also know others did likewise.) This is how it 
already works for the D and F bits.

Admittedly, upstream Linux has never ever set that bit to this day. But still, 
if we end up with the bit set in a process that has had V support disabled by 
the parent (or the sysctl), existing userspace will encounter SIGILL and 
break.

IMO, the bit must be masked not only whence the kernel lacks V support (as 
PATCH 02 does), but also if the process starts with V disabled.

There are two ways to achieve this:
1) V is never ever set, and userspace is forced to use hwprobe() instead.
2) V is set only in processes starting with V enabled (and it's their own 
fault if they disabled it in future child threads).

Br,
Andy Chiu May 22, 2023, 8:28 a.m. UTC | #4
On Sun, May 21, 2023 at 1:41 PM Rémi Denis-Courmont <remi@remlab.net> wrote:
>
>         Hi all,
>
> Le torstaina 18. toukokuuta 2023 19.19.43 EEST, vous avez écrit :
> > This patch add two riscv-specific prctls, to allow usespace control the
> > use of vector unit:
> >
> >  * PR_RISCV_V_SET_CONTROL: control the permission to use Vector at next,
> >    or all following execve for a thread. Turning off a thread's Vector
> >    live is not possible since libraries may have registered ifunc that
> >    may execute Vector instructions.
> >  * PR_RISCV_V_GET_CONTROL: get the same permission setting for the
> >    current thread, and the setting for following execve(s).
>
> So far the story was that if the nth bit in the ELF HWCAP auxillary vector was
> set, then the nth single lettered extension was supported. There is already
> userspace code out there that expects this of the V bit. (I know I have
> written such code, and I also know others did likewise.) This is how it
> already works for the D and F bits.

Yes, the V bit in ELF_HWCAP becomes vague in this series.

>
> Admittedly, upstream Linux has never ever set that bit to this day. But still,
> if we end up with the bit set in a process that has had V support disabled by
> the parent (or the sysctl), existing userspace will encounter SIGILL and
> break.
>
> IMO, the bit must be masked not only whence the kernel lacks V support (as
> PATCH 02 does), but also if the process starts with V disabled.

This is going to change ELF_HWCAP from a macro to a function. The
function will turn on COMPAT_HWCAP_ISA_V iff V is supported and
allowed. I am going to do this in v21 If this looks sane. i.e.
Currently I don't see other architectures which give different
ELF_HWCAP values on each execve. If ELF_HWCAP is not a right place to
encode the information then userspace has to make the prctl() call to
be certain on whether V is usable.

>
> There are two ways to achieve this:
> 1) V is never ever set, and userspace is forced to use hwprobe() instead.
> 2) V is set only in processes starting with V enabled (and it's their own
> fault if they disabled it in future child threads).

The prctl() interface does not allow processes to turn off V once it
is enabled in its current (execve) context. The process can only
disable V when the next execve() happens. Then, if we implement
ELF_HWCAP as mentioned above, the kernel will reload a new HWCAP for
the process. By then, the new HWCAP will have V masked since it is not
allowed.

>
> Br,
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
>
>
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

Thanks,
Andy
Björn Töpel May 23, 2023, 1:56 p.m. UTC | #5
Andy Chiu <andy.chiu@sifive.com> writes:

> This patch add two riscv-specific prctls, to allow usespace control the
> use of vector unit:
>
>  * PR_RISCV_V_SET_CONTROL: control the permission to use Vector at next,
>    or all following execve for a thread. Turning off a thread's Vector
>    live is not possible since libraries may have registered ifunc that
>    may execute Vector instructions.
>  * PR_RISCV_V_GET_CONTROL: get the same permission setting for the
>    current thread, and the setting for following execve(s).
>
> Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> Reviewed-by: Greentime Hu <greentime.hu@sifive.com>
> Reviewed-by: Vincent Chen <vincent.chen@sifive.com>
> ---
> Changelog v20:
>  - address build issue when KVM is compile as a module (Heiko)
>  - s/RISCV_V_DISABLE/RISCV_ISA_V_DEFAULT_ENABLE/ (Conor)
>  - change function names to have better scoping
>  - check has_vector() before accessing vstate_ctrl
>  - use proper return type for prctl calls (long instead of uint)
> ---
>  arch/riscv/include/asm/processor.h |  13 ++++
>  arch/riscv/include/asm/vector.h    |   4 +
>  arch/riscv/kernel/process.c        |   1 +
>  arch/riscv/kernel/vector.c         | 118 +++++++++++++++++++++++++++++
>  arch/riscv/kvm/vcpu.c              |   2 +
>  include/uapi/linux/prctl.h         |  11 +++
>  kernel/sys.c                       |  12 +++
>  7 files changed, 161 insertions(+)
>
> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> index 38ded8c5f207..17829c3003c8 100644
> --- a/arch/riscv/include/asm/processor.h
> +++ b/arch/riscv/include/asm/processor.h
> @@ -40,6 +40,7 @@ struct thread_struct {
>  	unsigned long s[12];	/* s[0]: frame pointer */
>  	struct __riscv_d_ext_state fstate;
>  	unsigned long bad_cause;
> +	unsigned long vstate_ctrl;
>  	struct __riscv_v_ext_state vstate;
>  };
>  
> @@ -83,6 +84,18 @@ extern void riscv_fill_hwcap(void);
>  extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
>  
>  extern unsigned long signal_minsigstksz __ro_after_init;
> +
> +#ifdef CONFIG_RISCV_ISA_V
> +/* Userspace interface for PR_RISCV_V_{SET,GET}_VS prctl()s: */
> +#define RISCV_V_SET_CONTROL(arg)	riscv_v_vstate_ctrl_set_current(arg)
> +#define RISCV_V_GET_CONTROL()		riscv_v_vstate_ctrl_get_current()
> +extern long riscv_v_vstate_ctrl_set_current(unsigned long arg);
> +extern long riscv_v_vstate_ctrl_get_current(void);
> +#else /* !CONFIG_RISCV_ISA_V */
> +#define RISCV_V_SET_CONTROL(arg)	(-EINVAL)
> +#define RISCV_V_GET_CONTROL()		(-EINVAL)

This version doesn't fix the issue I pointed out in [1]. Let me try to
be more explicit.

RISCV_V_GET_CONTROL and RISCV_V_SET_CONTROL are a function (if
CONFIG_RISCV_ISA_V is defined), otherwise (-EINVAL). However, they are
redefined below, so you can remove the whole #else to #endif... 

[...]

> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index f23d9a16507f..3c36aeade991 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -294,4 +294,15 @@ struct prctl_mm_map {
>  
>  #define PR_SET_MEMORY_MERGE		67
>  #define PR_GET_MEMORY_MERGE		68
> +
> +#define PR_RISCV_V_SET_CONTROL		69
> +#define PR_RISCV_V_GET_CONTROL		70
> +# define PR_RISCV_V_VSTATE_CTRL_DEFAULT		0
> +# define PR_RISCV_V_VSTATE_CTRL_OFF		1
> +# define PR_RISCV_V_VSTATE_CTRL_ON		2
> +# define PR_RISCV_V_VSTATE_CTRL_INHERIT		(1 << 4)
> +# define PR_RISCV_V_VSTATE_CTRL_CUR_MASK	0x3
> +# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK	0xc
> +# define PR_RISCV_V_VSTATE_CTRL_MASK		0x1f
> +
>  #endif /* _LINUX_PRCTL_H */
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 339fee3eff6a..d0d3106698a1 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -140,6 +140,12 @@
>  #ifndef GET_TAGGED_ADDR_CTRL
>  # define GET_TAGGED_ADDR_CTRL()		(-EINVAL)
>  #endif
> +#ifndef PR_RISCV_V_SET_CONTROL
> +# define RISCV_V_SET_CONTROL(a)		(-EINVAL)
> +#endif
> +#ifndef PR_RISCV_V_GET_CONTROL
> +# define RISCV_V_GET_CONTROL()		(-EINVAL)
> +#endif

...because they are defined to EINVAL here. Or at least they are
supposed to. Now, the 2nd issue was that #ifndef PR_RISCV_V_SET_CONTROL
should be #ifndef RISCV_V_SET_CONTROL (and dito for GET).

PR_RISCV_V_SET_CONTROL is *always* defined in the uapi header above.

So, change to:

  | #ifndef RISCV_V_SET_CONTROL
  | # define RISCV_V_SET_CONTROL(a)		(-EINVAL)
  | #endif
  | #ifndef RISCV_V_GET_CONTROL
  | # define RISCV_V_GET_CONTROL()		(-EINVAL)
  | #endif

and remove the #else above.

>  
>  /*
>   * this is where the system-wide overflow UID and GID are defined, for
> @@ -2708,6 +2714,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
>  		error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
>  		break;
>  #endif
> +	case PR_RISCV_V_SET_CONTROL:
> +		error = RISCV_V_SET_CONTROL(arg2);
> +		break;
> +	case PR_RISCV_V_GET_CONTROL:
> +		error = RISCV_V_GET_CONTROL();

PR_RISCV_V_{GET,SET}_CONTROL is always set!


Björn

[1] https://lore.kernel.org/linux-riscv/87ttwdhljn.fsf@all.your.base.are.belong.to.us/
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 38ded8c5f207..17829c3003c8 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -40,6 +40,7 @@  struct thread_struct {
 	unsigned long s[12];	/* s[0]: frame pointer */
 	struct __riscv_d_ext_state fstate;
 	unsigned long bad_cause;
+	unsigned long vstate_ctrl;
 	struct __riscv_v_ext_state vstate;
 };
 
@@ -83,6 +84,18 @@  extern void riscv_fill_hwcap(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 
 extern unsigned long signal_minsigstksz __ro_after_init;
+
+#ifdef CONFIG_RISCV_ISA_V
+/* Userspace interface for PR_RISCV_V_{SET,GET}_VS prctl()s: */
+#define RISCV_V_SET_CONTROL(arg)	riscv_v_vstate_ctrl_set_current(arg)
+#define RISCV_V_GET_CONTROL()		riscv_v_vstate_ctrl_get_current()
+extern long riscv_v_vstate_ctrl_set_current(unsigned long arg);
+extern long riscv_v_vstate_ctrl_get_current(void);
+#else /* !CONFIG_RISCV_ISA_V */
+#define RISCV_V_SET_CONTROL(arg)	(-EINVAL)
+#define RISCV_V_GET_CONTROL()		(-EINVAL)
+#endif /* CONFIG_RISCV_ISA_V */
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_RISCV_PROCESSOR_H */
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 8e56da67b5cf..04c0b07bf6cd 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -160,6 +160,9 @@  static inline void __switch_to_vector(struct task_struct *prev,
 	riscv_v_vstate_restore(next, task_pt_regs(next));
 }
 
+void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
+bool riscv_v_vstate_ctrl_user_allowed(void);
+
 #else /* ! CONFIG_RISCV_ISA_V  */
 
 struct pt_regs;
@@ -168,6 +171,7 @@  static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; }
 static __always_inline bool has_vector(void) { return false; }
 static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
+static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
 #define riscv_v_vsize (0)
 #define riscv_v_vstate_save(task, regs)		do {} while (0)
 #define riscv_v_vstate_restore(task, regs)	do {} while (0)
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index b7a10361ddc6..60278233926c 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -149,6 +149,7 @@  void flush_thread(void)
 #endif
 #ifdef CONFIG_RISCV_ISA_V
 	/* Reset vector state */
+	riscv_v_vstate_ctrl_init(current);
 	riscv_v_vstate_off(task_pt_regs(current));
 	kfree(current->thread.vstate.datap);
 	memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 0080798e8d2e..9bee7a201106 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -9,6 +9,7 @@ 
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/prctl.h>
 
 #include <asm/thread_info.h>
 #include <asm/processor.h>
@@ -19,6 +20,8 @@ 
 #include <asm/ptrace.h>
 #include <asm/bug.h>
 
+static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE);
+
 unsigned long riscv_v_vsize __read_mostly;
 EXPORT_SYMBOL_GPL(riscv_v_vsize);
 
@@ -91,6 +94,43 @@  static int riscv_v_thread_zalloc(void)
 	return 0;
 }
 
+#define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)
+#define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2)
+#define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK)
+#define VSTATE_CTRL_GET_INHERIT(x) (!!((x) & PR_RISCV_V_VSTATE_CTRL_INHERIT))
+static inline int riscv_v_ctrl_get_cur(struct task_struct *tsk)
+{
+	return VSTATE_CTRL_GET_CUR(tsk->thread.vstate_ctrl);
+}
+
+static inline int riscv_v_ctrl_get_next(struct task_struct *tsk)
+{
+	return VSTATE_CTRL_GET_NEXT(tsk->thread.vstate_ctrl);
+}
+
+static inline bool riscv_v_ctrl_test_inherit(struct task_struct *tsk)
+{
+	return VSTATE_CTRL_GET_INHERIT(tsk->thread.vstate_ctrl);
+}
+
+static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt,
+				    bool inherit)
+{
+	unsigned long ctrl;
+
+	ctrl = cur & PR_RISCV_V_VSTATE_CTRL_CUR_MASK;
+	ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt);
+	if (inherit)
+		ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+	tsk->thread.vstate_ctrl = ctrl;
+}
+
+bool riscv_v_vstate_ctrl_user_allowed(void)
+{
+	return riscv_v_ctrl_get_cur(current) == PR_RISCV_V_VSTATE_CTRL_ON;
+}
+EXPORT_SYMBOL_GPL(riscv_v_vstate_ctrl_user_allowed);
+
 bool riscv_v_first_use_handler(struct pt_regs *regs)
 {
 	u32 __user *epc = (u32 __user *)regs->epc;
@@ -100,6 +140,10 @@  bool riscv_v_first_use_handler(struct pt_regs *regs)
 	if (!has_vector() || !(elf_hwcap & COMPAT_HWCAP_ISA_V))
 		return false;
 
+	/* Do not handle the trap if V is not allowed for this process*/
+	if (!riscv_v_vstate_ctrl_user_allowed())
+		return false;
+
 	/* If V has been enabled then it is not the first-use trap */
 	if (riscv_v_vstate_query(regs))
 		return false;
@@ -129,3 +173,77 @@  bool riscv_v_first_use_handler(struct pt_regs *regs)
 	riscv_v_vstate_on(regs);
 	return true;
 }
+
+void riscv_v_vstate_ctrl_init(struct task_struct *tsk)
+{
+	bool inherit;
+	int cur, next;
+
+	if (!has_vector())
+		return;
+
+	next = riscv_v_ctrl_get_next(tsk);
+	if (!next) {
+		if (riscv_v_implicit_uacc)
+			cur = PR_RISCV_V_VSTATE_CTRL_ON;
+		else
+			cur = PR_RISCV_V_VSTATE_CTRL_OFF;
+	} else {
+		cur = next;
+	}
+	/* Clear next mask if inherit-bit is not set */
+	inherit = riscv_v_ctrl_test_inherit(tsk);
+	if (!inherit)
+		next = PR_RISCV_V_VSTATE_CTRL_DEFAULT;
+
+	riscv_v_ctrl_set(tsk, cur, next, inherit);
+}
+
+long riscv_v_vstate_ctrl_get_current(void)
+{
+	if (!has_vector())
+		return -EINVAL;
+
+	return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK;
+}
+
+long riscv_v_vstate_ctrl_set_current(unsigned long arg)
+{
+	bool inherit;
+	int cur, next;
+
+	if (!has_vector())
+		return -EINVAL;
+
+	if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK)
+		return -EINVAL;
+
+	cur = VSTATE_CTRL_GET_CUR(arg);
+	switch (cur) {
+	case PR_RISCV_V_VSTATE_CTRL_OFF:
+		/* Do not allow user to turn off V if current is not off */
+		if (riscv_v_ctrl_get_cur(current) != PR_RISCV_V_VSTATE_CTRL_OFF)
+			return -EPERM;
+
+		break;
+	case PR_RISCV_V_VSTATE_CTRL_ON:
+		break;
+	case PR_RISCV_V_VSTATE_CTRL_DEFAULT:
+		cur = riscv_v_ctrl_get_cur(current);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	next = VSTATE_CTRL_GET_NEXT(arg);
+	inherit = VSTATE_CTRL_GET_INHERIT(arg);
+	switch (next) {
+	case PR_RISCV_V_VSTATE_CTRL_DEFAULT:
+	case PR_RISCV_V_VSTATE_CTRL_OFF:
+	case PR_RISCV_V_VSTATE_CTRL_ON:
+		riscv_v_ctrl_set(current, cur, next, inherit);
+		return 0;
+	}
+
+	return -EINVAL;
+}
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index e5e045852e6a..de24127e7e93 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -88,6 +88,8 @@  static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
 	switch (ext) {
 	case KVM_RISCV_ISA_EXT_H:
 		return false;
+	case KVM_RISCV_ISA_EXT_V:
+		return riscv_v_vstate_ctrl_user_allowed();
 	default:
 		break;
 	}
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index f23d9a16507f..3c36aeade991 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -294,4 +294,15 @@  struct prctl_mm_map {
 
 #define PR_SET_MEMORY_MERGE		67
 #define PR_GET_MEMORY_MERGE		68
+
+#define PR_RISCV_V_SET_CONTROL		69
+#define PR_RISCV_V_GET_CONTROL		70
+# define PR_RISCV_V_VSTATE_CTRL_DEFAULT		0
+# define PR_RISCV_V_VSTATE_CTRL_OFF		1
+# define PR_RISCV_V_VSTATE_CTRL_ON		2
+# define PR_RISCV_V_VSTATE_CTRL_INHERIT		(1 << 4)
+# define PR_RISCV_V_VSTATE_CTRL_CUR_MASK	0x3
+# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK	0xc
+# define PR_RISCV_V_VSTATE_CTRL_MASK		0x1f
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 339fee3eff6a..d0d3106698a1 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -140,6 +140,12 @@ 
 #ifndef GET_TAGGED_ADDR_CTRL
 # define GET_TAGGED_ADDR_CTRL()		(-EINVAL)
 #endif
+#ifndef PR_RISCV_V_SET_CONTROL
+# define RISCV_V_SET_CONTROL(a)		(-EINVAL)
+#endif
+#ifndef PR_RISCV_V_GET_CONTROL
+# define RISCV_V_GET_CONTROL()		(-EINVAL)
+#endif
 
 /*
  * this is where the system-wide overflow UID and GID are defined, for
@@ -2708,6 +2714,12 @@  SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
 		break;
 #endif
+	case PR_RISCV_V_SET_CONTROL:
+		error = RISCV_V_SET_CONTROL(arg2);
+		break;
+	case PR_RISCV_V_GET_CONTROL:
+		error = RISCV_V_GET_CONTROL();
+		break;
 	default:
 		error = -EINVAL;
 		break;