Message ID | 20230127044500.680329-2-stevensd@google.com (mailing list archive)
---|---
State | New, archived
Series | KVM: x86: replace kvm_vcpu_map usage in vmx
Hi David,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on kvm/queue]
[also build test ERROR on linus/master v6.2-rc5 next-20230127]
[cannot apply to kvm/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/David-Stevens/KVM-Support-sharing-gpc-locks/20230128-161425
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link:    https://lore.kernel.org/r/20230127044500.680329-2-stevensd%40google.com
patch subject: [PATCH 1/3] KVM: Support sharing gpc locks
config: i386-randconfig-a003 (https://download.01.org/0day-ci/archive/20230129/202301290541.8nmbLtFC-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/14d62b317199184bb71256cc5f652b04fb2f9107
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review David-Stevens/KVM-Support-sharing-gpc-locks/20230128-161425
        git checkout 14d62b317199184bb71256cc5f652b04fb2f9107
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=i386 olddefconfig
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash arch/x86/ drivers/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arch/x86/kvm/xen.c: In function 'kvm_xen_update_runstate_guest':
>> arch/x86/kvm/xen.c:319:45: error: 'gpc1->lock' is a pointer; did you mean to use '->'?
     319 |                 lock_set_subclass(gpc1->lock.dep_map, 1, _THIS_IP_);
         |                                             ^
         |                                             ->


vim +319 arch/x86/kvm/xen.c

   172
   173  static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
   174  {
   175          struct kvm_vcpu_xen *vx = &v->arch.xen;
   176          struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache;
   177          struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache;
   178          size_t user_len, user_len1, user_len2;
   179          struct vcpu_runstate_info rs;
   180          unsigned long flags;
   181          size_t times_ofs;
   182          uint8_t *update_bit = NULL;
   183          uint64_t entry_time;
   184          uint64_t *rs_times;
   185          int *rs_state;
   186
   187          /*
   188           * The only difference between 32-bit and 64-bit versions of the
   189           * runstate struct is the alignment of uint64_t in 32-bit, which
   190           * means that the 64-bit version has an additional 4 bytes of
   191           * padding after the first field 'state'. Let's be really really
   192           * paranoid about that, and matching it with our internal data
   193           * structures that we memcpy into it...
   194           */
   195          BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
   196          BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
   197          BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
   198  #ifdef CONFIG_X86_64
   199          /*
   200           * The 64-bit structure has 4 bytes of padding before 'state_entry_time'
   201           * so each subsequent field is shifted by 4, and it's 4 bytes longer.
   202           */
   203          BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
   204                       offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
   205          BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
   206                       offsetof(struct compat_vcpu_runstate_info, time) + 4);
   207          BUILD_BUG_ON(sizeof(struct vcpu_runstate_info) != 0x2c + 4);
   208  #endif
   209          /*
   210           * The state field is in the same place at the start of both structs,
   211           * and is the same size (int) as vx->current_runstate.
   212           */
   213          BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
   214                       offsetof(struct compat_vcpu_runstate_info, state));
   215          BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
   216                       sizeof(vx->current_runstate));
   217          BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
   218                       sizeof(vx->current_runstate));
   219
   220          /*
   221           * The state_entry_time field is 64 bits in both versions, and the
   222           * XEN_RUNSTATE_UPDATE flag is in the top bit, which given that x86
   223           * is little-endian means that it's in the last *byte* of the word.
   224           * That detail is important later.
   225           */
   226          BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
   227                       sizeof(uint64_t));
   228          BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
   229                       sizeof(uint64_t));
   230          BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80);
   231
   232          /*
   233           * The time array is four 64-bit quantities in both versions, matching
   234           * the vx->runstate_times and immediately following state_entry_time.
   235           */
   236          BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
   237                       offsetof(struct vcpu_runstate_info, time) - sizeof(uint64_t));
   238          BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
   239                       offsetof(struct compat_vcpu_runstate_info, time) - sizeof(uint64_t));
   240          BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
   241                       sizeof_field(struct compat_vcpu_runstate_info, time));
   242          BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
   243                       sizeof(vx->runstate_times));
   244
   245          if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
   246                  user_len = sizeof(struct vcpu_runstate_info);
   247                  times_ofs = offsetof(struct vcpu_runstate_info,
   248                                       state_entry_time);
   249          } else {
   250                  user_len = sizeof(struct compat_vcpu_runstate_info);
   251                  times_ofs = offsetof(struct compat_vcpu_runstate_info,
   252                                       state_entry_time);
   253          }
   254
   255          /*
   256           * There are basically no alignment constraints. The guest can set it
   257           * up so it crosses from one page to the next, and at arbitrary byte
   258           * alignment (and the 32-bit ABI doesn't align the 64-bit integers
   259           * anyway, even if the overall struct had been 64-bit aligned).
   260           */
   261          if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) {
   262                  user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK);
   263                  user_len2 = user_len - user_len1;
   264          } else {
   265                  user_len1 = user_len;
   266                  user_len2 = 0;
   267          }
   268          BUG_ON(user_len1 + user_len2 != user_len);
   269
   270   retry:
   271          /*
   272           * Attempt to obtain the GPC lock on *both* (if there are two)
   273           * gfn_to_pfn caches that cover the region.
   274           */
   275          if (atomic) {
   276                  local_irq_save(flags);
   277                  if (!read_trylock(gpc1->lock)) {
   278                          local_irq_restore(flags);
   279                          return;
   280                  }
   281          } else {
   282                  read_lock_irqsave(gpc1->lock, flags);
   283          }
   284          while (!kvm_gpc_check(gpc1, user_len1)) {
   285                  read_unlock_irqrestore(gpc1->lock, flags);
   286
   287                  /* When invoked from kvm_sched_out() we cannot sleep */
   288                  if (atomic)
   289                          return;
   290
   291                  if (kvm_gpc_refresh(gpc1, user_len1))
   292                          return;
   293
   294                  read_lock_irqsave(gpc1->lock, flags);
   295          }
   296
   297          if (likely(!user_len2)) {
   298                  /*
   299                   * Set up three pointers directly to the runstate_info
   300                   * struct in the guest (via the GPC).
   301                   *
   302                   *  • @rs_state → state field
   303                   *  • @rs_times → state_entry_time field.
   304                   *  • @update_bit → last byte of state_entry_time, which
   305                   *    contains the XEN_RUNSTATE_UPDATE bit.
   306                   */
   307                  rs_state = gpc1->khva;
   308                  rs_times = gpc1->khva + times_ofs;
   309                  if (v->kvm->arch.xen.runstate_update_flag)
   310                          update_bit = ((void *)(&rs_times[1])) - 1;
   311          } else {
   312                  /*
   313                   * The guest's runstate_info is split across two pages and we
   314                   * need to hold and validate both GPCs simultaneously. We can
   315                   * declare a lock ordering GPC1 > GPC2 because nothing else
   316                   * takes them more than one at a time. Set a subclass on the
   317                   * gpc1 lock to make lockdep shut up about it.
   318                   */
 > 319                  lock_set_subclass(gpc1->lock.dep_map, 1, _THIS_IP_);
   320                  if (atomic) {
   321                          if (!read_trylock(gpc2->lock)) {
   322                                  read_unlock_irqrestore(gpc1->lock, flags);
   323                                  return;
   324                          }
   325                  } else {
   326                          read_lock(gpc2->lock);
   327                  }
   328
   329                  if (!kvm_gpc_check(gpc2, user_len2)) {
   330                          read_unlock(gpc2->lock);
   331                          read_unlock_irqrestore(gpc1->lock, flags);
   332
   333                          /* When invoked from kvm_sched_out() we cannot sleep */
   334                          if (atomic)
   335                                  return;
   336
   337                          /*
   338                           * Use kvm_gpc_activate() here because if the runstate
   339                           * area was configured in 32-bit mode and only extends
   340                           * to the second page now because the guest changed to
   341                           * 64-bit mode, the second GPC won't have been set up.
   342                           */
   343                          if (kvm_gpc_activate(gpc2, gpc1->gpa + user_len1,
   344                                               user_len2))
   345                                  return;
   346
   347                          /*
   348                           * We dropped the lock on GPC1 so we have to go all the
   349                           * way back and revalidate that too.
   350                           */
   351                          goto retry;
   352                  }
   353
   354                  /*
   355                   * In this case, the runstate_info struct will be assembled on
   356                   * the kernel stack (compat or not as appropriate) and will
   357                   * be copied to GPC1/GPC2 with a dual memcpy. Set up the three
   358                   * rs pointers accordingly.
   359                   */
   360                  rs_times = &rs.state_entry_time;
   361
   362                  /*
   363                   * The rs_state pointer points to the start of what we'll
   364                   * copy to the guest, which in the case of a compat guest
   365                   * is the 32-bit field that the compiler thinks is padding.
   366                   */
   367                  rs_state = ((void *)rs_times) - times_ofs;
   368
   369                  /*
   370                   * The update_bit is still directly in the guest memory,
   371                   * via one GPC or the other.
   372                   */
   373                  if (v->kvm->arch.xen.runstate_update_flag) {
   374                          if (user_len1 >= times_ofs + sizeof(uint64_t))
   375                                  update_bit = gpc1->khva + times_ofs +
   376                                          sizeof(uint64_t) - 1;
   377                          else
   378                                  update_bit = gpc2->khva + times_ofs +
   379                                          sizeof(uint64_t) - 1 - user_len1;
   380                  }
   381
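The complaint boils down to gpc1->lock now being an rwlock_t pointer rather than an embedded rwlock_t, so the lockdep annotation has to dereference it before taking the address of its dep_map. A minimal sketch of what a respin of the flagged line would presumably look like (hypothetical, not taken from a posted v2):

        /* gpc1->lock is now an rwlock_t *, so dereference it and pass the
         * address of the embedded lockdep map (the line flagged at xen.c:319): */
        lock_set_subclass(&gpc1->lock->dep_map, 1, _THIS_IP_);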
Hi David,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on kvm/queue]
[also build test ERROR on linus/master v6.2-rc5 next-20230127]
[cannot apply to kvm/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/David-Stevens/KVM-Support-sharing-gpc-locks/20230128-161425
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link:    https://lore.kernel.org/r/20230127044500.680329-2-stevensd%40google.com
patch subject: [PATCH 1/3] KVM: Support sharing gpc locks
config: x86_64-randconfig-a014 (https://download.01.org/0day-ci/archive/20230129/202301290827.aRAvXuk9-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/14d62b317199184bb71256cc5f652b04fb2f9107
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review David-Stevens/KVM-Support-sharing-gpc-locks/20230128-161425
        git checkout 14d62b317199184bb71256cc5f652b04fb2f9107
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> arch/x86/kvm/xen.c:319:31: error: member reference type 'rwlock_t *' is a pointer; did you mean to use '->'?
                   lock_set_subclass(gpc1->lock.dep_map, 1, _THIS_IP_);
                                     ~~~~~~~~~~^
                                               ->
>> arch/x86/kvm/xen.c:319:21: error: passing 'struct lockdep_map' to parameter of incompatible type 'struct lockdep_map *'; take the address with &
                   lock_set_subclass(gpc1->lock.dep_map, 1, _THIS_IP_);
                                     ^~~~~~~~~~~~~~~~~~
                                     &
   include/linux/lockdep.h:296:58: note: passing argument to parameter 'lock' here
   static inline void lock_set_subclass(struct lockdep_map *lock,
                                                            ^
   2 errors generated.
(Resending as the corporate email system has taken to adding redundant
information in transit as an HTML part — to a message that didn't need it,
which already had it in the Organization: header anyway, and was previously
plain text only.)

On Fri, 2023-01-27 at 13:44 +0900, David Stevens wrote:
> @@ -316,19 +316,19 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
>  		 * takes them more than one at a time. Set a subclass on the
>  		 * gpc1 lock to make lockdep shut up about it.
>  		 */
> -		lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
> +		lock_set_subclass(gpc1->lock.dep_map, 1, _THIS_IP_);

The test robot already pointed out this should be gpc1->lock->dep_map, but
if we do what I said below in the first attempt at this email, this all
goes away anyway.

>  		if (atomic) {
> -			if (!read_trylock(&gpc2->lock)) {
> -				read_unlock_irqrestore(&gpc1->lock, flags);
> +			if (!read_trylock(gpc2->lock)) {
> +				read_unlock_irqrestore(gpc1->lock, flags);
>  				return;
>  			}

LGTM without having had time to go through it in detail. As Sean said, we
were waiting for someone (perhaps even ourselves) to do this. Thanks.

For the runstate one above, this is crying out for gpc1 and gpc2 to *share*
a lock, and ditch the relocking and the whole comment about being able to
declare a lock ordering between the two, which is half shown in the context
cited above.

Probably worth doing that in a separate patch on top of this; I'll take a
look at it when my feet next hit the ground on Tuesday if you haven't
already done so. It's easy to test with the xen_shinfo_test self-test. Or
with this if you want more fun:
https://lore.kernel.org/qemu-devel/20230128081113.1615111-1-dwmw2@infradead.org/ :)
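To make the lock-sharing suggestion concrete: with the kvm_gpc_init_with_lock() helper added by this patch, the second runstate cache could simply reuse the first cache's lock, at which point the lock_set_subclass() annotation and the separate gpc2 locking go away. A rough sketch of such a follow-up, assuming the runstate caches are still set up in kvm_xen_init_vcpu() with KVM_HOST_USES_PFN as today (placement and flags here are assumptions for illustration, not part of the posted patch):

        /* Hypothetical follow-up: let both runstate caches share one lock so
         * that kvm_xen_update_runstate_guest() never has to nest two locks. */
        kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm, NULL,
                     KVM_HOST_USES_PFN);
        kvm_gpc_init_with_lock(&vcpu->arch.xen.runstate2_cache, vcpu->kvm,
                               NULL, KVM_HOST_USES_PFN,
                               vcpu->arch.xen.runstate_cache.lock);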
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 508074e47bc0..ec0de9bc2eae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3047,14 +3047,14 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 	struct pvclock_vcpu_time_info *guest_hv_clock;
 	unsigned long flags;
 
-	read_lock_irqsave(&gpc->lock, flags);
+	read_lock_irqsave(gpc->lock, flags);
 	while (!kvm_gpc_check(gpc, offset + sizeof(*guest_hv_clock))) {
-		read_unlock_irqrestore(&gpc->lock, flags);
+		read_unlock_irqrestore(gpc->lock, flags);
 
 		if (kvm_gpc_refresh(gpc, offset + sizeof(*guest_hv_clock)))
 			return;
 
-		read_lock_irqsave(&gpc->lock, flags);
+		read_lock_irqsave(gpc->lock, flags);
 	}
 
 	guest_hv_clock = (void *)(gpc->khva + offset);
@@ -3083,7 +3083,7 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 	guest_hv_clock->version = ++vcpu->hv_clock.version;
 
 	mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
-	read_unlock_irqrestore(&gpc->lock, flags);
+	read_unlock_irqrestore(gpc->lock, flags);
 
 	trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
 }
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 2681e2007e39..fa8ab23271d3 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -59,12 +59,12 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 		wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
 
 		/* It could be invalid again already, so we need to check */
-		read_lock_irq(&gpc->lock);
+		read_lock_irq(gpc->lock);
 
 		if (gpc->valid)
 			break;
 
-		read_unlock_irq(&gpc->lock);
+		read_unlock_irq(gpc->lock);
 	} while (1);
 
 	/* Paranoia checks on the 32-bit struct layout */
@@ -101,7 +101,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 	smp_wmb();
 
 	wc->version = wc_version + 1;
-	read_unlock_irq(&gpc->lock);
+	read_unlock_irq(gpc->lock);
 
 	kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);
 
@@ -274,15 +274,15 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 	 */
 	if (atomic) {
 		local_irq_save(flags);
-		if (!read_trylock(&gpc1->lock)) {
+		if (!read_trylock(gpc1->lock)) {
 			local_irq_restore(flags);
 			return;
 		}
 	} else {
-		read_lock_irqsave(&gpc1->lock, flags);
+		read_lock_irqsave(gpc1->lock, flags);
 	}
 	while (!kvm_gpc_check(gpc1, user_len1)) {
-		read_unlock_irqrestore(&gpc1->lock, flags);
+		read_unlock_irqrestore(gpc1->lock, flags);
 
 		/* When invoked from kvm_sched_out() we cannot sleep */
 		if (atomic)
@@ -291,7 +291,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 		if (kvm_gpc_refresh(gpc1, user_len1))
 			return;
 
-		read_lock_irqsave(&gpc1->lock, flags);
+		read_lock_irqsave(gpc1->lock, flags);
 	}
 
 	if (likely(!user_len2)) {
@@ -316,19 +316,19 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 		 * takes them more than one at a time. Set a subclass on the
 		 * gpc1 lock to make lockdep shut up about it.
 		 */
-		lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
+		lock_set_subclass(gpc1->lock.dep_map, 1, _THIS_IP_);
 		if (atomic) {
-			if (!read_trylock(&gpc2->lock)) {
-				read_unlock_irqrestore(&gpc1->lock, flags);
+			if (!read_trylock(gpc2->lock)) {
+				read_unlock_irqrestore(gpc1->lock, flags);
 				return;
 			}
 		} else {
-			read_lock(&gpc2->lock);
+			read_lock(gpc2->lock);
 		}
 
 		if (!kvm_gpc_check(gpc2, user_len2)) {
-			read_unlock(&gpc2->lock);
-			read_unlock_irqrestore(&gpc1->lock, flags);
+			read_unlock(gpc2->lock);
+			read_unlock_irqrestore(gpc1->lock, flags);
 
 			/* When invoked from kvm_sched_out() we cannot sleep */
 			if (atomic)
@@ -428,9 +428,9 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 	}
 
 	if (user_len2)
-		read_unlock(&gpc2->lock);
+		read_unlock(gpc2->lock);
 
-	read_unlock_irqrestore(&gpc1->lock, flags);
+	read_unlock_irqrestore(gpc1->lock, flags);
 
 	mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT);
 	if (user_len2)
@@ -505,14 +505,14 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
 	 * does anyway. Page it in and retry the instruction. We're just a
 	 * little more honest about it.
 	 */
-	read_lock_irqsave(&gpc->lock, flags);
+	read_lock_irqsave(gpc->lock, flags);
 	while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
-		read_unlock_irqrestore(&gpc->lock, flags);
+		read_unlock_irqrestore(gpc->lock, flags);
 
 		if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info)))
 			return;
 
-		read_lock_irqsave(&gpc->lock, flags);
+		read_lock_irqsave(gpc->lock, flags);
 	}
 
 	/* Now gpc->khva is a valid kernel address for the vcpu_info */
@@ -540,7 +540,7 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
 			     : "0" (evtchn_pending_sel32));
 		WRITE_ONCE(vi->evtchn_upcall_pending, 1);
 	}
-	read_unlock_irqrestore(&gpc->lock, flags);
+	read_unlock_irqrestore(gpc->lock, flags);
 
 	/* For the per-vCPU lapic vector, deliver it as MSI. */
 	if (v->arch.xen.upcall_vector)
@@ -568,9 +568,9 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 	BUILD_BUG_ON(sizeof(rc) !=
 		     sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
 
-	read_lock_irqsave(&gpc->lock, flags);
+	read_lock_irqsave(gpc->lock, flags);
 	while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
-		read_unlock_irqrestore(&gpc->lock, flags);
+		read_unlock_irqrestore(gpc->lock, flags);
 
 		/*
 		 * This function gets called from kvm_vcpu_block() after setting the
@@ -590,11 +590,11 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 			 */
 			return 0;
 		}
-		read_lock_irqsave(&gpc->lock, flags);
+		read_lock_irqsave(gpc->lock, flags);
 	}
 
 	rc = ((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending;
-	read_unlock_irqrestore(&gpc->lock, flags);
+	read_unlock_irqrestore(gpc->lock, flags);
 
 	return rc;
 }
@@ -1172,7 +1172,7 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
 	int idx, i;
 
 	idx = srcu_read_lock(&kvm->srcu);
-	read_lock_irqsave(&gpc->lock, flags);
+	read_lock_irqsave(gpc->lock, flags);
 
 	if (!kvm_gpc_check(gpc, PAGE_SIZE))
 		goto out_rcu;
@@ -1193,7 +1193,7 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
 	}
 
  out_rcu:
-	read_unlock_irqrestore(&gpc->lock, flags);
+	read_unlock_irqrestore(gpc->lock, flags);
 	srcu_read_unlock(&kvm->srcu, idx);
 
 	return ret;
@@ -1576,7 +1576,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 
 	idx = srcu_read_lock(&kvm->srcu);
 
-	read_lock_irqsave(&gpc->lock, flags);
+	read_lock_irqsave(gpc->lock, flags);
 	if (!kvm_gpc_check(gpc, PAGE_SIZE))
 		goto out_rcu;
 
@@ -1607,10 +1607,10 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 	} else {
 		rc = 1; /* Delivered to the bitmap in shared_info. */
 		/* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
-		read_unlock_irqrestore(&gpc->lock, flags);
+		read_unlock_irqrestore(gpc->lock, flags);
 		gpc = &vcpu->arch.xen.vcpu_info_cache;
 
-		read_lock_irqsave(&gpc->lock, flags);
+		read_lock_irqsave(gpc->lock, flags);
 		if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
 			/*
 			 * Could not access the vcpu_info. Set the bit in-kernel
@@ -1644,7 +1644,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 	}
 
  out_rcu:
-	read_unlock_irqrestore(&gpc->lock, flags);
+	read_unlock_irqrestore(gpc->lock, flags);
 	srcu_read_unlock(&kvm->srcu, idx);
 
 	if (kick_vcpu) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 109b18e2789c..7d1f9c6561e3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1279,6 +1279,18 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
 		  struct kvm_vcpu *vcpu, enum pfn_cache_usage usage);
 
+/**
+ * kvm_gpc_init_with_lock - initialize gfn_to_pfn_cache with an external lock.
+ *
+ * @lock: an initialized rwlock
+ *
+ * See kvm_gpc_init. Allows multiple gfn_to_pfn_cache structs to share the
+ * same lock.
+ */
+void kvm_gpc_init_with_lock(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
+			    struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+			    rwlock_t *lock);
+
 /**
  * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
  * physical address.
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 76de36e56cdf..b6432c8cc19c 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -70,7 +70,8 @@ struct gfn_to_pfn_cache {
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
 	struct list_head list;
-	rwlock_t lock;
+	rwlock_t *lock;
+	rwlock_t _lock;
 	struct mutex refresh_lock;
 	void *khva;
 	kvm_pfn_t pfn;
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 2d6aba677830..2c6a2edaca9f 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -31,7 +31,7 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
 
 	spin_lock(&kvm->gpc_lock);
 	list_for_each_entry(gpc, &kvm->gpc_list, list) {
-		write_lock_irq(&gpc->lock);
+		write_lock_irq(gpc->lock);
 
 		/* Only a single page so no need to care about length */
 		if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
@@ -50,7 +50,7 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
 				__set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
 			}
 		}
-		write_unlock_irq(&gpc->lock);
+		write_unlock_irq(gpc->lock);
 	}
 	spin_unlock(&kvm->gpc_lock);
 
@@ -147,7 +147,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 
 	lockdep_assert_held(&gpc->refresh_lock);
 
-	lockdep_assert_held_write(&gpc->lock);
+	lockdep_assert_held_write(gpc->lock);
 
 	/*
 	 * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva
@@ -160,7 +160,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 		mmu_seq = gpc->kvm->mmu_invalidate_seq;
 		smp_rmb();
 
-		write_unlock_irq(&gpc->lock);
+		write_unlock_irq(gpc->lock);
 
 		/*
 		 * If the previous iteration "failed" due to an mmu_notifier
@@ -208,7 +208,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 			}
 		}
 
-		write_lock_irq(&gpc->lock);
+		write_lock_irq(gpc->lock);
 
 		/*
 		 * Other tasks must wait for _this_ refresh to complete before
@@ -231,7 +231,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 	return 0;
 
 out_error:
-	write_lock_irq(&gpc->lock);
+	write_lock_irq(gpc->lock);
 
 	return -EFAULT;
 }
@@ -261,7 +261,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
 	 */
 	mutex_lock(&gpc->refresh_lock);
 
-	write_lock_irq(&gpc->lock);
+	write_lock_irq(gpc->lock);
 
 	if (!gpc->active) {
 		ret = -EINVAL;
@@ -321,7 +321,7 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
 	unmap_old = (old_pfn != gpc->pfn);
 
 out_unlock:
-	write_unlock_irq(&gpc->lock);
+	write_unlock_irq(gpc->lock);
 
 	mutex_unlock(&gpc->refresh_lock);
 
@@ -339,20 +339,29 @@ EXPORT_SYMBOL_GPL(kvm_gpc_refresh);
 
 void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
 		  struct kvm_vcpu *vcpu, enum pfn_cache_usage usage)
+{
+	rwlock_init(&gpc->_lock);
+	kvm_gpc_init_with_lock(gpc, kvm, vcpu, usage, &gpc->_lock);
+}
+EXPORT_SYMBOL_GPL(kvm_gpc_init);
+
+void kvm_gpc_init_with_lock(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
+			    struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+			    rwlock_t *lock)
 {
 	WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
 	WARN_ON_ONCE((usage & KVM_GUEST_USES_PFN) && !vcpu);
 
-	rwlock_init(&gpc->lock);
 	mutex_init(&gpc->refresh_lock);
 
 	gpc->kvm = kvm;
 	gpc->vcpu = vcpu;
+	gpc->lock = lock;
 	gpc->usage = usage;
 	gpc->pfn = KVM_PFN_ERR_FAULT;
 	gpc->uhva = KVM_HVA_ERR_BAD;
 }
-EXPORT_SYMBOL_GPL(kvm_gpc_init);
+EXPORT_SYMBOL_GPL(kvm_gpc_init_with_lock);
 
 int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
 {
@@ -371,9 +380,9 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
 		 * refresh must not establish a mapping until the cache is
 		 * reachable by mmu_notifier events.
 		 */
-		write_lock_irq(&gpc->lock);
+		write_lock_irq(gpc->lock);
 		gpc->active = true;
-		write_unlock_irq(&gpc->lock);
+		write_unlock_irq(gpc->lock);
 	}
 	return __kvm_gpc_refresh(gpc, gpa, len);
 }
@@ -391,7 +400,7 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
 		 * must stall mmu_notifier events until all users go away, i.e.
 		 * until gpc->lock is dropped and refresh is guaranteed to fail.
		 */
-		write_lock_irq(&gpc->lock);
+		write_lock_irq(gpc->lock);
 		gpc->active = false;
 		gpc->valid = false;
 
@@ -406,7 +415,7 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
 
 		old_pfn = gpc->pfn;
 		gpc->pfn = KVM_PFN_ERR_FAULT;
-		write_unlock_irq(&gpc->lock);
+		write_unlock_irq(gpc->lock);
 
 		spin_lock(&kvm->gpc_lock);
 		list_del(&gpc->list);