[dm-devel] AMD-Vi IO_PAGE_FAULTs and ata3.00: failed command: READ FPDMA QUEUED errors since Linux 4.0
diff mbox

Message ID 20151006101356.GE12506@8bytes.org
State New, archived
Headers show

Commit Message

Joerg Roedel Oct. 6, 2015, 10:13 a.m. UTC
On Wed, Sep 30, 2015 at 04:52:47PM +0200, Andreas Hartmann wrote:
> > Alternatively, someone who can reproduce it should trace the calls to
> > __map_single and __unmap_single in the AMD IOMMU driver to find out
> > whether the addresses which the faults happen on are really mapped, or
> > at least requested from the AMD IOMMU driver.
> 
> How can I trace it?

Please apply the attached debug patch on-top of Linux v4.3-rc3 and boot
the machine. After boot you run (as root):


	# cat /sys/kernel/debug/tracing/trace_pipe > trace-data

Please run this in a separate shell and keep it running.

Then trigger the problem while the above command is running. When you
triggered it, please send me the (compressed) trace-data file, full
dmesg and output of lspci on the box.

Please let me know if you have further questions.


Thanks,

	Joerg

Comments

Andreas Hartmann Oct. 6, 2015, 6:37 p.m. UTC | #1
On 10/06/2015 at 12:13 PM, Joerg Roedel wrote:
> On Wed, Sep 30, 2015 at 04:52:47PM +0200, Andreas Hartmann wrote:
>>> Alternatively, someone who can reproduce it should trace the calls to
>>> __map_single and __unmap_single in the AMD IOMMU driver to find out
>>> whether the addresses which the faults happen on are really mapped, or
>>> at least requested from the AMD IOMMU driver.
>>
>> How can I trace it?
> 
> Please apply the attached debug patch on-top of Linux v4.3-rc3 and boot
> the machine. After boot you run (as root):
> 
> 
> 	# cat /sys/kernel/debug/tracing/trace_pipe > trace-data
> 
> Please run this in a separate shell and keep it running.
> 
> Then trigger the problem while the above command is running. When you
> triggered it, please send me the (compressed) trace-data file, full
> dmesg and output of lspci on the box.

Hmm, *seems* to work fine w/ 4.3-rc2. But I have to do some more tests
to be really sure.


W/ 4.1.10, the problem can be seen almost always during boot (systemd) -
but at this point, it is difficult to trace. I have to take a closer
look to find a place to start the trace already during boot process.


But there is another problem w/ 4.3-rc2: Starting a VM w/ PCIe
passthrough doesn't work any more. I'm getting the attached null pointer
dereference and the machine hangs.


Thanks,
regards,
Andreas
Oct  6 20:11:18 localhost kernel: [   32.461794] BUG: unable to handle kernel NULL pointer dereference at 00000000000000b8
Oct  6 20:11:18 localhost kernel: [   32.461853] IP: [<ffffffff8147a8a4>] do_detach+0x24/0xa0
Oct  6 20:11:18 localhost kernel: [   32.461888] PGD 0 
Oct  6 20:11:18 localhost kernel: [   32.461902] Oops: 0002 [#1] PREEMPT SMP 
Oct  6 20:11:18 localhost kernel: [   32.461929] Modules linked in: nf_log_ipv4 nf_log_common xt_LOG ipt_REJECT xt_tcpudp nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack iptable_filter ip_tables x_tables vfio_iommu_type1 vfio_pci vfio vfio_virqfd drbg ansi_cprng nfsd lockd grace nfs_acl auth_rpcgss sunrpc bridge stp llc tun it87 hwmon_vid snd_hda_codec_hdmi kvm_amd snd_hda_codec_realtek kvm snd_hda_codec_generic fam15h_power usb_storage snd_hda_intel pcspkr serio_raw snd_hda_codec edac_core snd_hda_core edac_mce_amd k10temp snd_hwdep snd_pcm firewire_ohci snd_seq e100 firewire_core crc_itu_t amdkfd sp5100_tco amd_iommu_v2 i2c_piix4 mxm_wmi sr_mod cdrom radeon snd_timer snd_seq_device snd ttm drm_kms_helper xhci_pci drm r8169 xhci_hcd mii fb_sys_fops sysimgblt sysfillrect syscopyarea soundcore i2c_algo_bit shpchp tpm_infineon tpm_tis tpm fjes 8250_fintek wmi button acpi_cpufreq sg thermal xfs libcrc32c linear crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ohci_pci processor scsi_dh_rdac scsi_dh_hp_sw scsi_dh_emc scsi_dh_alua raid456 async_raid6_recov async_pq async_xor xor async_memcpy async_tx raid6_pq raid10 raid1 raid0 md_mod dm_snapshot dm_bufio dm_mirror dm_region_hash dm_log dm_crypt dm_mod aesni_intel ablk_helper cryptd lrw gf128mul glue_helper aes_x86_64 ata_generic pata_atiixp
Oct  6 20:11:18 localhost kernel: [   32.462728] CPU: 0 PID: 9374 Comm: qemu-system-x86 Not tainted 4.3.0-rc2-4-desktop #1
Oct  6 20:11:18 localhost kernel: [   32.462767] Hardware name: Gigabyte Technology Co., Ltd. GA-990XA-UD3/GA-990XA-UD3, BIOS F14b 01/24/2013
Oct  6 20:11:18 localhost kernel: [   32.462814] task: ffff8805f4ecc080 ti: ffff8805e1ec4000 task.ti: ffff8805e1ec4000
Oct  6 20:11:18 localhost kernel: [   32.462851] RIP: 0010:[<ffffffff8147a8a4>]  [<ffffffff8147a8a4>] do_detach+0x24/0xa0
Oct  6 20:11:18 localhost kernel: [   32.462894] RSP: 0018:ffff8805e1ec7ca0  EFLAGS: 00010006
Oct  6 20:11:18 localhost kernel: [   32.462922] RAX: 0000000000000000 RBX: ffff880614bef640 RCX: 00000000000000ff
Oct  6 20:11:18 localhost kernel: [   32.462959] RDX: 0000000000000000 RSI: ffff88062e70c098 RDI: ffff880614bef640
Oct  6 20:11:18 localhost kernel: [   32.462998] RBP: ffff8805e1ec7ca8 R08: ffff880614befc40 R09: 0000000000000000
Oct  6 20:11:18 localhost kernel: [   32.463033] R10: 0000000000000000 R11: ffffffff81a58df8 R12: ffff880614befc40
Oct  6 20:11:18 localhost kernel: [   32.463071] R13: ffff8806144b9858 R14: 0000000000000082 R15: ffff88062e70c098
Oct  6 20:11:18 localhost kernel: [   32.463110] FS:  00007f27cc824b80(0000) GS:ffff88062ec00000(0000) knlGS:0000000000000000
Oct  6 20:11:18 localhost kernel: [   32.463148] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Oct  6 20:11:18 localhost kernel: [   32.463177] CR2: 00000000000000b8 CR3: 00000000c8011000 CR4: 00000000000406f0
Oct  6 20:11:18 localhost kernel: [   32.463211] Stack:
Oct  6 20:11:18 localhost kernel: [   32.463223]  ffff880614bef640 ffff8805e1ec7cd8 ffffffff8147a96c ffff880614befc40
Oct  6 20:11:18 localhost kernel: [   32.463268]  ffff88062e70c098 0000000000000286 ffff8806144b9800 ffff8805e1ec7d08
Oct  6 20:11:18 localhost kernel: [   32.463310]  ffffffff8147aaf5 ffff880615dd0bc0 ffff8805e8354a00 ffff880614befc40
Oct  6 20:11:18 localhost kernel: [   32.463355] Call Trace:
Oct  6 20:11:18 localhost kernel: [   32.463377]  [<ffffffff8147a96c>] __detach_device+0x4c/0x80
Oct  6 20:11:18 localhost kernel: [   32.463412]  [<ffffffff8147aaf5>] detach_device+0x35/0xa0
Oct  6 20:11:18 localhost kernel: [   32.463444]  [<ffffffff8147b706>] amd_iommu_attach_device+0x66/0x2b0
Oct  6 20:11:18 localhost kernel: [   32.463481]  [<ffffffff81475d8e>] __iommu_attach_device+0x1e/0x80
Oct  6 20:11:18 localhost kernel: [   32.463513]  [<ffffffff81477013>] __iommu_attach_group+0x53/0x80
Oct  6 20:11:18 localhost kernel: [   32.463547]  [<ffffffff8147706b>] iommu_attach_group+0x2b/0x40
Oct  6 20:11:18 localhost kernel: [   32.463583]  [<ffffffffa07e9407>] vfio_iommu_type1_attach_group+0x187/0x4f8 [vfio_iommu_type1]
Oct  6 20:11:18 localhost kernel: [   32.463655]  [<ffffffffa07297e8>] vfio_fops_unl_ioctl+0x1b8/0x290 [vfio]
Oct  6 20:11:18 localhost kernel: [   32.463699]  [<ffffffff811f81cd>] do_vfs_ioctl+0x2cd/0x4c0
Oct  6 20:11:18 localhost kernel: [   32.463740]  [<ffffffff811f8439>] SyS_ioctl+0x79/0x90
Oct  6 20:11:18 localhost kernel: [   32.463775]  [<ffffffff816b3936>] entry_SYSCALL_64_fastpath+0x16/0x75
Oct  6 20:11:18 localhost kernel: [   32.466223] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x16/0x75
Oct  6 20:11:18 localhost kernel: [   32.466258] 
Oct  6 20:11:18 localhost kernel: [   32.466267] Leftover inexact backtrace:
Oct  6 20:11:18 localhost kernel: [   32.466267] 
Oct  6 20:11:18 localhost kernel: [   32.466298] Code: 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 8b 05 23 c7 d9 00 48 89 e5 53 0f b7 57 40 48 89 fb 48 8b 04 d0 48 63 50 10 48 8b 47 38 <83> ac 90 b8 00 00 00 01 48 8b 47 38 83 a8 b4 00 00 00 01 48 8b 
Oct  6 20:11:18 localhost kernel: [   32.466496] RIP  [<ffffffff8147a8a4>] do_detach+0x24/0xa0
Oct  6 20:11:18 localhost kernel: [   32.466527]  RSP <ffff8805e1ec7ca0>
Oct  6 20:11:18 localhost kernel: [   32.466547] CR2: 00000000000000b8
Oct  6 20:11:18 localhost kernel: [   32.476984] ---[ end trace 09ac28af2000b365 ]---
Oct  6 20:11:18 localhost kernel: [   32.477031] note: qemu-system-x86[9374] exited with preempt_count 2
Oct  6 20:11:19 localhost kernel: [   32.577276] walk_shadow_page_get_mmio_spte: detect reserved bits on spte, addr 0xb8000, dump hierarchy:
Oct  6 20:11:19 localhost kernel: [   32.577279] ------ spte 0x5db618027 level 4.
Oct  6 20:11:19 localhost kernel: [   32.577281] ------ spte 0x5db619027 level 3.
Oct  6 20:11:19 localhost kernel: [   32.577281] ------ spte 0x5db61a027 level 2.
Oct  6 20:11:19 localhost kernel: [   32.577282] ------ spte 0xffff0000000b8f6f level 1.
Oct  6 20:11:19 localhost kernel: [   32.577283] ------------[ cut here ]------------
Oct  6 20:11:19 localhost kernel: [   32.577301] WARNING: CPU: 2 PID: 9389 at ../arch/x86/kvm/mmu.c:3385 handle_mmio_page_fault.isra.85+0x2c/0x31 [kvm]()
Oct  6 20:11:19 localhost kernel: [   32.577302] Modules linked in: nf_log_ipv4 nf_log_common xt_LOG ipt_REJECT xt_tcpudp nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack iptable_filter ip_tables x_tables vfio_iommu_type1 vfio_pci vfio vfio_virqfd drbg ansi_cprng nfsd lockd grace nfs_acl auth_rpcgss sunrpc bridge stp llc tun it87 hwmon_vid snd_hda_codec_hdmi kvm_amd snd_hda_codec_realtek kvm snd_hda_codec_generic fam15h_power usb_storage snd_hda_intel pcspkr serio_raw snd_hda_codec edac_core snd_hda_core edac_mce_amd k10temp snd_hwdep snd_pcm firewire_ohci snd_seq e100 firewire_core crc_itu_t amdkfd sp5100_tco amd_iommu_v2 i2c_piix4 mxm_wmi sr_mod cdrom radeon snd_timer snd_seq_device snd ttm drm_kms_helper xhci_pci drm r8169 xhci_hcd mii fb_sys_fops sysimgblt sysfillrect syscopyarea soundcore i2c_algo_bit shpchp tpm_infineon tpm_tis tpm fjes 8250_fintek wmi button acpi_cpufreq sg thermal xfs libcrc32c linear crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ohci_pci processor scsi_dh_rdac scsi_dh_hp_sw scsi_dh_emc scsi_dh_alua raid456 async_raid6_recov async_pq async_xor xor async_memcpy async_tx raid6_pq raid10 raid1 raid0 md_mod dm_snapshot dm_bufio dm_mirror dm_region_hash dm_log dm_crypt dm_mod aesni_intel ablk_helper cryptd lrw gf128mul glue_helper aes_x86_64 ata_generic pata_atiixp
Oct  6 20:11:19 localhost kernel: [   32.577357] CPU: 2 PID: 9389 Comm: qemu-system-x86 Tainted: G      D         4.3.0-rc2-4-desktop #1
Oct  6 20:11:19 localhost kernel: [   32.577358] Hardware name: Gigabyte Technology Co., Ltd. GA-990XA-UD3/GA-990XA-UD3, BIOS F14b 01/24/2013
Oct  6 20:11:19 localhost kernel: [   32.577360]  ffffffffa083f3ce ffff8805e0f43ba0 ffffffff81368633 0000000000000000
Oct  6 20:11:19 localhost kernel: [   32.577362]  ffff8805e0f43bd8 ffffffff8106a628 ffff8805e2a70040 00000000000b8000
Oct  6 20:11:19 localhost kernel: [   32.577364]  0000000000000000 000000000000000f 000000000000000f ffff8805e0f43be8
Oct  6 20:11:19 localhost kernel: [   32.577366] Call Trace:
Oct  6 20:11:19 localhost kernel: [   32.577374]  [<ffffffff810085ee>] try_stack_unwind+0x17e/0x190
Oct  6 20:11:19 localhost kernel: [   32.577379]  [<ffffffff8100737f>] dump_trace+0x8f/0x3b0
Oct  6 20:11:19 localhost kernel: [   32.577382]  [<ffffffff8100864d>] show_trace_log_lvl+0x4d/0x60
Oct  6 20:11:19 localhost kernel: [   32.577385]  [<ffffffff810077a1>] show_stack_log_lvl+0x101/0x190
Oct  6 20:11:19 localhost kernel: [   32.577387]  [<ffffffff810086a5>] show_stack+0x25/0x50
Oct  6 20:11:19 localhost kernel: [   32.577390]  [<ffffffff81368633>] dump_stack+0x4b/0x78
Oct  6 20:11:19 localhost kernel: [   32.577394]  [<ffffffff8106a628>] warn_slowpath_common+0x88/0xc0
Oct  6 20:11:19 localhost kernel: [   32.577397]  [<ffffffff8106a71a>] warn_slowpath_null+0x1a/0x20
Oct  6 20:11:19 localhost kernel: [   32.577408]  [<ffffffffa08318eb>] handle_mmio_page_fault.isra.85+0x2c/0x31 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.577427]  [<ffffffffa0816106>] tdp_page_fault+0x246/0x260 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.577441]  [<ffffffffa080fbd4>] kvm_mmu_page_fault+0x24/0x110 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.577446]  [<ffffffffa0cdbfc9>] pf_interception+0xc9/0x150 [kvm_amd]
Oct  6 20:11:19 localhost kernel: [   32.577451]  [<ffffffffa0cdf5a0>] handle_exit+0x180/0x9b0 [kvm_amd]
Oct  6 20:11:19 localhost kernel: [   32.577462]  [<ffffffffa0805dd9>] vcpu_enter_guest+0x769/0xde0 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.577475]  [<ffffffffa080c62a>] kvm_arch_vcpu_ioctl_run+0x2da/0x400 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.577486]  [<ffffffffa07f4d8f>] kvm_vcpu_ioctl+0x30f/0x5c0 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.577490]  [<ffffffff811f81cd>] do_vfs_ioctl+0x2cd/0x4c0
Oct  6 20:11:19 localhost kernel: [   32.577497]  [<ffffffff811f8439>] SyS_ioctl+0x79/0x90
Oct  6 20:11:19 localhost kernel: [   32.577500]  [<ffffffff816b3936>] entry_SYSCALL_64_fastpath+0x16/0x75
Oct  6 20:11:19 localhost kernel: [   32.578805] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x16/0x75
Oct  6 20:11:19 localhost kernel: [   32.578806] 
Oct  6 20:11:19 localhost kernel: [   32.578806] Leftover inexact backtrace:
Oct  6 20:11:19 localhost kernel: [   32.578806] 
Oct  6 20:11:19 localhost kernel: [   32.578808] ---[ end trace 09ac28af2000b366 ]---
Oct  6 20:11:19 localhost kernel: [   32.655728] walk_shadow_page_get_mmio_spte: detect reserved bits on spte, addr 0xb8000, dump hierarchy:
Oct  6 20:11:19 localhost kernel: [   32.655731] ------ spte 0x5e6703027 level 4.
Oct  6 20:11:19 localhost kernel: [   32.655732] ------ spte 0x5e5fb8027 level 3.
Oct  6 20:11:19 localhost kernel: [   32.655733] ------ spte 0x5e5fb9027 level 2.
Oct  6 20:11:19 localhost kernel: [   32.655734] ------ spte 0xffff0000000b8f67 level 1.
Oct  6 20:11:19 localhost kernel: [   32.655735] ------------[ cut here ]------------
Oct  6 20:11:19 localhost kernel: [   32.655764] WARNING: CPU: 2 PID: 9390 at ../arch/x86/kvm/mmu.c:3385 handle_mmio_page_fault.isra.85+0x2c/0x31 [kvm]()
Oct  6 20:11:19 localhost kernel: [   32.655765] Modules linked in: nf_log_ipv4 nf_log_common xt_LOG ipt_REJECT xt_tcpudp nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack iptable_filter ip_tables x_tables vfio_iommu_type1 vfio_pci vfio vfio_virqfd drbg ansi_cprng nfsd lockd grace nfs_acl auth_rpcgss sunrpc bridge stp llc tun it87 hwmon_vid snd_hda_codec_hdmi kvm_amd snd_hda_codec_realtek kvm snd_hda_codec_generic fam15h_power usb_storage snd_hda_intel pcspkr serio_raw snd_hda_codec edac_core snd_hda_core edac_mce_amd k10temp snd_hwdep snd_pcm firewire_ohci snd_seq e100 firewire_core crc_itu_t amdkfd sp5100_tco amd_iommu_v2 i2c_piix4 mxm_wmi sr_mod cdrom radeon snd_timer snd_seq_device snd ttm drm_kms_helper xhci_pci drm r8169 xhci_hcd mii fb_sys_fops sysimgblt sysfillrect syscopyarea soundcore i2c_algo_bit shpchp tpm_infineon tpm_tis tpm fjes 8250_fintek wmi button acpi_cpufreq sg thermal xfs libcrc32c linear crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ohci_pci processor scsi_dh_rdac scsi_dh_hp_sw scsi_dh_emc scsi_dh_alua raid456 async_raid6_recov async_pq async_xor xor async_memcpy async_tx raid6_pq raid10 raid1 raid0 md_mod dm_snapshot dm_bufio dm_mirror dm_region_hash dm_log dm_crypt dm_mod aesni_intel ablk_helper cryptd lrw gf128mul glue_helper aes_x86_64 ata_generic pata_atiixp
Oct  6 20:11:19 localhost kernel: [   32.655823] CPU: 2 PID: 9390 Comm: qemu-system-x86 Tainted: G      D W       4.3.0-rc2-4-desktop #1
Oct  6 20:11:19 localhost kernel: [   32.655824] Hardware name: Gigabyte Technology Co., Ltd. GA-990XA-UD3/GA-990XA-UD3, BIOS F14b 01/24/2013
Oct  6 20:11:19 localhost kernel: [   32.655826]  ffffffffa083f3ce ffff8805e221fba0 ffffffff81368633 0000000000000000
Oct  6 20:11:19 localhost kernel: [   32.655828]  ffff8805e221fbd8 ffffffff8106a628 ffff8805e1e30080 00000000000b8000
Oct  6 20:11:19 localhost kernel: [   32.655830]  0000000000000000 000000000000000f 000000000000000f ffff8805e221fbe8
Oct  6 20:11:19 localhost kernel: [   32.655832] Call Trace:
Oct  6 20:11:19 localhost kernel: [   32.655840]  [<ffffffff810085ee>] try_stack_unwind+0x17e/0x190
Oct  6 20:11:19 localhost kernel: [   32.655845]  [<ffffffff8100737f>] dump_trace+0x8f/0x3b0
Oct  6 20:11:19 localhost kernel: [   32.655848]  [<ffffffff8100864d>] show_trace_log_lvl+0x4d/0x60
Oct  6 20:11:19 localhost kernel: [   32.655852]  [<ffffffff810077a1>] show_stack_log_lvl+0x101/0x190
Oct  6 20:11:19 localhost kernel: [   32.655864]  [<ffffffff810086a5>] show_stack+0x25/0x50
Oct  6 20:11:19 localhost kernel: [   32.655869]  [<ffffffff81368633>] dump_stack+0x4b/0x78
Oct  6 20:11:19 localhost kernel: [   32.655877]  [<ffffffff8106a628>] warn_slowpath_common+0x88/0xc0
Oct  6 20:11:19 localhost kernel: [   32.655881]  [<ffffffff8106a71a>] warn_slowpath_null+0x1a/0x20
Oct  6 20:11:19 localhost kernel: [   32.655894]  [<ffffffffa08318eb>] handle_mmio_page_fault.isra.85+0x2c/0x31 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.655917]  [<ffffffffa0816106>] tdp_page_fault+0x246/0x260 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.655940]  [<ffffffffa080fbd4>] kvm_mmu_page_fault+0x24/0x110 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.655956]  [<ffffffffa0cdbfc9>] pf_interception+0xc9/0x150 [kvm_amd]
Oct  6 20:11:19 localhost kernel: [   32.655965]  [<ffffffffa0cdf5a0>] handle_exit+0x180/0x9b0 [kvm_amd]
Oct  6 20:11:19 localhost kernel: [   32.655983]  [<ffffffffa0805dd9>] vcpu_enter_guest+0x769/0xde0 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.656000]  [<ffffffffa080c62a>] kvm_arch_vcpu_ioctl_run+0x2da/0x400 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.656012]  [<ffffffffa07f4d8f>] kvm_vcpu_ioctl+0x30f/0x5c0 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.656017]  [<ffffffff811f81cd>] do_vfs_ioctl+0x2cd/0x4c0
Oct  6 20:11:19 localhost kernel: [   32.656023]  [<ffffffff811f8439>] SyS_ioctl+0x79/0x90
Oct  6 20:11:19 localhost kernel: [   32.656028]  [<ffffffff816b3936>] entry_SYSCALL_64_fastpath+0x16/0x75
Oct  6 20:11:19 localhost kernel: [   32.657322] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x16/0x75
Oct  6 20:11:19 localhost kernel: [   32.657324] 
Oct  6 20:11:19 localhost kernel: [   32.657324] Leftover inexact backtrace:
Oct  6 20:11:19 localhost kernel: [   32.657324] 
Oct  6 20:11:19 localhost kernel: [   32.657327] ---[ end trace 09ac28af2000b367 ]---
Oct  6 20:11:19 localhost kernel: [   32.715379] walk_shadow_page_get_mmio_spte: detect reserved bits on spte, addr 0xb8000, dump hierarchy:
Oct  6 20:11:19 localhost kernel: [   32.715382] ------ spte 0x5e6707027 level 4.
Oct  6 20:11:19 localhost kernel: [   32.715383] ------ spte 0x5e5781027 level 3.
Oct  6 20:11:19 localhost kernel: [   32.715384] ------ spte 0x5e2291027 level 2.
Oct  6 20:11:19 localhost kernel: [   32.715385] ------ spte 0xffff0000000b8f67 level 1.
Oct  6 20:11:19 localhost kernel: [   32.715386] ------------[ cut here ]------------
Oct  6 20:11:19 localhost kernel: [   32.715405] WARNING: CPU: 2 PID: 9393 at ../arch/x86/kvm/mmu.c:3385 handle_mmio_page_fault.isra.85+0x2c/0x31 [kvm]()
Oct  6 20:11:19 localhost kernel: [   32.715407] Modules linked in: nf_log_ipv4 nf_log_common xt_LOG ipt_REJECT xt_tcpudp nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack iptable_filter ip_tables x_tables vfio_iommu_type1 vfio_pci vfio vfio_virqfd drbg ansi_cprng nfsd lockd grace nfs_acl auth_rpcgss sunrpc bridge stp llc tun it87 hwmon_vid snd_hda_codec_hdmi kvm_amd snd_hda_codec_realtek kvm snd_hda_codec_generic fam15h_power usb_storage snd_hda_intel pcspkr serio_raw snd_hda_codec edac_core snd_hda_core edac_mce_amd k10temp snd_hwdep snd_pcm firewire_ohci snd_seq e100 firewire_core crc_itu_t amdkfd sp5100_tco amd_iommu_v2 i2c_piix4 mxm_wmi sr_mod cdrom radeon snd_timer snd_seq_device snd ttm drm_kms_helper xhci_pci drm r8169 xhci_hcd mii fb_sys_fops sysimgblt sysfillrect syscopyarea soundcore i2c_algo_bit shpchp tpm_infineon tpm_tis tpm fjes 8250_fintek wmi button acpi_cpufreq sg thermal xfs libcrc32c linear crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ohci_pci processor scsi_dh_rdac scsi_dh_hp_sw scsi_dh_emc scsi_dh_alua raid456 async_raid6_recov async_pq async_xor xor async_memcpy async_tx raid6_pq raid10 raid1 raid0 md_mod dm_snapshot dm_bufio dm_mirror dm_region_hash dm_log dm_crypt dm_mod aesni_intel ablk_helper cryptd lrw gf128mul glue_helper aes_x86_64 ata_generic pata_atiixp
Oct  6 20:11:19 localhost kernel: [   32.715461] CPU: 2 PID: 9393 Comm: qemu-system-x86 Tainted: G      D W       4.3.0-rc2-4-desktop #1
Oct  6 20:11:19 localhost kernel: [   32.715462] Hardware name: Gigabyte Technology Co., Ltd. GA-990XA-UD3/GA-990XA-UD3, BIOS F14b 01/24/2013
Oct  6 20:11:19 localhost kernel: [   32.715464]  ffffffffa083f3ce ffff8805e93afba0 ffffffff81368633 0000000000000000
Oct  6 20:11:19 localhost kernel: [   32.715466]  ffff8805e93afbd8 ffffffff8106a628 ffff8805ec604140 00000000000b8000
Oct  6 20:11:19 localhost kernel: [   32.715468]  0000000000000000 000000000000000f 000000000000000f ffff8805e93afbe8
Oct  6 20:11:19 localhost kernel: [   32.715470] Call Trace:
Oct  6 20:11:19 localhost kernel: [   32.715478]  [<ffffffff810085ee>] try_stack_unwind+0x17e/0x190
Oct  6 20:11:19 localhost kernel: [   32.715483]  [<ffffffff8100737f>] dump_trace+0x8f/0x3b0
Oct  6 20:11:19 localhost kernel: [   32.715486]  [<ffffffff8100864d>] show_trace_log_lvl+0x4d/0x60
Oct  6 20:11:19 localhost kernel: [   32.715488]  [<ffffffff810077a1>] show_stack_log_lvl+0x101/0x190
Oct  6 20:11:19 localhost kernel: [   32.715491]  [<ffffffff810086a5>] show_stack+0x25/0x50
Oct  6 20:11:19 localhost kernel: [   32.715494]  [<ffffffff81368633>] dump_stack+0x4b/0x78
Oct  6 20:11:19 localhost kernel: [   32.715498]  [<ffffffff8106a628>] warn_slowpath_common+0x88/0xc0
Oct  6 20:11:19 localhost kernel: [   32.715501]  [<ffffffff8106a71a>] warn_slowpath_null+0x1a/0x20
Oct  6 20:11:19 localhost kernel: [   32.715512]  [<ffffffffa08318eb>] handle_mmio_page_fault.isra.85+0x2c/0x31 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.715532]  [<ffffffffa0816106>] tdp_page_fault+0x246/0x260 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.715546]  [<ffffffffa080fbd4>] kvm_mmu_page_fault+0x24/0x110 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.715551]  [<ffffffffa0cdbfc9>] pf_interception+0xc9/0x150 [kvm_amd]
Oct  6 20:11:19 localhost kernel: [   32.715556]  [<ffffffffa0cdf5a0>] handle_exit+0x180/0x9b0 [kvm_amd]
Oct  6 20:11:19 localhost kernel: [   32.715567]  [<ffffffffa0805dd9>] vcpu_enter_guest+0x769/0xde0 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.715580]  [<ffffffffa080c62a>] kvm_arch_vcpu_ioctl_run+0x2da/0x400 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.715590]  [<ffffffffa07f4d8f>] kvm_vcpu_ioctl+0x30f/0x5c0 [kvm]
Oct  6 20:11:19 localhost kernel: [   32.715595]  [<ffffffff811f81cd>] do_vfs_ioctl+0x2cd/0x4c0
Oct  6 20:11:19 localhost kernel: [   32.715602]  [<ffffffff811f8439>] SyS_ioctl+0x79/0x90
Oct  6 20:11:19 localhost kernel: [   32.715606]  [<ffffffff816b3936>] entry_SYSCALL_64_fastpath+0x16/0x75
Oct  6 20:11:19 localhost kernel: [   32.716901] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x16/0x75
Oct  6 20:11:19 localhost kernel: [   32.716902] 
Oct  6 20:11:19 localhost kernel: [   32.716902] Leftover inexact backtrace:
Oct  6 20:11:19 localhost kernel: [   32.716902] 
Oct  6 20:11:19 localhost kernel: [   32.716904] ---[ end trace 09ac28af2000b368 ]---
Joerg Roedel Oct. 7, 2015, 3:40 p.m. UTC | #2
On Tue, Oct 06, 2015 at 08:37:59PM +0200, Andreas Hartmann wrote:
> But there is another problem w/ 4.3-rc2: Starting a VM w/ PCIe
> passthrough doesn't work any more. I'm getting the attached null pointer
> dereference and the machine hangs.

Weird, probably a do_detach call for a device that is already detached.
Anyway, I can't reproduce this here on my two AMD IOMMU machines.  Can
you please boot the machine with amd_iommu_dump on the kernel command
line and send me dmesg after boot?

Also, which device are you trying to attach to the guest (pci
bus/device/function)?

Output of lspci might also be helpful.


Thanks,

	Joerg

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joerg Roedel Oct. 7, 2015, 4:10 p.m. UTC | #3
On Wed, Oct 07, 2015 at 04:57:31AM +0200, Andreas Hartmann wrote:
> Got it during a single mount (I booted with massively reduced mounts and
> did the mount afterwards manually. During the second manual mount, the
> problem can be seen).
> 
> I attached the requested files. The mount starts at 80 seconds.
> Hope this helps.

Okay, the lowest dma-addr the AMD IOMMU driver returns is 0x1000 and
the highest is 0x7ff4000. All fault addresses are outside of this range,
so the AMD IOMMU driver never returned these addresses.

This doesn't mean that it is not at fault, but it looks still unlikely.
Maybe I can reproduce the problem here. Can you please tell me some
details about the partitions you mounted to trigger this?

I remember something about a xfs->lvm->dm_crypt->md_raid->sata setup, but
having more details may help me to reproduce.



	Joerg

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Hartmann Oct. 7, 2015, 4:52 p.m. UTC | #4
On 10/07/2015 at 06:10 PM, Joerg Roedel wrote:
> On Wed, Oct 07, 2015 at 04:57:31AM +0200, Andreas Hartmann wrote:
>> Got it during a single mount (I booted with massively reduced mounts and
>> did the mount afterwards manually. During the second manual mount, the
>> problem can be seen).
>>
>> I attached the requested files. The mount starts at 80 seconds.
>> Hope this helps.
> 
> Okay, the lowest dma-addr the AMD IOMMU driver returns is 0x1000 and
> the highest is 0x7ff4000. All fault addresses are outside of this range,
> so the AMD IOMMU driver never returned these addresses.
> 
> This doesn't mean that it is not at fault, but it looks still unlikely.
> Maybe I can reproduce the problem here. Can you please tell me some
> details about the partitions you mounted to trigger this?
> 
> I remember something about a xfs->lvm->dm_crypt->md_raid->sata setup, but
> having more details may help me to reproduce.

See attachments in http://article.gmane.org/gmane.linux.kernel.pci/43975


To reproduce the error:
First I mounted /daten2, afterwards /raid/mt, which produces the errors.
The ssd mounts have been already active (during boot by fstab).

If I mount all of them already during boot, the system mostly starts to
emergency mode which unfortunately is broken here.


Regards,
Andreas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Hartmann Oct. 7, 2015, 5:02 p.m. UTC | #5
On 10/07/2015 at 05:40 PM Joerg Roedel wrote:
> On Tue, Oct 06, 2015 at 08:37:59PM +0200, Andreas Hartmann wrote:
>> But there is another problem w/ 4.3-rc2: Starting a VM w/ PCIe
>> passthrough doesn't work any more. I'm getting the attached null pointer
>> dereference and the machine hangs.
> 
> Weird, probably a do_detach call for a device that is already detached.
> Anyway, I can't reproduce this here on my two AMD IOMMU machines.  Can
> you please boot the machine with amd_iommu_dump on the kernel command
> line and send me dmesg after boot?

Binding the device to vfio isn't a problem (it's done before the vm is
started). The problem occurs during start of qemu-system-x86_64 (2.3.0).

The attached dmesg.out doesn't show the trace, but the desired iommu dump.

> Also, which device are you trying to attach to the guest (pci
> bus/device/function)?

See attached ath9k.device.

> Output of lspci might also be helpful.

I attached lspci and dmesg.


Hope that helps,
thanks,
Andreas
Joerg Roedel Oct. 8, 2015, 4:39 p.m. UTC | #6
On Wed, Oct 07, 2015 at 06:52:58PM +0200, Andreas Hartmann wrote:
> To reproduce the error:
> First I mounted /daten2, afterwards /raid/mt, which produces the errors.
> The ssd mounts have been already active (during boot by fstab).

Okay, I spent the day on that problem, and managed to reproduce it here
on one of my AMD IOMMU boxes. It wasn't an easy journey, as I can only
reproduce it if I set up the crypto partition and everything above that
(like mounting the lvm volumes) _after_ the system has finished booting.
If everything is set up during system boot it works fine and I don't see
any IO_PAGE_FAULTS.

I also tried kernel v4.3-rc4 first, to have it tested with a
self-compiled kernel. It didn't show up there, so I built a 4.1.0, where
it showed up again. Something seems to have fixed the issue in the
latest kernels.

So I looked a little bit around at the commits that were merged into the
respective parts involved here, and found this one:

	586b286 dm crypt: constrain crypt device's max_segment_size to PAGE_SIZE

The problem fixed with this commit looks quite similar to what you have
seen (except that there was no IOMMU involved). So I cherry-picked that
commit on 4.1.0 and tested that. The problem was gone.

So it looks like it was a dm-crypt issue, the patch went into v4.3-rc3,
either this kernel or rc4 should fix the problem for you too. Can you
please verify this is fixed for you too with v4.3-rc4?


Thanks,

	Joerg

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joerg Roedel Oct. 8, 2015, 5:30 p.m. UTC | #7
On Wed, Oct 07, 2015 at 07:02:32PM +0200, Andreas Hartmann wrote:
> Binding the device to vfio isn't a problem (it's done before the vm is
> started). The problem occurs during start of qemu-system-x86_64 (2.3.0).
> 
> The attached dmesg.out doesn't show the trace, but the desired iommu dump.
> 
> > Also, which device are you trying to attach to the guest (pci
> > bus/device/function)?
> 
> See attached ath9k.device.

Hmm, can you also test this again with the v4.3-rc4 please? The device
you are attaching has its own group and no aliases, so I really can't
see how the trace could happen, and I can't reproduce it here either.

So I just want to make sure it is not a follow-on bug from the previous
problem.


Thanks,

	Joerg

P.S.: When you build the kernel with debug symbols and the problem
      occurs again, you can find out the source file and line where the
      bug happened with

      	$ objdump -Dlz --start-address=<rip> vmlinux | head

      Replace <rip> with the RIP from the kernel oops and the output
      should show you where in the source the bug comes from. The
      vmlinux file is in the kernels build directory.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Hartmann Oct. 8, 2015, 6:21 p.m. UTC | #8
Am 08.10.2015 um 18:39 schrieb Joerg Roedel:
> On Wed, Oct 07, 2015 at 06:52:58PM +0200, Andreas Hartmann wrote:
>> To reproduce the error:
>> First I mounted /daten2, afterwards /raid/mt, which produces the errors.
>> The ssd mounts have been already active (during boot by fstab).
>
> Okay, I spent the day on that problem, and managed to reproduce it here
> on one of my AMD IOMMU boxes. It wasn't an easy journey, as I can only
> reproduce it if I set up the crypto partition and everything above that
> (like mounting the lvm volumes) _after_ the system has finished booting.
> If everything is set up during system boot it works fine and I don't see
> any IO_PAGE_FAULTS.

Thank you very much for spending so much of your time to reproduce the 
problem!

> I also tried kernel v4.3-rc4 first, to have it tested with a
> self-compiled kernel. It didn't show up there, so I built a 4.1.0, where
> it showed up again. Something seems to have fixed the issue in the
> latest kernels.
>
> So I looked a little bit around at the commits that were merged into the
> respective parts involved here, and found this one:
>
> 	586b286 dm crypt: constrain crypt device's max_segment_size to PAGE_SIZE
>
> The problem fixed with this commit looks quite similar to what you have
> seen (except that there was no IOMMU involved). So I cherry-picked that
> commit on 4.1.0 and tested that. The problem was gone.

That's true - I already knew this patch and tested it some weeks ago - 
unfortunately it doesn't fix the problem here.

To be really sure, I just retested it now again. I couldn't see any 
IO_PAGE_FAULTS errors today (unfortunately I can't remember anymore if I 
didn't see them too a few weeks ago) - but the ata errors remain. 
Therefore, this patch isn't a solution for the problem I encounter here.

> So it looks like it was a dm-crypt issue, the patch went into v4.3-rc3,
> either this kernel or rc4 should fix the problem for you too. Can you
> please verify this is fixed for you too with v4.3-rc4?

As I already wrote, I even couldn't see the problem with v4.3-rc2 any 
more (as far as I was able to test because of the other problem). I have 
to do some more tests now with this kernel to be really sure.


Kind regards,
Andreas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Hartmann Oct. 8, 2015, 6:59 p.m. UTC | #9
On 10/08/2015 at 07:30 PM Joerg Roedel wrote:
> On Wed, Oct 07, 2015 at 07:02:32PM +0200, Andreas Hartmann wrote:
>> Binding the device to vfio isn't a problem (it's done before the vm is
>> started). The problem occurs during start of qemu-system-x86_64 (2.3.0).
>>
>> The attached dmesg.out doesn't show the trace, but the desired iommu dump.
>>
>>> Also, which device are you trying to attach to the guest (pci
>>> bus/device/function)?
>>
>> See attached ath9k.device.
> 
> Hmm, can you also test this again with the v4.3-rc4 please? The device
> you are attaching has its own group and no aliases, so I really can't
> see how the trace could happen, and I can't reproduce it here either.

Unchanged - this time hard locked machine and no trace at all because of
data loss after reboot :-(.

Btw: Linux 4.2 doesn't show this problem.

Nevertheless I'll try to get a trace - maybe I'm lucky and the machine
doesn't lock up completely another time :-).


Regards,
Andreas

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Hartmann Oct. 8, 2015, 7:47 p.m. UTC | #10
On 10/08/2015 at 08:59 PM, Andreas Hartmann wrote:
> On 10/08/2015 at 07:30 PM Joerg Roedel wrote:
>> On Wed, Oct 07, 2015 at 07:02:32PM +0200, Andreas Hartmann wrote:
>>> Binding the device to vfio isn't a problem (it's done before the vm is
>>> started). The problem occurs during start of qemu-system-x86_64 (2.3.0).
>>>
>>> The attached dmesg.out doesn't show the trace, but the desired iommu dump.
>>>
>>>> Also, which device are you trying to attach to the guest (pci
>>>> bus/device/function)?
>>>
>>> See attached ath9k.device.
>>
>> Hmm, can you also test this again with the v4.3-rc4 please? The device
>> you are attaching has its own group and no aliases, so I really can't
>> see how the trace could happen, and I can't reproduce it here either.
> 
> Unchanged - this time hard locked machine and no trace at all because of
> data loss after reboot :-(.
> 
> Btw: Linux 4.2 doesn't show this problem.
> 
> Nevertheless I'll try to get a trace - maybe I'm lucky and the machine
> doesn't lock up completely another time :-).

Got it. I attached the complete oops and the output of objdump.

Kernel was linux 4.3-rc4


This time, the oops was caused by the second PCI card I'm passing
through to another VM (the ath9k card worked fine this time - chance?).
I added the lspci output to the attached file, too.


Thanks,
regards,
Andreas
Andreas Hartmann Oct. 8, 2015, 7:52 p.m. UTC | #11
On 10/08/2015 at 08:21 PM, Andreas Hartmann wrote:
> Am 08.10.2015 um 18:39 schrieb Joerg Roedel:
>> On Wed, Oct 07, 2015 at 06:52:58PM +0200, Andreas Hartmann wrote:
>>> To reproduce the error:
>>> First I mounted /daten2, afterwards /raid/mt, which produces the errors.
>>> The ssd mounts have been already active (during boot by fstab).
>>
>> Okay, I spent the day on that problem, and managed to reproduce it here
>> on one of my AMD IOMMU boxes. I wasn't an easy journey, as I can only
>> reproduce it if I setup the crypto partition and everything above that
>> (like mounting the lvm volumes) _after_ the system has finished booting.
>> If everything is setup during system boot it works fine and I don't see
>> any IO_PAGE_FAULTS.
> 
> Thank you very much for spending so much of your time to reproduce the
> problem!
> 
>> I also tried kernel v4.3-rc4 first, to have it tested with a
>> self-compiled kernel. It didn't show up there, so I built a 4.1.0, where
>> it showed up again. Something seems to have fixed the issue in the
>> latest kernels.
>>
>> So I looked a little bit around at the commits that were merged into the
>> respective parts involved here, and found this one:
>>
>>     586b286 dm crypt: constrain crypt device's max_segment_size to
>> PAGE_SIZE
>>
>> The problem fixed with this commit looks quite similar to what you have
>> seen (execpt that there was no IOMMU involved). So I cherry-picked that
>> commit on 4.1.0 and tested that. The problem was gone.
> 
> That's true - I already knew this patch and tested it some weeks ago -
> unfortunately it doesn't fix the problem here.
> 
> To be really sure, I just retested it now again. I couldn't see any
> IO_PAGE_FAULTS errors today (unfortunately I can't remember anymore if I
> didn't see them too a few weeks ago) - but the ata errors remain.
> Therefore, this patch isn't a solution for the problem I encounter here.
> 
>> So it looks like it was a dm-crypt issue, the patch went into v4.3-rc3,
>> either this kernel of rc4 should fix the problem for you too. Can you
>> please verify this is fixed for you too with v4.3-rc4?
> 
> As I already wrote, I even couldn't see the problem with v4.3-rc2 any
> more (as far as I was able to test because of the other problem). I have
> to do some more tests now with this kernel to be really sure.

I now tested w/ v4.3-rc4. I couldn't see any IO_PAGE_FAULTS but the ata
errors remain. The ata errors can be easily activated by copying a large
file (> 4 GB) from one partition on the raid to another partition on the
raid.



Thanks,
Andreas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Hartmann Oct. 9, 2015, 5:20 a.m. UTC | #12
On 10/08/2015 at 09:52 PM, Andreas Hartmann wrote:
> On 10/08/2015 at 08:21 PM, Andreas Hartmann wrote:
>> Am 08.10.2015 um 18:39 schrieb Joerg Roedel:
>>> On Wed, Oct 07, 2015 at 06:52:58PM +0200, Andreas Hartmann wrote:
>>>> To reproduce the error:
>>>> First I mounted /daten2, afterwards /raid/mt, which produces the errors.
>>>> The ssd mounts have been already active (during boot by fstab).
>>>
>>> Okay, I spent the day on that problem, and managed to reproduce it here
>>> on one of my AMD IOMMU boxes. I wasn't an easy journey, as I can only
>>> reproduce it if I setup the crypto partition and everything above that
>>> (like mounting the lvm volumes) _after_ the system has finished booting.
>>> If everything is setup during system boot it works fine and I don't see
>>> any IO_PAGE_FAULTS.
>>
>> Thank you very much for spending so much of your time to reproduce the
>> problem!
>>
>>> I also tried kernel v4.3-rc4 first, to have it tested with a
>>> self-compiled kernel. It didn't show up there, so I built a 4.1.0, where
>>> it showed up again. Something seems to have fixed the issue in the
>>> latest kernels.
>>>
>>> So I looked a little bit around at the commits that were merged into the
>>> respective parts involved here, and found this one:
>>>
>>>     586b286 dm crypt: constrain crypt device's max_segment_size to
>>> PAGE_SIZE
>>>
>>> The problem fixed with this commit looks quite similar to what you have
>>> seen (execpt that there was no IOMMU involved). So I cherry-picked that
>>> commit on 4.1.0 and tested that. The problem was gone.
>>
>> That's true - I already knew this patch and tested it some weeks ago -
>> unfortunately it doesn't fix the problem here.
>>
>> To be really sure, I just retested it now again. I couldn't see any
>> IO_PAGE_FAULTS errors today (unfortunately I can't remember anymore if I
>> didn't see them too a few weeks ago) - but the ata errors remain.
>> Therefore, this patch isn't a solution for the problem I encounter here.
>>
>>> So it looks like it was a dm-crypt issue, the patch went into v4.3-rc3,
>>> either this kernel of rc4 should fix the problem for you too. Can you
>>> please verify this is fixed for you too with v4.3-rc4?
>>
>> As I already wrote, I even couldn't see the problem with v4.3-rc2 any
>> more (as far as I was able to test because of the other problem). I have
>> to do some more tests now with this kernel to be really sure.
> 
> I now tested w/ v4.3-rc4. I couldn't see any IO_PAGE_FAULTS but the ata
> errors remain. The ata errors can be easily activated by copying a large
> file (> 4 GB) from one partition on the raid to another partition on the
> raid.

Hmmm, I retested this morning w/ v4.3-rc4 and 4.1.10 (with the above
mentioned patch applied) - and now, I didn't get any more ata errors.

I'm confused now. The only difference between yesterday evening and this
morning was, that the machine was over night completely powerless (via
socket outlet switch). Could this really be the reason? Let's wait and
see if this is a persistent state ... .

But the other new error w/ 4.3-rc2 or rc4 while starting a VM with PCI
passthrough remains even this morning :-(. Would have been nice if it
would have gone over night, too ...


Thanks,
regards,
Andreas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Hartmann Oct. 9, 2015, 9:15 a.m. UTC | #13
On 10/09/2015 at 07:20 AM, Andreas Hartmann wrote:
> On 10/08/2015 at 09:52 PM, Andreas Hartmann wrote:
>> On 10/08/2015 at 08:21 PM, Andreas Hartmann wrote:
>>> Am 08.10.2015 um 18:39 schrieb Joerg Roedel:
>>>> On Wed, Oct 07, 2015 at 06:52:58PM +0200, Andreas Hartmann wrote:
>>>>> To reproduce the error:
>>>>> First I mounted /daten2, afterwards /raid/mt, which produces the errors.
>>>>> The ssd mounts have been already active (during boot by fstab).
>>>>
>>>> Okay, I spent the day on that problem, and managed to reproduce it here
>>>> on one of my AMD IOMMU boxes. I wasn't an easy journey, as I can only
>>>> reproduce it if I setup the crypto partition and everything above that
>>>> (like mounting the lvm volumes) _after_ the system has finished booting.
>>>> If everything is setup during system boot it works fine and I don't see
>>>> any IO_PAGE_FAULTS.
>>>
>>> Thank you very much for spending so much of your time to reproduce the
>>> problem!
>>>
>>>> I also tried kernel v4.3-rc4 first, to have it tested with a
>>>> self-compiled kernel. It didn't show up there, so I built a 4.1.0, where
>>>> it showed up again. Something seems to have fixed the issue in the
>>>> latest kernels.
>>>>
>>>> So I looked a little bit around at the commits that were merged into the
>>>> respective parts involved here, and found this one:
>>>>
>>>>     586b286 dm crypt: constrain crypt device's max_segment_size to
>>>> PAGE_SIZE
>>>>
>>>> The problem fixed with this commit looks quite similar to what you have
>>>> seen (execpt that there was no IOMMU involved). So I cherry-picked that
>>>> commit on 4.1.0 and tested that. The problem was gone.
>>>
>>> That's true - I already knew this patch and tested it some weeks ago -
>>> unfortunately it doesn't fix the problem here.
>>>
>>> To be really sure, I just retested it now again. I couldn't see any
>>> IO_PAGE_FAULTS errors today (unfortunately I can't remember anymore if I
>>> didn't see them too a few weeks ago) - but the ata errors remain.
>>> Therefore, this patch isn't a solution for the problem I encounter here.
>>>
>>>> So it looks like it was a dm-crypt issue, the patch went into v4.3-rc3,
>>>> either this kernel of rc4 should fix the problem for you too. Can you
>>>> please verify this is fixed for you too with v4.3-rc4?
>>>
>>> As I already wrote, I even couldn't see the problem with v4.3-rc2 any
>>> more (as far as I was able to test because of the other problem). I have
>>> to do some more tests now with this kernel to be really sure.
>>
>> I now tested w/ v4.3-rc4. I couldn't see any IO_PAGE_FAULTS but the ata
>> errors remain. The ata errors can be easily activated by copying a large
>> file (> 4 GB) from one partition on the raid to another partition on the
>> raid.
> 
> Hmmm, I retested this morning w/ v4.3-rc4 and 4.1.10 (with the above
> mentioned patch applied) - and now, I didn't get any more ata errors.
> 
> I'm confused now. The only difference between yesterday evening and this
> morning was, that the machine was over night completely powerless (via
> socket outlet switch). Could this really be the reason? Let's wait and
> see if this is a persistent state ... .

No - it is not a persistent state. The ata errors are back again (in
4.1.10 w/ the above mentioned patch applied). It just isn't that easy
any more to trigger them. After a short time of intermission w/ power
off / on cycle, the error came up again doing the first test copy.
This means: there must be something more broken.

If I revert the original culprit of all of the problems (block: remove
artifical max_hw_sectors cap), it is possible to increase max_sectors_kb
to 1024 - any higher value leads to ata or IO_PAGE_FAULTS sooner or later.

v4.3-rc4 isn't usable at all for me as long as it hangs the machine on
the necessary PCI passthrough for VMs (I need them).


Regards,
Andreas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joerg Roedel Oct. 9, 2015, 10:40 a.m. UTC | #14
On Thu, Oct 08, 2015 at 09:47:28PM +0200, Andreas Hartmann wrote:
> Got it. I attached the complete oops and the output of objdump.
> 
> Kernel was linux 4.3-rc4
> 
> 
> This time, the oops was caused by the second PCI card I'm passing
> through to another VM (the ath9k card worked fine this time - chance?).
> I added the lspci output to the attached file, too.

Okay, thanks, this makes more sense to me. It looks like you are
attaching a 32bit PCI device, which has an alias. This is definitely a
bug in the AMD IOMMU driver and I have an idea how to fix it. I'll look
into this after lunch.



	Joerg

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joerg Roedel Oct. 9, 2015, 2:59 p.m. UTC | #15
On Fri, Oct 09, 2015 at 11:15:05AM +0200, Andreas Hartmann wrote:
> v4.3-rc4 isn't usable at all for me as long as it hangs the machine on
> the necessary PCI passthrough for VMs (I need them).

If the fix I just sent you works, could you please test this again with
a (patched) v4.3-rc4 kernel?


Thanks,

	Joerg

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Hartmann Oct. 9, 2015, 5:46 p.m. UTC | #16
Hello Jörg,

On 10/09/2015 at 04:59 PM, Joerg Roedel wrote:
> On Fri, Oct 09, 2015 at 11:15:05AM +0200, Andreas Hartmann wrote:
>> v4.3-rc4 isn't usable at all for me as long as is hangs the machine on
>> the necessary PCI passthrough for VMs (I need them).
> 
> If the fix I just sent you works, could you please test this again with
> a (patched) v4.3-rc4 kernel?

Your IOMMU-patch works fine - but the ata-problem can be seen here, too.
Same behavior as with 4.1.10.


Thanks,
regards,
Andreas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Hartmann Oct. 11, 2015, 12:23 p.m. UTC | #17
On 10/09/2015 at 07:46 PM, Andreas Hartmann wrote:
> Hello Jörg,
> 
> On 10/09/2015 at 04:59 PM, Joerg Roedel wrote:
>> On Fri, Oct 09, 2015 at 11:15:05AM +0200, Andreas Hartmann wrote:
>>> v4.3-rc4 isn't usable at all for me as long as is hangs the machine on
>>> the necessary PCI passthrough for VMs (I need them).
>>
>> If the fix I just sent you works, could you please test this again with
>> a (patched) v4.3-rc4 kernel?
> 
> Your IOMMU-patch works fine - but the ata-problem can be seen here, too.
> Same behavior as with 4.1.10.
> 

Ok, this patch seems to fix the ata errors (I did a lot of tests until
now w/ v4.1.10 - but anyway I'm cautious):

http://thread.gmane.org/gmane.linux.scsi/104141/focus=104267

Would be nice to have it in all kernels (as stable patch too in 4.1.x).


Regards,
Andreas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Hartmann Oct. 12, 2015, 12:07 p.m. UTC | #18
On 10/11/2015 at 02:23 PM, Andreas Hartmann wrote:
> On 10/09/2015 at 07:46 PM, Andreas Hartmann wrote:
>> Hello Jörg,
>>
>> On 10/09/2015 at 04:59 PM, Joerg Roedel wrote:
>>> On Fri, Oct 09, 2015 at 11:15:05AM +0200, Andreas Hartmann wrote:
>>>> v4.3-rc4 isn't usable at all for me as long as is hangs the machine on
>>>> the necessary PCI passthrough for VMs (I need them).
>>>
>>> If the fix I just sent you works, could you please test this again with
>>> a (patched) v4.3-rc4 kernel?
>>
>> Your IOMMU-patch works fine - but the ata-problem can be seen here, too.
>> Same behavior as with 4.1.10.
>>
> 
> Ok, this patch seems to fix the ata errors (I did a lot of tests until
> now w/ v4.1.10 - but anyway I'm cautious):
> 
> http://thread.gmane.org/gmane.linux.scsi/104141/focus=104267

-> Forget it - doesn't fix it.


Regards,
Andreas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mikulas Patocka Oct. 12, 2015, 12:34 p.m. UTC | #19
On Fri, 9 Oct 2015, Andreas Hartmann wrote:

> Hello Jörg,
> 
> On 10/09/2015 at 04:59 PM, Joerg Roedel wrote:
> > On Fri, Oct 09, 2015 at 11:15:05AM +0200, Andreas Hartmann wrote:
> >> v4.3-rc4 isn't usable at all for me as long as is hangs the machine on
> >> the necessary PCI passthrough for VMs (I need them).
> > 
> > If the fix I just sent you works, could you please test this again with
> > a (patched) v4.3-rc4 kernel?
> 
> Your IOMMU-patch works fine - but the ata-problem can be seen here, too.
> Same behavior as with 4.1.10.

Could you try another ata disk? (copy the whole filesystem to it and run 
the same test)

It may be a bug in the disk's firmware.

Mikulas

Patch
diff mbox

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f82060e7..0002e79 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2465,6 +2465,7 @@  static dma_addr_t __map_single(struct device *dev,
 {
 	dma_addr_t offset = paddr & ~PAGE_MASK;
 	dma_addr_t address, start, ret;
+	phys_addr_t old_paddr = paddr;
 	unsigned int pages;
 	unsigned long align_mask = 0;
 	int i;
@@ -2521,6 +2522,8 @@  retry:
 		domain_flush_pages(&dma_dom->domain, address, size);
 
 out:
+	trace_printk("%s: mapped %llx paddr %llx size %zu\n",
+			dev_name(dev), address, old_paddr, size);
 	return address;
 
 out_unmap:
@@ -2532,6 +2535,9 @@  out_unmap:
 
 	dma_ops_free_addresses(dma_dom, address, pages);
 
+	trace_printk("%s: return DMA_ERROR_CODE paddr %llx size %zu\n",
+			dev_name(dev), old_paddr, size);
+
 	return DMA_ERROR_CODE;
 }
 
@@ -2628,6 +2634,8 @@  static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 
 	spin_lock_irqsave(&domain->lock, flags);
 
+	trace_printk("%s: unmap dma_addr %llx size %zu\n",
+			dev_name(dev), dma_addr, size);
 	__unmap_single(domain->priv, dma_addr, size, dir);
 
 	domain_flush_complete(domain);
@@ -2683,9 +2691,13 @@  out:
 	return mapped_elems;
 unmap:
 	for_each_sg(sglist, s, mapped_elems, i) {
-		if (s->dma_address)
+		if (s->dma_address) {
+			trace_printk("%s: unmap dma_addr %llx size %u\n",
+					dev_name(dev), s->dma_address,
+					s->dma_length);
 			__unmap_single(domain->priv, s->dma_address,
 				       s->dma_length, dir);
+		}
 		s->dma_address = s->dma_length = 0;
 	}
 
@@ -2716,6 +2728,9 @@  static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 	spin_lock_irqsave(&domain->lock, flags);
 
 	for_each_sg(sglist, s, nelems, i) {
+	trace_printk("%s: unmap dma_addr %llx size %u\n",
+			dev_name(dev), s->dma_address, s->dma_length);
+
 		__unmap_single(domain->priv, s->dma_address,
 			       s->dma_length, dir);
 		s->dma_address = s->dma_length = 0;
@@ -2813,6 +2828,9 @@  static void free_coherent(struct device *dev, size_t size,
 
 	spin_lock_irqsave(&domain->lock, flags);
 
+	trace_printk("%s: unmap dma_addr %llx size %zu\n",
+			dev_name(dev), dma_addr, size);
+
 	__unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
 
 	domain_flush_complete(domain);