@@ -665,6 +665,7 @@ struct hl_hints_range {
* @first_available_cq: first available CQ for the user.
* @user_interrupt_count: number of user interrupts.
* @user_dec_intr_count: number of decoder interrupts exposed to user.
+ * @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host.
* @cache_line_size: device cache line size.
* @server_type: Server type that the ASIC is currently installed in.
* The value is according to enum hl_server_type in uapi file.
@@ -791,6 +792,7 @@ struct asic_fixed_properties {
u16 first_available_cq[HL_MAX_DCORES];
u16 user_interrupt_count;
u16 user_dec_intr_count;
+ u16 tpc_interrupt_id;
u16 cache_line_size;
u16 server_type;
u8 completion_queues_count;
@@ -1099,6 +1101,7 @@ struct hl_cq {
enum hl_user_interrupt_type {
HL_USR_INTERRUPT_CQ = 0,
HL_USR_INTERRUPT_DECODER,
+ HL_USR_INTERRUPT_TPC
};
/**
@@ -3148,6 +3151,7 @@ struct hl_reset_info {
* @user_interrupt: array of hl_user_interrupt. upon the corresponding user
* interrupt, driver will monitor the list of fences
* registered to this interrupt.
+ * @tpc_interrupt: single TPC interrupt for all TPCs.
* @common_user_cq_interrupt: common user CQ interrupt for all user CQ interrupts.
* upon any user CQ interrupt, driver will monitor the
* list of fences registered to this common structure.
@@ -3332,6 +3336,7 @@ struct hl_device {
enum hl_asic_type asic_type;
struct hl_cq *completion_queue;
struct hl_user_interrupt *user_interrupt;
+ struct hl_user_interrupt tpc_interrupt;
struct hl_user_interrupt common_user_cq_interrupt;
struct hl_user_interrupt common_decoder_interrupt;
struct hl_cs **shadow_cs_queue;
@@ -102,6 +102,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.mme_master_slave_mode = prop->mme_master_slave_mode;
hw_ip.first_available_interrupt_id = prop->first_available_user_interrupt;
hw_ip.number_of_user_interrupts = prop->user_interrupt_count;
+ hw_ip.tpc_interrupt_id = prop->tpc_interrupt_id;
hw_ip.edma_enabled_mask = prop->edma_enabled_mask;
hw_ip.server_type = prop->server_type;
@@ -325,6 +325,21 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
}
}
+static void handle_tpc_interrupt(struct hl_device *hdev)
+{
+ u64 event_mask;
+ u32 flags;
+
+ event_mask = HL_NOTIFIER_EVENT_TPC_ASSERT |
+ HL_NOTIFIER_EVENT_USER_ENGINE_ERR |
+ HL_NOTIFIER_EVENT_DEVICE_RESET;
+
+ flags = HL_DRV_RESET_DELAY;
+
+ dev_err_ratelimited(hdev->dev, "Received TPC assert\n");
+ hl_device_cond_reset(hdev, flags, event_mask);
+}
+
/**
* hl_irq_handler_user_interrupt - irq handler for user interrupts
*
@@ -367,6 +382,9 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
/* Handle decoder interrupt registered on this specific irq */
handle_user_interrupt(hdev, user_int);
break;
+ case HL_USR_INTERRUPT_TPC:
+ handle_tpc_interrupt(hdev);
+ break;
default:
break;
}
@@ -679,6 +679,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
(num_sync_stream_queues * HL_RSVD_MONS);
prop->first_available_user_interrupt = USHRT_MAX;
+ prop->tpc_interrupt_id = USHRT_MAX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
@@ -2348,6 +2348,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
(num_sync_stream_queues * HL_RSVD_MONS);
prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
+ prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
@@ -3235,6 +3236,9 @@ static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
int i, j, k;
+ /* Initialize TPC interrupt */
+ HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
+
/* Initialize common user CQ interrupt */
HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
@@ -3892,6 +3896,8 @@ static const char *gaudi2_irq_name(u16 irq_number)
return "gaudi2 completion";
case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
+ case GAUDI2_IRQ_NUM_TPC_ASSERT:
+ return "gaudi2 tpc assert";
case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
return "gaudi2 user completion";
default:
@@ -4004,6 +4010,15 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
goto free_event_irq;
}
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
+ rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
+ hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
+ gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to request IRQ %d", irq);
+ goto free_dec_irq;
+ }
+
for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
user_irq_init_cnt < prop->user_interrupt_count;
i++, j++, user_irq_init_cnt++) {
@@ -4031,9 +4046,8 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
irq = pci_irq_vector(hdev->pdev, i);
free_irq(irq, &hdev->user_interrupt[j]);
}
-
- gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
-
+free_dec_irq:
+ gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
free_event_irq:
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
free_irq(irq, cq);
@@ -4065,6 +4079,8 @@ static void gaudi2_sync_irqs(struct hl_device *hdev)
synchronize_irq(irq);
}
+ synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
+
for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
i++, j++) {
irq = pci_irq_vector(hdev->pdev, i);
@@ -4091,6 +4107,9 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
+ free_irq(irq, &hdev->tpc_interrupt);
+
for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
@@ -410,9 +410,11 @@ enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_SHARED_DEC0_ABNRM,
GAUDI2_IRQ_NUM_SHARED_DEC1_NRM,
GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM,
+ GAUDI2_IRQ_NUM_DEC_LAST = GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM,
GAUDI2_IRQ_NUM_COMPLETION,
GAUDI2_IRQ_NUM_NIC_PORT_FIRST,
GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
+ GAUDI2_IRQ_NUM_TPC_ASSERT,
GAUDI2_IRQ_NUM_RESERVED_FIRST,
GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_NUM_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_USER_FIRST,
@@ -472,6 +472,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
prop->first_available_user_interrupt = USHRT_MAX;
+ prop->tpc_interrupt_id = USHRT_MAX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
@@ -885,6 +885,7 @@ enum hl_server_type {
* application to use. Relevant for Gaudi2 and later.
* @device_mem_alloc_default_page_size: default page size used in device memory allocation.
* @revision_id: PCI revision ID of the ASIC.
+ * @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host.
* @engine_core_interrupt_reg_addr: interrupt register address for engine core to use
* in order to raise events toward FW.
*/
@@ -922,7 +923,7 @@ struct hl_info_hw_ip_info {
__u32 reserved7;
__u8 reserved8;
__u8 revision_id;
- __u8 pad[2];
+ __u16 tpc_interrupt_id;
__u32 reserved9;
__u8 pad3[4];
__u64 engine_core_interrupt_reg_addr;