If the handler took any action to log or deal with the error, set a bit
in mce->kflags so that the default handler at the end of the machine
check chain can see what has been done.
Get rid of NOTIFY_STOP returns. Make the EDAC and dev-mcelog handlers
skip over errors already processed by CEC.
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20200214222720.13168-5-tony.luck@intel.com
return NOTIFY_DONE;
pfn = mce->addr >> PAGE_SHIFT;
- if (!memory_failure(pfn, 0))
+ if (!memory_failure(pfn, 0)) {
set_mce_nospec(pfn);
+ mce->kflags |= MCE_HANDLED_UC;
+ }
return NOTIFY_OK;
}
struct mce *mce = (struct mce *)data;
unsigned int entry;
+ if (mce->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
+
mutex_lock(&mce_chrdev_read_mutex);
entry = mcelog->next;
memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
mcelog->entry[entry].finished = 1;
+ mcelog->entry[entry].kflags = 0;
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
unlock:
mutex_unlock(&mce_chrdev_read_mutex);
+ mce->kflags |= MCE_HANDLED_MCELOG;
return NOTIFY_OK;
}
static u32 err_seq;
estatus = extlog_elog_entry_check(cpu, bank);
- if (estatus == NULL)
+ if (estatus == NULL || (mce->kflags & MCE_HANDLED_CEC))
return NOTIFY_DONE;
memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN);
}
out:
- return NOTIFY_STOP;
+ mce->kflags |= MCE_HANDLED_EXTLOG;
+ return NOTIFY_OK;
}
static bool __init extlog_get_l1addr(void)
*/
acpi_nfit_ars_rescan(acpi_desc, 0);
}
+ mce->kflags |= MCE_HANDLED_NFIT;
break;
}
struct mem_ctl_info *mci;
i7_dev = get_i7core_dev(mce->socketid);
- if (!i7_dev)
+ if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC))
return NOTIFY_DONE;
mci = i7_dev->mci;
i7core_check_error(mci, mce);
/* Advise mcelog that the errors were handled */
- return NOTIFY_STOP;
+ mce->kflags |= MCE_HANDLED_EDAC;
+ return NOTIFY_OK;
}
static struct notifier_block i7_mce_dec = {
unsigned int fam = x86_family(m->cpuid);
int ecc;
+ if (m->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
+
pr_emerg(HW_ERR "%s\n", decode_error_status(m));
pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
err_code:
amd_decode_err_code(m->status & 0xffff);
- return NOTIFY_STOP;
+ m->kflags |= MCE_HANDLED_EDAC;
+ return NOTIFY_OK;
}
static struct notifier_block amd_mce_dec_nb = {
return NOTIFY_DONE;
mci = pnd2_mci;
- if (!mci)
+ if (!mci || (mce->kflags & MCE_HANDLED_CEC))
return NOTIFY_DONE;
/*
pnd2_mce_output_error(mci, mce, &daddr);
/* Advice mcelog that the error were handled */
- return NOTIFY_STOP;
+ mce->kflags |= MCE_HANDLED_EDAC;
+ return NOTIFY_OK;
}
static struct notifier_block pnd2_mce_dec = {
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
+ if (mce->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
/*
* Just let mcelog handle it if the error is
sbridge_mce_output_error(mci, mce);
/* Advice mcelog that the error were handled */
- return NOTIFY_STOP;
+ mce->kflags |= MCE_HANDLED_EDAC;
+ return NOTIFY_OK;
}
static struct notifier_block sbridge_mce_dec = {
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
+ if (mce->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
+
/* ignore unless this is memory related with an address */
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
return NOTIFY_DONE;
skx_mce_output_error(mci, mce, &res);
+ mce->kflags |= MCE_HANDLED_EDAC;
return NOTIFY_DONE;
}
/* We eat only correctable DRAM errors with usable addresses. */
if (mce_is_memory_error(m) &&
mce_is_correctable(m) &&
- mce_usable_address(m))
- if (!cec_add_elem(m->addr >> PAGE_SHIFT))
- return NOTIFY_STOP;
+ mce_usable_address(m)) {
+ if (!cec_add_elem(m->addr >> PAGE_SHIFT)) {
+ m->kflags |= MCE_HANDLED_CEC;
+ return NOTIFY_OK;
+ }
+ }
return NOTIFY_DONE;
}