From 133f696e0a75c446b012822473d39b793c77ebc5 Mon Sep 17 00:00:00 2001 From: "Wang.Bin" Date: Sat, 14 Dec 2024 14:42:08 +0800 Subject: [PATCH 5/5] Clear Status of sensors when power on --- fault-monitor/fru-fault-monitor.cpp | 300 +++++++++++++++++++++------- fault-monitor/fru-fault-monitor.hpp | 62 +++++- fault-monitor/meson.build | 1 + 3 files changed, 282 insertions(+), 81 deletions(-) diff --git a/fault-monitor/fru-fault-monitor.cpp b/fault-monitor/fru-fault-monitor.cpp index b9f687c..1bc41aa 100644 --- a/fault-monitor/fru-fault-monitor.cpp +++ b/fault-monitor/fru-fault-monitor.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include extern "C" @@ -67,6 +69,8 @@ using ResourceNotFoundErr = using InvalidArgumentErr = sdbusplus::xyz::openbmc_project::Common::Error::InvalidArgument; +bool Add::isPoweredOn = false; + constexpr const char* discreteSensorNs = "No state defined"; enum AlarmType : uint64_t @@ -430,8 +434,10 @@ std::string getService(sdbusplus::bus_t& bus, const std::string& path) return mapperResponse.cbegin()->first; } +using AdditionalList = std::vector; + static void setDiscreteSensorStatusValue(sdbusplus::bus::bus& bus, - const std::string& sensorPath,const std::string& sensorData) + const std::string& sensorPath) { // IPMI spec Table 35-15 // For discrete reading sensors only. (Optional) @@ -445,13 +451,13 @@ static void setDiscreteSensorStatusValue(sdbusplus::bus::bus& bus, constexpr auto dbusProperties = "org.freedesktop.DBus.Properties"; constexpr auto statusInterface = "xyz.openbmc_project.Configuration.Status"; - unsigned long dataValue = strtoul(sensorData.c_str(), NULL, 16); statusValue = static_cast(statusValue + - (0x01 << ((dataValue & 0x0f0000) >> 16))); + sensorStatusRec[sensorPath].sensorStatus); auto method = bus.new_method_call(service, sensorPath.c_str(), dbusProperties, "Set"); std::variant value{(double)statusValue}; - std::cerr << "SEL:sensorData = " << sensorData << std::endl; + std::cerr << "SEL:sensorData = " << sensorPath << std::endl; + std::cerr << "SEL:sensorStatus = " << sensorStatusRec[sensorPath].sensorStatus << std::endl; method.append(statusInterface, "Status", value); try { @@ -576,18 +582,16 @@ void action(sdbusplus::bus_t& bus, const std::string& path, bool assert) return; } - auto pos = path.rfind("/"); - if (pos == std::string::npos) + std::string ledPath = ledGroups + path; + + if (assert) { - using namespace xyz::openbmc_project::Common; - report( - InvalidArgument::ARGUMENT_NAME("path"), - InvalidArgument::ARGUMENT_VALUE(path.c_str())); - return; + lg2::info("{PATH} is on", "PATH", ledPath); + } + else + { + lg2::info("{PATH} is off", "PATH", ledPath); } - auto unit = path.substr(pos + 1); - - std::string ledPath = ledGroups + unit + '_' + LED_FAULT; auto method = bus.new_method_call(service.c_str(), ledPath.c_str(), "org.freedesktop.DBus.Properties", "Set"); @@ -609,6 +613,24 @@ void action(sdbusplus::bus_t& bus, const std::string& path, bool assert) return; } +enum +{ + NC_L2L_SEVERITY = 0x00, + NC_L2H_SEVERITY = 0x01, + NC_H2L_SEVERITY = 0x06, + NC_H2H_SEVERITY = 0x07, + + CR_L2L_SEVERITY = 0x02, + CR_L2H_SEVERITY = 0x03, + CR_H2L_SEVERITY = 0x08, + CR_H2H_SEVERITY = 0x09, + + NR_L2L_SEVERITY = 0x04, + NR_L2H_SEVERITY = 0x05, + NR_H2L_SEVERITY = 0x0A, + NR_H2H_SEVERITY = 0x0B +}; + enum { UNSPECIFY_SENSOR_TYPE = 0x00, @@ -620,6 +642,41 @@ enum OEM_END_SENSOR_TYPE = 0x7F }; +void lightUpHealthLed(sdbusplus::bus::bus& bus, AlarmType ledStatus) +{ + switch (ledStatus) + { + case NORMAL: + action(bus, "status_ok", true); + action(bus, "status_non_critical", false); + action(bus, "status_critical", false); + action(bus, "status_non_recoverable", false); + break; + case NONCRITICAL: + action(bus, "status_ok", false); + action(bus, "status_non_critical", true); + action(bus, "status_critical", false); + action(bus, "status_non_recoverable", false); + break; + case CRITICAL: + action(bus, "status_ok", false); + action(bus, "status_non_critical", false); + action(bus, "status_critical", true); + action(bus, "status_non_recoverable", false); + break; + case NONRECOV: + action(bus, "status_ok", false); + action(bus, "status_non_critical", false); + action(bus, "status_critical", false); + action(bus, "status_non_recoverable", true); + break; + default: + lg2::error("ERROR: Invalid led status {STATUS}", "STATUS", + static_cast(ledStatus)); + break; + } +} + inline void recordAlarmStatus(const std::string& sensorPath, AlarmType alarmStatus, int evOffset, int ed, bool assertAlarm, @@ -834,12 +891,34 @@ void fillSensorDescription(const std::string& sensorPath, int eventOffset) //} } +// void parseThresholdSelSeverity(sdbusplus::bus::bus& bus, +// const std::string& sensorPath, +// const SensorType& sensorType, +// const std::string& eventData, +// const std::string& eventDir, +// const AlarmType& severity, +// const bool& assertToAlarm) +// { +// std::string eventData1 = eventData.substr(0, 2); +// int ev = std::stol(eventData1, nullptr, 16); +// int ed = std::stol(eventDir); + +// ev = (ev & SENSOR_EVENT_DATA1_EVENT_OFFSET); + +// recordAlarmStatus(sensorPath, severity, ev, ed, assertToAlarm, false); + +// sensorStatusRec[sensorPath].eventType = THRESHOLD_SENSOR_TYPE; +// sensorStatusRec[sensorPath].sensorType = sensorType; + +// } void parseSpecificSensorSelSeverity(sdbusplus::bus::bus& bus, const std::string& sensorPath, const SensorType& sensorType, const std::string& eventData, - const std::string& eventDir) + const std::string& eventDir, + const AlarmType& severity, + const bool& assertAlarm) { std::string eventData1 = eventData.substr(0, 2); int ev = std::stol(eventData1, nullptr, 16); @@ -854,6 +933,8 @@ void parseSpecificSensorSelSeverity(sdbusplus::bus::bus& bus, { case SYSTEM_EVENT_LOG_CLEARED: { + recordAlarmStatus(sensorPath, severity, ev1Offset, ed, assertAlarm, + false); sensorStatusRec[sensorPath].alarmStatus = alarmStatus; break; } @@ -872,6 +953,8 @@ void parseSpecificSensorSelSeverity(sdbusplus::bus::bus& bus, case SLEEP_BUTTON_PRESSED: case RESET_BUTTON_PRESSED: { + recordAlarmStatus(sensorPath, severity, ev1Offset, ed, assertAlarm, + false); sensorStatusRec[sensorPath].alarmStatus = alarmStatus; break; } @@ -884,6 +967,8 @@ void parseSpecificSensorSelSeverity(sdbusplus::bus::bus& bus, } case SENSOR_TYPE_POWER_SUPPLY: { + recordAlarmStatus(sensorPath, severity, ev1Offset, ed, assertAlarm, + true); sensorStatusRec[sensorPath].alarmStatus = alarmStatus; break; } @@ -894,6 +979,11 @@ void parseSpecificSensorSelSeverity(sdbusplus::bus::bus& bus, case ACPI_S0_STATE: case ACPI_S5_STATE: { + // The S0 State and S5 State of ACPI State will only exist + // one, so the passed in arg6: isGenericDiscreteSensor is + // true. + recordAlarmStatus(sensorPath, severity, ev1Offset, ed, + assertAlarm, true); sensorStatusRec[sensorPath].alarmStatus = alarmStatus; break; } @@ -910,26 +1000,34 @@ void parseSpecificSensorSelSeverity(sdbusplus::bus::bus& bus, } case SENSOR_TYPE_MEMORY: { + recordAlarmStatus(sensorPath, severity, ev1Offset, ed, assertAlarm, + false); sensorStatusRec[sensorPath].alarmStatus = alarmStatus; setDimmLEDState(bus, sensorPath, "FAULT"); break; } case SENSOR_TYPE_PROCESSOR: { + recordAlarmStatus(sensorPath, severity, ev1Offset, ed, assertAlarm, + false); sensorStatusRec[sensorPath].alarmStatus = alarmStatus; break; } case SENSOR_TYPE_WATCHDOG_2: { + recordAlarmStatus(sensorPath, severity, ev1Offset, ed, assertAlarm, + false); sensorStatusRec[sensorPath].alarmStatus = alarmStatus; break; } default: + recordAlarmStatus(sensorPath, severity, ev1Offset, ed, assertAlarm, + false); break; } sensorStatusRec[sensorPath].eventType = SPECIFIC_SENSOR_TYPE; sensorStatusRec[sensorPath].sensorType = sensorType; - setDiscreteSensorStatusValue(bus, sensorPath, eventData); + setDiscreteSensorStatusValue(bus, sensorPath); if(0) { fillSensorDescription(sensorPath, ev1Offset); @@ -992,7 +1090,6 @@ using EventAlarm = std::unordered_map; // 'Status'. See: initDiscreteSensorStatusInfo() and sensorMatchHandler(). // If true, it means that the 'Status' has been initialized // and the status sensor object on the entity-manager has been created. - // if (sensorStatusRec.find(sensorPath) == sensorStatusRec.end()) // { // lg2::error( @@ -1034,17 +1131,14 @@ using EventAlarm = std::unordered_map; sensorStatusRec[sensorPath].eventType = eventType; sensorStatusRec[sensorPath].sensorType = sensorType; - - setDiscreteSensorStatusValue(bus, sensorPath, eventData); + setDiscreteSensorStatusValue(bus, sensorPath); //updateSensorStatus(bus, sensorPath); - //controlHealthLed(bus); } void Add::filterSEL(sdbusplus::bus::bus& bus, const std::string& path) { - //AlarmType severity = NORMAL; auto method = bus.new_method_call(logService, path.c_str(), "org.freedesktop.DBus.Properties", "Get"); method.append("xyz.openbmc_project.Logging.Entry"); @@ -1072,7 +1166,7 @@ void Add::filterSEL(sdbusplus::bus::bus& bus, const std::string& path) uint8_t typeCode = 0; uint8_t sensorType = 0; - std::string sensorData, sensorPath, eventDir, assertAlarm; + std::string sensorData, sensorPath, eventDir, assertAlarm, confSeverityStr; bool assertToAlarm; AlarmType confSeverity = AlarmType::NORMAL; for (auto& item : additionalDatas) @@ -1153,7 +1247,7 @@ void Add::filterSEL(sdbusplus::bus::bus& bus, const std::string& path) // discrete parseSpecificSensorSelSeverity( bus, sensorPath, static_cast(sensorType), sensorData, - eventDir); + eventDir, confSeverity, assertToAlarm); }else if (typeCode >= GENERIC_START_SENSOR_TYPE && typeCode <= GENERIC_END_SENSOR_TYPE) { @@ -1166,8 +1260,6 @@ void Add::filterSEL(sdbusplus::bus::bus& bus, const std::string& path) return; } - //controlHealthLed(bus); - return; } @@ -1479,6 +1571,100 @@ void Add::dimmLEDMatchHandler(sdbusplus::message_t& msg) updateSysHealthLED(dimmSensorLEDStatus); } +void Add::clearCPUStatusAlarm(SensorStatusInfo& statusInfo) +{ + constexpr std::array cpuStatToClear{ + IPMI_CPU_IERR, IPMI_CPU_FRB2_HANG, IPMI_CPU_CONFIG_ERROR, + IPMI_CPU_THROTTLED, IPMI_CPU_DISABLED, IPMI_CPU_MCE, + IPMI_CPU_CMCE}; + + for (const auto& offset : cpuStatToClear) + { + statusInfo.sensorStatus &= ~(1 << offset); + statusInfo.alarmStatus &= ~(ALARM_ALL_BITS << (offset * ALARM_BIT_NUM)); + } +} + +void Add::clearMEMStatusAlarm(SensorStatusInfo& statusInfo) +{ + constexpr std::array memStatToClear{ + MEMORY_OFFSET_CORRECTABLE_ECC, MEMORY_OFFSET_UNCORRECTABLE_ECC, + MEMORY_OFFSET_MEMORY_DEVICE_DISABLED, + MEMORY_OFFSET_CORRECTABLE_ECC_LOG_LIMIT_REACHED, + MEMORY_OFFSET_CONFIGURATION_ERROR}; + + for (const auto& offset : memStatToClear) + { + statusInfo.sensorStatus &= ~(1 << offset); + statusInfo.alarmStatus &= ~(ALARM_ALL_BITS << (offset * ALARM_BIT_NUM)); + } +} + +void Add::clearPCIEStatusAlarm(SensorStatusInfo& statusInfo) +{ + constexpr std::array pcieStatToClear{ + CRITICAL_INTERRUPT_OFFSET_PCI_PERR, + CRITICAL_INTERRUPT_OFFSET_PCI_SERR, + CRITICAL_INTERRUPT_OFFSET_BUS_CORRECTABLE_ERROR, + CRITICAL_INTERRUPT_OFFSET_BUS_UNCORRECTABLE_ERRPR, + CRITICAL_INTERRUPT_OFFSET_BUS_FATAL_ERROR, + CRITICAL_INTERRUPT_OFFSET_BUS_DEGRADED}; + + for (const auto& offset : pcieStatToClear) + { + statusInfo.sensorStatus &= ~(1 << offset); + statusInfo.alarmStatus &= ~(ALARM_ALL_BITS << (offset * ALARM_BIT_NUM)); + } +} + +void Add::clearFirmwareProgressAlarm(SensorStatusInfo& statusInfo) +{ + constexpr std::array statusToClear{ + SYSTEM_FIRMWARE_PROGRESS_OFFSET_ERROR, + SYSTEM_FIRMWARE_PROGRESS_OFFSET_HANG}; + + for (const auto& offset : statusToClear) + { + statusInfo.sensorStatus &= ~(1 << offset); + statusInfo.alarmStatus &= ~(ALARM_ALL_BITS << (offset * ALARM_BIT_NUM)); + } +} + +void Add::deassertOnPowerReset(void) +{ + for (auto& [sensorObj, statusInfo] : sensorStatusRec) + { + if (statusInfo.eventType != SPECIFIC_SENSOR_TYPE) + { + continue; + } + + if (SENSOR_TYPE_PROCESSOR == statusInfo.sensorType) + { + clearCPUStatusAlarm(statusInfo); + } + else if (SENSOR_TYPE_MEMORY == statusInfo.sensorType) + { + clearMEMStatusAlarm(statusInfo); + } + else if (SENSOR_TYPE_CRITICAL_INTERRUPT == statusInfo.sensorType) + { + clearPCIEStatusAlarm(statusInfo); + } + else if (SENSOR_TYPE_SYSTEM_FIRMWARE_PROGRESS == statusInfo.sensorType) + { + clearFirmwareProgressAlarm(statusInfo); + } + else + { + continue; + } + + setDiscreteSensorStatusValue(bus, sensorObj); + } + +} + void getLoggingSubTree(sdbusplus::bus_t& bus, MapperResponseType& subtree) { auto depth = 0; @@ -1513,60 +1699,22 @@ void Add::createAssociation(sdbusplus::asio::object_server& objectServer) faultMonitorAssociation->initialize(); } -void Add::processExistingCallouts(sdbusplus::bus_t& bus) +void Add::initPowerStatus() { - MapperResponseType mapperResponse; - - getLoggingSubTree(bus, mapperResponse); - if (mapperResponse.empty()) - { - // No errors to process. - return; - } - - for (const auto& elem : mapperResponse) + try { - auto method = bus.new_method_call( - elem.second.begin()->first.c_str(), elem.first.c_str(), - "org.freedesktop.DBus.Properties", "Get"); - method.append("xyz.openbmc_project.Association.Definitions"); - method.append("Associations"); - auto reply = bus.call(method); - if (reply.is_method_error()) - { - // do not stop, continue with next elog - lg2::error("Error in getting associations"); - continue; - } - - std::variant assoc; - try - { - reply.read(assoc); - } - catch (const sdbusplus::exception_t& e) - { - lg2::error( - "Failed to parse existing callouts associations message, ERROR = {ERROR}", - "ERROR", e); - continue; - } - auto& assocs = std::get(assoc); - if (assocs.empty()) - { - // no associations, skip - continue; - } - - for (const auto& item : assocs) - { - if (std::get<1>(item).compare(CALLOUT_REV_ASSOCIATION) == 0) - { - removeWatches.emplace_back( - std::make_unique(bus, std::get<2>(item))); - action(bus, std::get<2>(item), true); - } + auto ret = dBusHandler.getProperty(hostPath, hostInterface, + hostProperty); + auto status = std::get(ret); + if (status.size() >= running.size()){ + isPoweredOn = (status.compare(status.size() - running.size(), + running.size(), running.data()) == 0); } + } + catch (const sdbusplus::exception::exception& e) + { + log("Failed to get power status", + entry("ERROR=%s", e.what())); } } diff --git a/fault-monitor/fru-fault-monitor.hpp b/fault-monitor/fru-fault-monitor.hpp index 0dacc0c..185693b 100644 --- a/fault-monitor/fru-fault-monitor.hpp +++ b/fault-monitor/fru-fault-monitor.hpp @@ -2,7 +2,10 @@ #include "config.h" +#include "../utils.hpp" + #include +#include #include #include #include @@ -26,6 +29,13 @@ namespace fault { namespace monitor { +constexpr auto hostBusname = "xyz.openbmc_project.State.Host"; +constexpr auto hostInterface = "xyz.openbmc_project.State.Host"; +constexpr auto hostPath = "/xyz/openbmc_project/state/host0"; +constexpr auto hostProperty = "CurrentHostState"; +constexpr std::string_view running = "Running"; + +using namespace phosphor::led::utils; constexpr auto faultLedPath = "/xyz/openbmc_project/led/groups/status_fault"; constexpr auto dimmfaultLedPath = "/xyz/openbmc_project/led/groups/dimm_fault"; constexpr auto okLedPath = "/xyz/openbmc_project/led/groups/status_ok"; @@ -155,6 +165,19 @@ class Add "/xyz/openbmc_project/logging"), std::bind(std::mem_fn(&Add::created), this, std::placeholders::_1)), + powerMatch(bus, + sdbusplus::bus::match::rules::propertiesChanged( + hostPath, hostInterface), + [this](sdbusplus::message::message& message) { + std::string objectName; + std::map> values; + message.read(objectName, values); + auto findState = values.find(hostProperty); + if (findState != values.end()) + { + deassertOnPowerReset(); + } + }), dimmLEDMatch(bus, "type='signal',member='PropertiesChanged',path_namespace='" + std::string(dimmSensorPath) + "',arg0namespace='" + @@ -165,7 +188,7 @@ class Add createAssociation(objectServer); dimmledInit(); - processExistingCallouts(bus); + initPowerStatus(); } @@ -174,12 +197,45 @@ class Add /** @brief sdbusplus signal match for fault created */ sdbusplus::bus::match_t matchCreated; + /** Match for host power monitor */ + sdbusplus::bus::match::match powerMatch; + sdbusplus::bus::match_t dimmLEDMatch; std::vector> removeWatches; std::shared_ptr faultMonitorAssociation; + /** @brief Initialize the host power status*/ + void initPowerStatus(); + + /** @brief clear CPU alarm status + */ + void clearCPUStatusAlarm(SensorStatusInfo& statusInfo); + + /** @brief clear MEM alarm status + */ + void clearMEMStatusAlarm(SensorStatusInfo& statusInfo); + + /** @brief clear PCIE alarm status + */ + void clearPCIEStatusAlarm(SensorStatusInfo& statusInfo); + + /** @brief clear FIRMWARE alarm status + */ + void clearFirmwareProgressAlarm(SensorStatusInfo& statusInfo); + + /** @brief deassert some alarm bits when the host power restarts. + */ + void deassertOnPowerReset(void); + + /* DBusHandler class handles the D-Bus operations */ + DBusHandler dBusHandler; + + public: + /** @brief Indicate if the host status is power on */ + static bool isPoweredOn; + /** @brief Callback function for fru fault created * @param[in] msg - Data associated with subscribed signal */ @@ -201,10 +257,6 @@ class Add void createAssociation(sdbusplus::asio::object_server& objectServer); - /** @brief This function process all callouts at application start - * @param[in] bus - The Dbus bus object - */ - void processExistingCallouts(sdbusplus::bus_t& bus); }; /** @class Remove diff --git a/fault-monitor/meson.build b/fault-monitor/meson.build index 7ced362..a36ef80 100644 --- a/fault-monitor/meson.build +++ b/fault-monitor/meson.build @@ -10,6 +10,7 @@ if get_option('monitor-operational-status').enabled() else fault_monitor_sources += [ 'fru-fault-monitor.cpp', + '../utils.cpp', ] endif -- 2.25.1