Initial commit
This commit is contained in:
Executable
+351
@@ -0,0 +1,351 @@
|
||||
#!/bin/bash
|
||||
|
||||
# This script monitors S0/S1 GPIO fault and detects errors from CPUs
|
||||
#
|
||||
# So far, no specification describes the LED behavior when an error occurs (i.e. when a pattern is detected).
|
||||
# So when detecting a pattern, we simply set the gpio fault flag and turn on the SYS LED.
|
||||
#
|
||||
# The pattern will be in the format:
|
||||
# <minor_byte> <quiet_gap_1second> <major_byte> <stop_condition_low_for_3seconds>
|
||||
#
|
||||
# Ex: pattern minor_byte=0x03, major_byte=0x02, you will see the waveform like
|
||||
# _1010100...(quiet gap, low for 1 second)..0111111111000000000111111111110000000000...(stop condition, low for 3 seconds)..
|
||||
#
|
||||
# Usage: <app_name> <socket 0/1>
|
||||
#
|
||||
# shellcheck source=/dev/null
|
||||
source /usr/sbin/gpio-lib.sh
|
||||
|
||||
# ---- global state --------------------------------------------------------

# Flag file that is created once a fault pattern has been decoded.
error_flag="/tmp/gpio_fault"

# Delay handed to usleep between two GPIO samples.
# Reading /sys/class/gpio/gpio<N>/value already takes 10ms~35ms on its own
# (depending on CPU load), so the minor byte is sampled with no extra delay.
polling_minor_byte_rate=0
polling_major_byte_rate=200000
polling_rate="${polling_minor_byte_rate}"

# Number of consecutive low samples after which we consider the minor byte
# finished (we are in the quiet gap). Depends on polling_minor_byte_rate.
max_low_in_minor_byte=9

# Number of consecutive low samples after which we consider the major byte
# finished (we are in the stop condition). Depends on polling_major_byte_rate.
max_low_in_major_byte=9

# Threshold currently in effect; starts with the minor-byte value.
max_low="${max_low_in_minor_byte}"

# State machine: 0 = detecting minor byte, 1 = detecting major byte.
curr_state=0

# Last decoded pulse counts.
minor_byte=0
major_byte=0

# Last value sampled from the fault GPIO.
gpio_status=0

# Socket index given on the command line (0 or 1).
socket="$1"

# Line passed to "gpioget 0" to probe socket 1; a reading of 1 means the
# socket is not present.
socket1_present=151
socket1_status=1

# Named fault-alert GPIO lines, one per socket.
S0_fault_gpio="s0-fault-alert"
S1_fault_gpio="s1-fault-alert"
|
||||
|
||||
# Translate the decoded pulse counts into symbolic event names.
# Globals read:    major_byte, minor_byte
# Globals written: event_major, event_minor
# Any code that is not listed maps to "NOT_SUPPORT". Both outputs are always
# (re)assigned, so an unknown major byte can never leave a stale event_minor
# from an earlier decode in place.
map_event_name() {
  case "$major_byte" in
    2)
      event_major="FAULT_LED_BOOT_ERROR"
      case "$minor_byte" in
        1) event_minor="SOC_BOOTDEV_INIT_SEC_ERROR" ;;
        2) event_minor="SECJMP_FAIL_ERROR" ;;
        3) event_minor="UART_INIT_WARN" ;;
        4) event_minor="UART_TX_WARN" ;;
        5) event_minor="SOC_ROMPATCH_BAD_ERROR" ;;
        6) event_minor="SOC_ROMPATCH_RANGE_ERROR" ;;
        7) event_minor="SPI_INIT_ERROR" ;;
        8) event_minor="SPI_TX_ERROR" ;;
        9) event_minor="SPINOR_UNKNOW_DEVICE_WARN" ;;
        10) event_minor="EEPROM_BAD_NVP_HEADER_WARN" ;;
        11) event_minor="EEPROM_BAD_NVP_FIELD_WARN" ;;
        12) event_minor="EEPROM_BAD_CHECKSUM_ERROR_WARN" ;;
        13) event_minor="I2C_DMA_ERROR" ;;
        14) event_minor="I2C_TIMEOUT_ERROR" ;;
        15) event_minor="SOC_BOOTDEV_SPI_LOAD_ERROR" ;;
        16) event_minor="SOC_BOOTDEV_AUTHENTICATION_ERROR" ;;
        17) event_minor="PCP_POWERUP_FAILED" ;;
        18) event_minor="PCP_POWERDOWN_FAILED" ;;
        19) event_minor="CPUPLL_INIT_FAILED" ;;
        20) event_minor="MESHPLL_INIT_FAILED" ;;
        *) event_minor="NOT_SUPPORT" ;;
      esac
      ;;
    3)
      event_major="FAULT_LED_FW_LOAD_ERROR"
      case "$minor_byte" in
        9) event_minor="LFS_ERROR" ;;
        *) event_minor="NOT_SUPPORT" ;;
      esac
      ;;
    4)
      event_major="FAULT_LED_SECURITY_ERROR"
      case "$minor_byte" in
        1) event_minor="SEC_INVALID_KEY_CERT" ;;
        2) event_minor="SEC_INVALID_CONT_CERT" ;;
        3) event_minor="SEC_INVALID_ROOT_KEY" ;;
        4) event_minor="SEC_INVALID_SECPRO_KEY" ;;
        5) event_minor="SEC_INVALID_KEY_CERT_SIG" ;;
        6) event_minor="SEC_INVALID_CONT_CERT_SIG" ;;
        7) event_minor="SEC_INVALID_IMAGE_HASH" ;;
        8) event_minor="SEC_INVALID_PRI_VERSION" ;;
        9) event_minor="SEC_HUK_MISMATCH" ;;
        10) event_minor="SEC_FUSE_BLOW_CERT_WITHOUT_SPECIAL_BOOT_PIN" ;;
        11) event_minor="SEC_INVALID_CERT_SUBTYPE_STRUCT" ;;
        12) event_minor="SEC_TMMCFG_FAIL" ;;
        13) event_minor="SEC_INVALID_LCS_FROM_EFUSE" ;;
        14) event_minor="SEC_EFUSE_WRITE_FAILED" ;;
        15) event_minor="SEC_INVALID_CERT_VALUE" ;;
        16) event_minor="SEC_INVALID_CERT_VERSION" ;;
        *) event_minor="NOT_SUPPORT" ;;
      esac
      ;;
    5)
      event_major="FAULT_LED_EXCEPTION_ERROR"
      case "$minor_byte" in
        1) event_minor="KERNEL_EXCEPTION_UNKNOWN_REASON_ERROR" ;;
        2) event_minor="KERNEL_EXCEPTION_HARD_FAULT_ERROR" ;;
        3) event_minor="KERNEL_EXCEPTION_BUS_FAULT_ERROR" ;;
        4) event_minor="KERNEL_EXCEPTION_MEMMANAGE_FAULT_ERROR" ;;
        5) event_minor="KERNEL_EXCEPTION_USAGE_FAULT_ERROR" ;;
        *) event_minor="NOT_SUPPORT" ;;
      esac
      ;;
    *)
      event_major="NOT_SUPPORT"
      # Unknown major byte: the minor code cannot be interpreted either.
      event_minor="NOT_SUPPORT"
      ;;
  esac
}
|
||||
|
||||
# Create or remove the GPIO fault flag file.
# Arguments: $1 - 1 to raise the flag (create the file if missing),
#                 0 to clear it (remove the file if present);
#                 any other value is ignored.
# Globals read: error_flag (path of the flag file)
# The path is quoted everywhere so a flag location containing whitespace or
# glob characters is handled as a single file name.
set_unset_gpio_fault_flag() {
  case "$1" in
    1)
      [ -f "$error_flag" ] || touch -- "$error_flag"
      ;;
    0)
      [ ! -f "$error_flag" ] || rm -f -- "$error_flag"
      ;;
  esac
}
|
||||
|
||||
# Advance the two-state decoder.
#   minor (0) -> major (1): switch to the major-byte polling rate.
#   major (1) -> minor (0): the pattern is complete, so restore the minor-byte
#                           polling rate, report the event and raise the flag.
# Globals written: curr_state, polling_rate (plus whatever map_event_name sets)
toggle_state() {
  if [ "$curr_state" != 0 ]; then
    curr_state=0
    polling_rate=$polling_minor_byte_rate
    map_event_name
    echo "detected major_byte=$event_major, minor_byte=$event_minor"
    set_unset_gpio_fault_flag 1
    return
  fi

  curr_state=1
  polling_rate=$polling_major_byte_rate
}
|
||||
|
||||
# Store a pulse count into the byte that is currently being decoded.
# Arguments: $1 - number of pulses counted
# Globals:   curr_state (read); minor_byte or major_byte (written)
save_pulse_of_byte() {
  case "$curr_state" in
    0)
      minor_byte=$1
      ;;
    *)
      major_byte=$1
      ;;
  esac
}
|
||||
|
||||
# Count the pulses of one byte on the fault GPIO.
# The pulse width (50ms or 500ms) is irrelevant; only the number of high
# pulses matters, which is obtained by counting falling edges.
# The byte is considered finished once max_low low samples in a row have been
# seen; the count is then stored and the state machine advanced.
# Globals read:    gpio_status (initial sample), max_low, polling_rate, gpio_Id
# Globals written: gpio_status
cnt_falling_edge_in_byte() {
  local edges=0
  local lows=0
  local before=0
  local now=0

  while :; do
    before=$now
    now=$gpio_status

    case "$before:$now" in
      1:0)
        # Falling edge: one more pulse, restart the low counter. The loop is
        # re-entered without taking a new sample, so the same low sample is
        # counted as the first low on the next pass.
        edges=$((edges + 1))
        lows=0
        continue
        ;;
      0:0)
        # Still low: we may be in the quiet gap or in the stop condition.
        lows=$((lows + 1))
        if [ "$lows" == "$max_low" ]; then
          save_pulse_of_byte "$edges"
          toggle_state
          break
        fi
        ;;
    esac

    usleep $polling_rate
    gpio_status=$(cat /sys/class/gpio/gpio"$gpio_Id"/value)
  done
}
|
||||
|
||||
# Export the fault GPIO through sysfs and switch it to input mode.
# Note: the pin number is taken from the global gpio_Id; the positional
# argument supplied by the caller is not used.
gpio_config_input() {
  local sysfs_root=/sys/class/gpio

  printf '%s\n' "$gpio_Id" > "$sysfs_root"/export
  printf '%s\n' "in" > "$sysfs_root"/gpio"${gpio_Id}"/direction
}
|
||||
|
||||
# Resolve a named GPIO line to its global (sysfs) GPIO number.
# Arguments: $1 - GPIO line name understood by gpiofind
# Globals read: AST2600_GPIO_BASE (per-chip base numbers; not defined in this
#               file, presumably provided by the sourced gpio-lib.sh)
# Outputs: the GPIO number on stdout, or -1 when the line cannot be resolved
#          (gpiofind fails with any status, prints nothing or something
#          unexpected, or no base is known for the chip).
gpio_number() {
  local found chip offset base

  if ! found=$(gpiofind "$1") || [[ -z "$found" ]]; then
    echo -1
    return
  fi

  # gpiofind output is expected to look like "gpiochip<N> <offset>".
  chip=${found%% *}
  chip=${chip#gpiochip}
  offset=${found##* }
  if ! [[ "$chip" =~ ^[0-9]+$ && "$offset" =~ ^[0-9]+$ ]]; then
    echo -1
    return
  fi

  # Force base 10 so a leading zero is never read as octal.
  chip=$((10#$chip))
  base=${AST2600_GPIO_BASE[$chip]:-}
  if [[ -z "$base" ]]; then
    echo -1
    return
  fi

  echo "$((10#$offset + base))"
}
|
||||
|
||||
# Resolve the fault GPIO and make sure it is exported as an input in sysfs.
# Globals read:    fault_gpio (GPIO line name)
# Globals written: gpio_Id
# Exits the script with status 1 when the line cannot be resolved. Nothing is
# configured when the sysfs node for the pin already exists.
init_sysfs_fault_gpio() {
  gpio_Id=$(gpio_number "$fault_gpio")

  case "$gpio_Id" in
    -1)
      echo "Invalid GPIO number"
      exit 1
      ;;
  esac

  [ -d /sys/class/gpio/gpio"$gpio_Id" ] || gpio_config_input "$gpio_Id"
}
|
||||
|
||||
# init: pick the fault GPIO that belongs to the requested socket
case "$socket" in
  0)
    fault_gpio="$S0_fault_gpio"
    ;;
  *)
    # Anything other than "0" is handled as socket 1, which may be absent.
    socket1_status=$(gpioget 0 "$socket1_present")
    if [ "$socket1_status" == 1 ]; then
      echo "socket 1 not present"
      exit 0
    fi
    fault_gpio=$S1_fault_gpio
    ;;
esac

init_sysfs_fault_gpio

# daemon start
while :; do
  # A high level marks the start of a pattern.
  if [ "$gpio_status" == 1 ]; then
    # Select the thresholds of the byte we are about to decode.
    case "$curr_state" in
      0)
        # detecting minor byte
        max_low=$max_low_in_minor_byte
        polling_rate=$polling_minor_byte_rate
        ;;
      *)
        # detecting major byte
        max_low=$max_low_in_major_byte
        polling_rate=$polling_major_byte_rate
        ;;
    esac

    # Something is on the GPIO; check whether it is a byte pattern.
    cnt_falling_edge_in_byte
  fi

  usleep $polling_rate
  gpio_status=$(cat /sys/class/gpio/gpio"$gpio_Id"/value)
done

# Only reached if the polling loop above is ever left.
exit 1
|
||||
+218
@@ -0,0 +1,218 @@
|
||||
#!/bin/bash
|
||||
# This script monitors fan, over-temperature, PSU and CPU/SCP failures and updates the fault LED status
|
||||
|
||||
# shellcheck disable=SC2004
|
||||
# shellcheck source=/dev/null
|
||||
source /usr/sbin/gpio-lib.sh
|
||||
|
||||
# ---- common values -------------------------------------------------------
on=1
off=0

# Flag files checked on every pass of the monitor loop.
overtemp_fault_flag="/tmp/fault_overtemp"
gpio_fault_flag="/tmp/gpio_fault"
fan_failed_flag="/tmp/fan_failed"

# ---- fault state ("true" / "false") --------------------------------------
gpio_fault="false"
fan_failed="false"
psu_failed="false"

# ---- PSU access over PMBus -----------------------------------------------
psu_bus=2
psu0_addr=0x58
psu1_addr=0x59
status_word_cmd=0x79
# STATUS_WORD bits treated as a failure, following the PMBus Specification:
#   Bit[1]:  CML faults
#   Bit[2]:  Over temperature faults
#   Bit[3]:  Under voltage faults
#   Bit[4]:  Over current faults
#   Bit[5]:  Over voltage fault
#   Bit[10]: Fan faults
psu_fault_bitmask=0x43e

# ---- LED state and the I2C device that drives the LEDs -------------------
fan_fault_led_status="${off}"
psu_fault_led_status="${off}"
led_bus=15
led_addr=0x22
led_port0_config=0x06
led_port0_output=0x02
|
||||
|
||||
# functions declaration
|
||||
# Refresh fan_failed ("true"/"false") from the presence of the fan flag file.
# Globals: fan_failed_flag (read), fan_failed (written)
check_fan_failed() {
  fan_failed="false"
  if [[ -f $fan_failed_flag ]]; then
    fan_failed="true"
  fi
}
|
||||
|
||||
# Drive one bit of Port0 on the I2C device that controls the fault LEDs.
# Arguments: $1 - bit mask of the pin (1 = fan fault LED, 2 = PSU fault LED)
#            $2 - requested state; equal to $on sets the bit, anything else
#                 clears it
# Globals read: led_bus, led_addr, led_port0_config, led_port0_output, on
# Returns: non-zero without writing the affected register when a register
#          cannot be read, otherwise the status of the final i2cset.
# All scratch values are local so nothing leaks into the caller's scope.
_set_fault_led_bit() {
  local mask=$1
  local state=$2
  local cfg out

  # Clear the pin's configuration bit so that it acts as an output.
  cfg=$(i2cget -f -y "$led_bus" "$led_addr" "$led_port0_config") || return 1
  [ -n "$cfg" ] || return 1
  i2cset -f -y "$led_bus" "$led_addr" "$led_port0_config" "$((cfg & ~mask))"

  # Read-modify-write the output register so the other pins keep their state.
  out=$(i2cget -f -y "$led_bus" "$led_addr" "$led_port0_output") || return 1
  [ -n "$out" ] || return 1
  if [ "$state" == "$on" ]; then
    out=$((out | mask))
  else
    out=$((out & ~mask))
  fi
  i2cset -f -y "$led_bus" "$led_addr" "$led_port0_output" "$out"
}

# Turn the fan fault LED on or off.
# The LED hangs off Port0[0] of the CPLD I/O expander at slave address 0x22 on
# bus $led_bus (the original note calls the bus "I2C16" - presumably 1-based
# numbering; confirm against the board documentation).
# Arguments: $1 - $on to light the LED, any other value to switch it off
turn_on_off_fan_fault_led() {
  _set_fault_led_bit 1 "$1"
}

# Turn the PSU fault LED on or off.
# The LED hangs off Port0[1] of the same CPLD I/O expander.
# Arguments: $1 - $on to light the LED, any other value to switch it off
turn_on_off_psu_fault_led() {
  _set_fault_led_bit 2 "$1"
}
|
||||
|
||||
# Bring the fan fault LED in line with fan_failed.
# The LED is only touched when its cached state (fan_fault_led_status) is the
# opposite of the wanted one, so unchanged states cause no LED access.
control_fan_fault_led() {
  local wanted opposite

  if [ "$fan_failed" == "true" ]; then
    wanted=$on
    opposite=$off
  else
    wanted=$off
    opposite=$on
  fi

  if [ "$fan_fault_led_status" == "$opposite" ]; then
    turn_on_off_fan_fault_led "$wanted"
    fan_fault_led_status=$wanted
  fi
}
|
||||
|
||||
# Decide whether a single PSU has to be treated as failed.
# Arguments: $1 - name of the presence GPIO line (reads "0" when present)
#            $2 - I2C address of the PSU
# Globals read: psu_bus, status_word_cmd, psu_fault_bitmask
# Returns: 0 (failed) when the PSU is not present, its STATUS_WORD cannot be
#          read or parsed, or any bit of psu_fault_bitmask is set;
#          1 (healthy) otherwise.
_psu_is_failed() {
  local presence status

  presence=$(gpio_name_get "$1")
  [ "$presence" == "0" ] || return 0

  # PSU present: monitor it using PMBus, check the STATUS_WORD.
  status=$(i2cget -f -y "$psu_bus" "$2" "$status_word_cmd" w) || return 0
  # Anything that is not a plain number is treated as a failure instead of
  # being fed into the arithmetic below.
  [[ "$status" =~ ^(0[xX][0-9a-fA-F]+|[1-9][0-9]*|0)$ ]] || return 0

  (((status & psu_fault_bitmask) != 0))
}

# Refresh psu_failed ("true"/"false") from both power supplies.
# A PSU counts as failed when it is absent, unreadable, or reports one of the
# monitored STATUS_WORD fault bits; psu_failed is "true" if either one failed.
# Globals written: psu0_failed, psu1_failed, psu_failed
check_psu_failed() {
  psu0_failed="false"
  if _psu_is_failed presence-ps0 "$psu0_addr"; then
    psu0_failed="true"
  fi

  psu1_failed="false"
  if _psu_is_failed presence-ps1 "$psu1_addr"; then
    psu1_failed="true"
  fi

  if [ "$psu0_failed" == "true" ] || [ "$psu1_failed" == "true" ]; then
    psu_failed="true"
  else
    psu_failed="false"
  fi
}
|
||||
|
||||
# Bring the PSU fault LED in line with psu_failed.
# The LED is only touched when its cached state (psu_fault_led_status) is the
# opposite of the wanted one, so unchanged states cause no LED access.
control_psu_fault_led() {
  local wanted opposite

  if [ "$psu_failed" == "true" ]; then
    wanted=$on
    opposite=$off
  else
    wanted=$off
    opposite=$on
  fi

  if [ "$psu_fault_led_status" == "$opposite" ]; then
    turn_on_off_psu_fault_led "$wanted"
    psu_fault_led_status=$wanted
  fi
}
|
||||
|
||||
# Refresh overtemp_occured ("true"/"false") from the over-temperature flag
# file; a message is printed on every call while the flag file exists.
check_overtemp_occured() {
  if [[ ! -f $overtemp_fault_flag ]]; then
    overtemp_occured="false"
    return
  fi

  echo "Over temperature occured, turn on fault LED"
  overtemp_occured="true"
}
|
||||
|
||||
|
||||
# Refresh gpio_fault ("true"/"false") from the GPIO fault flag file; a
# message is printed on every call while the flag file exists.
check_gpio_fault() {
  if [[ ! -f $gpio_fault_flag ]]; then
    gpio_fault="false"
    return
  fi

  echo "GPIO fault event(s) occured, turn on fault LED"
  gpio_fault="true"
}
|
||||
|
||||
# Combine the individual fault sources into the overall fault state.
# fault becomes "true" as soon as any of fan_failed, psu_failed,
# overtemp_occured or gpio_fault is "true"; otherwise it is "false".
check_fault() {
  local source_state

  fault="false"
  for source_state in "$fan_failed" "$psu_failed" "$overtemp_occured" "$gpio_fault"; do
    if [[ "$source_state" == "true" ]]; then
      fault="true"
      break
    fi
  done
}
|
||||
|
||||
# Drive the System Fault LED from the overall fault state: the "led-fault"
# GPIO line is set to $on while fault is "true" and to $off otherwise.
# It is written on every call, whether or not the state changed.
control_sys_fault_led() {
  local level=$off

  if [ "$fault" == "true" ]; then
    level=$on
  fi

  gpio_name_set led-fault "$level"
}
|
||||
|
||||
# daemon start
while :; do
  # One monitoring pass, run in this order: refresh every fault source (fan,
  # PSU, over-temperature, GPIO), derive the overall fault state, then update
  # the system, fan and PSU fault LEDs.
  for step in \
    check_fan_failed \
    check_psu_failed \
    check_overtemp_occured \
    check_gpio_fault \
    check_fault \
    control_sys_fault_led \
    control_fan_fault_led \
    control_psu_fault_led; do
    "$step"
  done

  sleep 2
done

# Only reached if the monitoring loop above is ever left.
exit 1
|
||||
Reference in New Issue
Block a user