megacli 管理 ceph 存储常用脚本

来源:互联网 发布:苹果电脑音频剪辑软件 编辑:程序博客网 时间:2024/05/16 12:01

说明

只适用于 megacli 支持的 raid controller;只适用于手动进行 ceph 管理的集群, 不支持 ceph deploy 创建的集群;使用前需修改对应的磁盘设备命名

脚本

#!/bin/bash
#
#  Usage notes:
#     Script used to stop an osd, umount the osd and mark the matching raid
#     device as offline.
#     ./stopraid.sh [number]     [number] is the slot number on the raid card
#
#  1. Get help  (./raidrepair.sh   ||   ./raidrepair.sh  -h )
#
#  2. Identify the disk
#    Check all disks for faults    (./raidrepair.sh -a)
#    Check one specific disk       (./raidrepair.sh -c 8)   assuming slot number 8 of the raid
#                                  is faulty ( confirm slot number 8 maps to
#                                  /dev/sdf1  /var/lib/ceph/osd/ceph-20 )
#    Check disk mount points       (./raidrepair.sh -m)    confirm slot number 8 maps to
#                                  /dev/sdf1  /var/lib/ceph/osd/ceph-20
#    Check raid virtual drives     (./raidrepair.sh -v)    confirm slot number 8 maps to
#                                  ( Virtual Drive: 5 )
#    ##########   Recommended: manually comment out every ceph disk entry in /etc/fstab   ##########
#
#  3. Fault handling
#     Remove the failed disk  (./raidrepair.sh -d 8)
#
#  4. Power off and replace the disk  ( /etc/init.d/ceph -a stop osd ; init 0 )
#      ##########   After power off the raid cache may need to be cleared (manually, through idrac)   ##########
#      ##########   If the ceph disks in /etc/fstab were not commented out, login may be impossible   ##########
#
#  5. Repair the disk
#      Check the disk state    ( ./raidrepair.sh -c 8 )  expect ( Firmware state: Unconfigured(good), Spun Up )
#      Bring the disk online   ( ./raidrepair.sh -o 8 )
#      Initialise the disk     ( ./raidrepair.sh -i 8 )  careful: a wrong slot means data loss
#      Reboot to test          ( init 6 )
#
#  6. Initialise ceph
#      Mount all ceph disks             ( ./raidrepair.sh -m )
#      Manually start ceph mon, ceph osd  ( /etc/init.d/ceph start mon )
#      Format the replaced ceph disk    ( ./raidrepair.sh -f 8 )
#      Initialise ceph                  ( ./raidrepair.sh -p 8 )

# Print an error message on stderr and abort with a non-zero status.
function die() {
    echo "$*" >&2
    exit 1
}

if [[ "$(id -u)" -ne 0 ]]; then
    die "This program must be run by root."
fi

# Make sure /sbin/megacli exists; if it does not, locate the MegaCli binary
# inside the installed rpm and symlink it to /sbin/megacli.
function rpmcheck() {
    local toolsrpm file

    if [[ -f "/sbin/megacli" ]]; then
        return 0
    fi

    toolsrpm=$(rpm -qa | grep -i megacli | tail -1)
    if [[ -z "$toolsrpm" ]]; then
        die "Error:  Megacli tool not install."
    fi

    # The version test has to look at the package NAME, not at a file
    # called like the package.
    if grep -q '8\.07' <<<"$toolsrpm"; then
        file=$(rpm -ql "$toolsrpm" | grep -i "MegaCli$" | head -n 1)
    else
        file=$(rpm -ql "$toolsrpm" | grep -i "MegaCli$" | grep "bin/" | head -n 1)
    fi

    if [[ -z "$file" ]]; then
        die "Error:  cannot locate the MegaCli binary in package $toolsrpm."
    fi
    ln -s "$file" /sbin/megacli
}

# Show the help text and stop when the script is started without arguments.
# Arguments: $1 - first command line argument (may be empty)
function parametercheck() {
    if [[ -z "$1" ]]; then
        help
        exit
    fi
}

# List every entry below /var/lib/ceph/osd together with the osd number read
# from its whoami file and the device mounted on it ("NULL" when the
# whoami file is absent).
function verifyceph() {
    local dir num partition found=0

    for dir in /var/lib/ceph/osd/*; do
        [[ -e "$dir" ]] || continue
        found=1
        if [[ -f "$dir/whoami" ]]; then
            num=$(cat "$dir/whoami")
            # Exact match on the mount point, so ceph-1 does not also
            # report the device of ceph-10.
            partition=$(df -P | awk -v mp="$dir" '$NF == mp {print $1}')
        else
            num="NULL"
            partition="NULL"
        fi
        echo -e "$dir\t\tCeph Number is: $num\t\tPartition is: $partition"
    done

    if (( found == 0 )); then
        echo "there is nothing is /var/lib/ceph/osd directory"
        exit
    fi
}

# Print the option summary.
function help() {
    cat <<'EOF'
-a  use to check all raid slot status
-h  use to display help
-y  use to check ceph mount
-m  use to show partition mount to ceph.
-v  use to show virtual partition.
-c  [num]  || ex: ( raid card slot number )  use to check raid slot number.
-d  [num]  || ex: ( raid card slot number )  use to offline raid disk.
-f  [num]  || ex: ( raid card slot number )  use to format partition.
-i  [num]  || ex: ( raid card slot number )  use to initial raid slot number disk.
-o  [num]  || ex: ( raid card slot number )  use to online raid disk.
-p  [num]  || ex: ( raid card slot number )  use to initial ceph and start up ceph.
EOF
}

parametercheck "$1"
rpmcheck

# Print the slot number and error counter lines of every physical drive,
# then leave the script.
function raidcheckall() {
    /sbin/megacli -PDList -aALL | grep -E "Slot Number|Error"
    exit
}

# Print the virtual drive / slot lines of the controller configuration.
function virtualcheck() {
    /sbin/megacli -cfgdsply -aALL | grep -v Information | grep -E "Virtual|Slot"
}

# Temporarily mount every GPT device reported by fdisk (sda excluded) on /mnt
# and, when it carries a whoami file, PRINT the mount command that would
# attach it to its osd directory. Nothing is mounted permanently here.
function automountceph() {
    local disks disk cephnum

    disks=$(fdisk -l | grep GPT | awk '{print $1}' | grep -v sda)
    for disk in $disks; do
        # Only inspect and unmount /mnt when the mount really succeeded.
        if mount "$disk" /mnt; then
            if [[ -f "/mnt/whoami" ]]; then
                cephnum=$(cat /mnt/whoami)
                echo "mount  $disk /var/lib/ceph/osd/ceph-$cephnum"
            fi
            umount /mnt
        fi
    done
}

# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------

# Print the PDList section of one slot (from its "Slot Number" line up to the
# next "Media Type" line).
# Arguments: $1 - raid slot number
function slot_details() {
    /sbin/megacli -PDList -aALL | sed -n "/Slot Number: ${1}\$/,/Media Type/p"
}

# Succeed when the "Firmware state" line of the slot contains both
# "Online" and "Up".
# Arguments: $1 - raid slot number
function slot_is_online() {
    slot_details "$1" | grep "Firmware state" | grep Online | grep -q Up
}

# Succeed when the controller lists exactly this slot number.
# The comparison is exact, so slot 1 is not satisfied by slot 10.
# Arguments: $1 - raid slot number
function slot_exists() {
    /sbin/megacli -PDList -aALL \
        | awk -F: -v slot="$1" '
            /Slot Number/ { gsub(/[ \t]/, "", $2); if ($2 == slot) found = 1 }
            END { exit !found }'
}

# Print the virtual drive number whose line directly precedes the slot line
# in the filtered -cfgdsply output.
# NOTE(review): relies on one physical disk per virtual drive - confirm.
# Arguments: $1 - raid slot number
function slot_virtualnum() {
    /sbin/megacli -cfgdsply -aALL \
        | grep -v Information \
        | grep -E "Virtual|Slot" \
        | grep -w -B 1 "Slot Number: $1" \
        | grep Virtual \
        | awk '{print $3}'
}

# Same lookup as slot_virtualnum, but filtering on the "Target Id:" lines;
# this is the number handed to "-L" by the online / initialise steps.
# Arguments: $1 - raid slot number
function slot_virtualnum_by_target() {
    /sbin/megacli -cfgdsply -aALL \
        | grep -E "Target Id:|Slot Number:" \
        | grep -w -B 1 "Slot Number: ${1}\$" \
        | grep Virtual \
        | awk '{print $3}'
}

# Translate a virtual drive number (0..13) into the globals "disk"
# (sda..sdn) and "partition" (sda1..sdn1). Both are reset to the empty string
# for any other input so a stale value from an earlier step is never reused.
# Arguments: $1 - virtual drive number
function vd_to_disk() {
    local letters=(a b c d e f g h i j k l m n)

    disk=""
    partition=""
    if [[ "$1" =~ ^[0-9]+$ ]] && (( 10#$1 < ${#letters[@]} )); then
        disk="sd${letters[10#$1]}"
        partition="${disk}1"
    fi
}

# Print the enclosure device id(s) reported by the controller.
function enclosure_id() {
    /sbin/megacli -PDList -aALL | grep 'Enclosure Device ID' | uniq | awk -F'[: ]' '{print $NF}'
}

# Print the osd number (text after the last "-" of the mount point) of the
# filesystem mounted from /dev/<partition>; prints nothing when the partition
# name is empty or not mounted.
# Arguments: $1 - partition name without /dev/ (e.g. sdf1)
function partition_cephnum() {
    [[ -n "$1" ]] || return 0
    df -hP | awk -v dev="/dev/$1" '$1 == dev {print $NF}' | awk -F- '{print $NF}'
}

# ---------------------------------------------------------------------------
# Option parsing
# ---------------------------------------------------------------------------

while getopts ":hHaAvVyYmMc:d:o:p:f:i:" OPT; do
    case "$OPT" in
        h|H)
            help
            ;;
        a|A)
            raidcheckall
            ;;
        v|V)
            virtualcheck
            ;;
        y|Y)
            verifyceph
            ;;
        m|M)
            automountceph
            ;;
        c)
            slotnum=$OPTARG
            ;;
        d)
            down=$OPTARG
            ;;
        o)
            up=$OPTARG
            ;;
        i)
            initial=$OPTARG
            ;;
        f)
            prepare=$OPTARG
            ;;
        p)
            ready=$OPTARG
            ;;
        *)
            # Unknown option or missing option argument.
            help
            exit
            ;;
    esac
done

# ---------------------------------------------------------------------------
# Actions (each one is a no-op unless its option was given)
# ---------------------------------------------------------------------------

# -c: show the state of one slot plus the ceph number / partition behind it.
function raidcheck() {
    [[ -n "$slotnum" ]] || return 0

    slot_details "$slotnum" \
        | grep -E "Slot Number|Device Id|Error Count|Failure Count|Raw Size|Firmware state|Inquiry Data"

    virtualnum=$(slot_virtualnum "$slotnum")
    if [[ -z "$virtualnum" ]]; then
        exit 0
    fi

    vd_to_disk "$virtualnum"
    cephnum=$(partition_cephnum "$partition")
    echo "Ceph number: $cephnum"
    echo "Partition: $partition"
}

# -d: stop the osd living on the slot, unmount it, drop its fstab entry and
# take the physical drive offline on the controller.
function raidoffline() {
    [[ -n "$down" ]] || return 0

    slot_exists "$down" || die "Slot Number: $down is not exists"

    # Only a warning: the offline sequence below still runs.
    if ! slot_is_online "$down"; then
        echo "Slot Number: $down status error, please try to use ./$0 -c $down"
    fi

    virtualnum=$(slot_virtualnum "$down")
    vd_to_disk "$virtualnum"
    raidnum=$(enclosure_id)
    cephnum=$(partition_cephnum "$partition")

    # ceph maintenance flags
    ceph osd set norecover
    ceph osd set noscrub
    ceph osd set nobackfill
    ceph osd set nodeep-scrub

    if [[ -n "$cephnum" ]]; then
        /etc/init.d/ceph stop "osd.$cephnum"
        umount "/dev/$partition"
        # Anchor the number so removing ceph-2 leaves ceph-20 alone.
        sed -i -E "/ceph-${cephnum}([^0-9]|\$)/d" /etc/fstab
    fi

    # megacli maintenance; the bracket expression is quoted so the shell
    # cannot treat it as a glob pattern.
    /sbin/megacli -PDOffline -PhysDrv "[$raidnum:$down]" -a0
    /sbin/megacli -PDMarkMissing -PhysDrv "[$raidnum:$down]" -a0
    /sbin/megacli -PDPrpRmv -PhysDrv "[$raidnum:$down]" -a0

    # hint for the operator
    echo "stop raid slot complete, please use this command to shutdown compute"
    echo "#####################################################"
    echo "#       /etc/init.d/ceph -a stop osd ; init 0       #"
    echo "#####################################################"
}

# -o: mark the replaced drive good, create a single-disk RAID0 on it and,
# when a virtual drive was already found for the slot, start its init.
function raidonline() {
    [[ -n "$up" ]] || return 0

    if slot_is_online "$up"; then
        die "Slot Number: $up  status is up, use $0 -c $up"
    fi

    raidnum=$(enclosure_id)
    # number passed to -L below; looked up before the virtual drive is added
    virtualnum=$(slot_virtualnum_by_target "$up")

    # megacli maintenance
    /sbin/megacli -PDMakeGood -PhysDrv "[$raidnum:$up]" -force -a0
    /sbin/megacli -CfgLdAdd -r0 "[$raidnum:$up]" WT RA DIRECT -a0
    if [[ -n "$virtualnum" ]]; then
        /sbin/megacli -LDInit -start "-L$virtualnum" -a0
    fi
}

# -i: start the initialisation of the virtual drive behind an online slot.
function initialraid() {
    [[ -n "$initial" ]] || return 0

    if ! slot_is_online "$initial"; then
        die "Slot Number: $initial  status is not up, use $0 -c $initial"
    fi

    virtualnum=$(slot_virtualnum_by_target "$initial")
    if [[ -n "$virtualnum" ]]; then
        /sbin/megacli -LDInit -start "-L$virtualnum" -a0
    fi
}

# -f: partition the disk behind the slot and create an xfs filesystem on it.
function startupceph() {
    local disksize

    [[ -n "$prepare" ]] || return 0

    slot_exists "$prepare" || die "Slot Number: $prepare is not exists"

    virtualnum=$(slot_virtualnum "$prepare")
    vd_to_disk "$virtualnum"
    if [[ -z "$disk" ]]; then
        die "Slot Number: $prepare has no usable virtual drive (got '$virtualnum')"
    fi

    if [[ ! -b "/dev/$partition" && -b "/dev/$disk" ]]; then
        # Force GB so the size is comparable no matter which unit parted
        # would pick on its own, then drop any fractional part.
        disksize=$(parted "/dev/$disk" unit GB print \
            | grep -v Flags | grep '^Disk' | awk -F'[: ]' '{print $4}' | sed 's/GB//')
        disksize=${disksize%%.*}
        if [[ ! "$disksize" =~ ^[0-9]+$ ]]; then
            die "cannot determine the size of /dev/$disk"
        fi

        # Disks larger than 2000 GB get a gpt label, smaller ones msdos.
        if (( 10#$disksize > 2000 )); then
            parted "/dev/$disk" mklabel gpt
        else
            parted "/dev/$disk" mklabel msdos
        fi
        parted "/dev/$disk" mkpart primary xfs 1 100%
        partprobe
        mkfs -t xfs -i size=512 "/dev/$partition"
    else
        echo "/dev/$partition is exists, are you sure wanna initial? use dd if=/dev/zero of=/dev/$disk bs=1M count=10"
    fi
}

# -p: mount the new partition on the osd directory that has no whoami file,
# register it in /etc/fstab, rebuild the osd data and start the osd, then
# clear the maintenance flags set by the offline step.
function initialceph() {
    local dir uuid cephuuid authkey

    [[ -n "$ready" ]] || return 0

    slot_exists "$ready" || die "Slot Number: $ready is not exists"

    virtualnum=$(slot_virtualnum "$ready")
    vd_to_disk "$virtualnum"
    if [[ -z "$partition" ]]; then
        die "Slot Number: $ready has no usable virtual drive (got '$virtualnum')"
    fi

    # Find the osd directory that is not mounted (no whoami file). Reset
    # first so a number left behind by another option is not reused.
    cephnum=""
    for dir in /var/lib/ceph/osd/ceph-*; do
        [[ -d "$dir" ]] || continue
        if [[ ! -f "$dir/whoami" ]]; then
            cephnum=${dir##*-}
        fi
    done
    if [[ -z "$cephnum" ]]; then
        die "no unmounted /var/lib/ceph/osd/ceph-* directory found"
    fi

    mount "/dev/$partition" "/var/lib/ceph/osd/ceph-$cephnum" \
        || die "cannot mount /dev/$partition on /var/lib/ceph/osd/ceph-$cephnum"

    uuid=$(blkid -s UUID -o value "/dev/$partition")
    if [[ -z "$uuid" ]]; then
        die "cannot read the filesystem UUID of /dev/$partition"
    fi
    echo "UUID=$uuid  /var/lib/ceph/osd/ceph-$cephnum  xfs defaults 0 0" >> /etc/fstab

    # NOTE(review): the value read from the fsid line of ceph.conf is handed
    # to --osd-uuid, exactly as before - confirm this is intended.
    cephuuid=$(grep fsid /etc/ceph/ceph.conf | awk -F'[= ]' '{print $NF}')
    /usr/bin/ceph-osd -i "$cephnum" --mkfs --mkkey --osd-uuid "$cephuuid"

    authkey=$(/usr/bin/ceph auth list 2> /dev/null \
        | grep -w -A 1 "osd.$cephnum" | grep key | awk -F'[: ]' '{print $NF}')
    echo -e "[osd.$cephnum]\n\tkey = $authkey" > "/var/lib/ceph/osd/ceph-$cephnum/keyring"
    touch "/var/lib/ceph/osd/ceph-$cephnum"/{sysvinit,done}

    /etc/init.d/ceph start "osd.$cephnum"

    ceph osd unset norecover
    ceph osd unset noscrub
    ceph osd unset nobackfill
    ceph osd unset nodeep-scrub
}

raidcheck
raidoffline
raidonline
startupceph
initialceph
initialraid
原创粉丝点击