#!/bin/bash
#
# /etc/rc.d/init.d/zaoe
#
# An init script to start or stop the ATA-over-ethernet
# transport and mount aoe attached devices that do not
# require GFS. Checks for common problems at start time.
#
# Charlie Brooks, HBCS LTD.
# This script is licensed for use under the GNU GPLv2.
# Removal of this notice and/or author attribution is not permitted.
#
# note "flush" and "revalidate" functions are not available
# in the aoe module versions shipped by Red Hat Enterprise
# Linux. We can simulate revalidate in some (but not all)
# circumstances. If a more recent driver has been built
# and appropriate dev nodes exist both functions will work.
#
# Charlie Brooks 2006-06-05 For Red Hat Enterprise Linux
# 2007-05-18 LHG Added explicit mtu setting when IF is brought up
# 2007-08-23 CTB Port to RHEL5 with some code from Sam and Ed at Coraid
# 2007-09-05 CTB enhanced status reporting, revalidation hack
# 2007-09-09 CTB allow wait forever for etherdrives to show up
# 2007-09-11 CTB backport to RHEL4 & RHEL3 hoorah
# 2007-09-18 CTB clean up commentry, add flush support
# 2009-02-16 CTB Red Hat moved location of aoe_iflist in RHEL5
# 2010-04-18 CTB add LVM mount support, reorder start/stop, rename zaoe
# 2012-04-09 CTB added LVM dismounts, start accomodating RHEL6
# 2012-04-20 CTB finish initial port to RHEL6
# 2012-04-24 CTB hide dangerous revalidate
# 2012-04-25 CTB do a vgchange wait like the mount wait
# 2012-05-09 CTB support udev 147 under RHEL6
#
# Like most Red Hat scripts, this checks for a file containing
# any host-specific options in /etc/sysconfig
#
# note coraid sample script has K01 in levels 0126 and S99 in 35
# which does not work well at all because of other script ordering
#
# chkconfig: 2345 12 99
# description: ATA-over-Ethernet transport and SAN storage management
# probe: no
#

PATH=/sbin:/usr/sbin:$PATH
OPTFILE=/etc/sysconfig/aoe      # standard Red Hat style init options file
SYSDIR=/sys/module/aoe          # sysfs dir created by modprobe aoe
DEVDIR=/dev/etherd              # where the actual aoe device files will live
LOCKDIR=/var/lock/subsys        # for (mostly useless) flag file

# Modify the variable MKNODES to suit your system.
# oldstyle_mknod is appropriate to Red Hat Enterprise Linux 3
# udev_mknod_RH4 is appropriate to Red Hat Enterprise Linux 4
# udev_mknod_RH5 is appropriate to Red Hat Enterprise Linux 5
# udev_mknod_RH6 is appropriate to Red Hat Enterprise Linux 6
MKNODES=udev_mknod_RH6          # subroutine used for dynamic device creation
AOERULES=60-aoe.rules           # name of file to hold udev rules for aoe
RULESDIR=/etc/udev/rules.d      # where udev will look for the aoe rules file
RETVAL=1                        # guilty until proven innocent

# Source function library.
. /etc/rc.d/init.d/functions

# default jumbo frame size from coraid.com empirical testing
IFMTU=4132
# sleep time for udev to respond to modprobe
DEVSLEEP=7
# sleep time for micro-sleeps while waiting for disks to come on line
VOLSLEEP=2
# sleep time for micro-sleeps while waiting for ethernet card to respond
ETHSLEEP=1

# get options (if any) to override any defaults set above
# note you should at least set AOEINTERFACES if nothing else
[ -r "$OPTFILE" ] && . "$OPTFILE"

# Print the list of supported actions.  "revalidate" is accepted by the
# dispatcher at the bottom of the file but deliberately not advertised.
# $prog is not defined by this script, so fall back to the script's own
# name unless the options file supplied one.
usage () {
    echo "Usage: service ${prog:-${0##*/}} {start|stop|status|flush|discover}"
    RETVAL=0
}

# depending on the OS and driver versions, stuff moves around
# some functions in this initscript are version dependent
# Sets AOEVERS to the version reported under $SYSDIR, or to 0 when the
# module directory exists but no version file is readable.  AOEVERS is
# left untouched when $SYSDIR does not exist.
find_version () {
    if test -d "$SYSDIR" ; then
        if test -r "$SYSDIR/version" ; then
            AOEVERS=$(cat "$SYSDIR/version")
        elif test -r "$SYSDIR/parameters/version" ; then
            AOEVERS=$(cat "$SYSDIR/parameters/version")
        else
            AOEVERS=0
        fi
    fi
}

# Succeeds when the leading numeric part of AOEVERS is greater than $1.
# An empty or non-numeric AOEVERS is treated as version 0.
aoe_version_gt () {
    local vernum=${AOEVERS%%[!0-9]*}
    test "${vernum:-0}" -gt "$1"
}

# for several of these functions we will need to know what
# ethernet interfaces are up and running on this machine
# Sets ETHUP to the ethN names that "ip link show" reports as UP.
find_ethn () {
    ETHUP=$(ip link show | sed -n 's/[0-9]\+.*\(eth[0-9]\+\).*,UP.*/\1/p')
}

# for the status function, at least, we also need to know
# what aoe block devices are known to sysfs (2.6 kernels and up)
# Sets BDEVS to the shelf.slot names (e.g. e0.1) taken from the
# /sys/block/etherd!eN.M entries.
find_bdevs () {
    BDEVS=$(ls -d /sys/block/etherd* 2>/dev/null \
        | sed 's/.*\!\(e[0-9]\+\.[0-9]\+\)/\1/')
    # ls -d $sysd/block/*e[0-9]*\.[0-9]* 2>/dev/null | grep -v p
}

# Shared worker for the udev_mknod_* functions below: writes the rules
# supplied on stdin to $RULESDIR/$AOERULES and sets owner and mode.
# An existing rules file is never overwritten.
install_aoe_rules () {
    if test -f "$RULESDIR/$AOERULES" ; then
        return 0
    fi
    echo "Creating ATA-over-Ethernet rules for udev in $RULESDIR/$AOERULES"
    cat > "$RULESDIR/$AOERULES"
    chown root:root "$RULESDIR/$AOERULES"
    chmod 0644 "$RULESDIR/$AOERULES"
}

# We need to make sure that we have our rules in place before loading
# the aoe kernel module or udev will not create the necessary device
# files. Older systems would use aoe-tools to create device nodes.
# Red Hat EL6 has udev 147 and a 2.6.32 kernel so it no longer
# needs (or supports) NAME= rules required by earlier versions
udev_mknod_RH6 () {
    install_aoe_rules <<'EOF'
# aoe char devices
SUBSYSTEM=="aoe", KERNEL=="discover", NAME="etherd/%k", GROUP="disk", MODE="0220"
SUBSYSTEM=="aoe", KERNEL=="err", NAME="etherd/%k", GROUP="disk", MODE="0440"
SUBSYSTEM=="aoe", KERNEL=="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220"
SUBSYSTEM=="aoe", KERNEL=="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220"
SUBSYSTEM=="aoe", KERNEL=="flush", NAME="etherd/%k", GROUP="disk", MODE="0220"
# aoe block devices
KERNEL=="etherd*", GROUP="disk"
EOF
}

# RHEL5 uses double equals and NAME="%k"
udev_mknod_RH5 () {
    install_aoe_rules <<'EOF'
# aoe char devices
SUBSYSTEM=="aoe", KERNEL=="discover", NAME="etherd/%k", GROUP="disk", MODE="0220"
SUBSYSTEM=="aoe", KERNEL=="err", NAME="etherd/%k", GROUP="disk", MODE="0440"
SUBSYSTEM=="aoe", KERNEL=="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220"
SUBSYSTEM=="aoe", KERNEL=="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220"
SUBSYSTEM=="aoe", KERNEL=="flush", NAME="etherd/%k", GROUP="disk", MODE="0220"
# aoe block devices
KERNEL=="etherd*", NAME="%k", GROUP="disk"
EOF
}

# RHEL4 uses a somewhat bizarre version of udev that was nice at the time
udev_mknod_RH4 () {
    install_aoe_rules <<'EOF'
# aoe char devices
SYSFS{dev}="152:*", KERNEL="discover", NAME="etherd/%k", GROUP="disk", MODE="0220"
SYSFS{dev}="152:*", KERNEL="err", NAME="etherd/%k", GROUP="disk", MODE="0440"
SYSFS{dev}="152:*", KERNEL="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220"
SYSFS{dev}="152:*", KERNEL="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220"
SYSFS{dev}="152:*", KERNEL="flush", NAME="etherd/%k", GROUP="disk", MODE="0220"
EOF
}

# This script was originally written for RHEL3 which doesn't have udev
# Creates the character nodes by hand: major 152, with stat=1, err=2,
# discover=3 and interfaces=4.  Sets RETVAL=1 if any mknod for
# err/discover/interfaces fails.
oldstyle_mknod () {
    RETVAL=0
    echo "creating any missing device special nodes... "
    typeset -i MAJOR=152
    typeset -i MINOR=1
    local cnod
    # boxes running sysfs won't want the stat file, so comment out next line
    mknod -m 0400 "$DEVDIR/stat" c $MAJOR 1
    for cnod in err discover interfaces ; do
        MINOR=$((MINOR + 1))
        if ! test -c "$DEVDIR/$cnod" ; then
            # something that is not a character device is in the way
            if test -e "$DEVDIR/$cnod" ; then
                rm -f "$DEVDIR/$cnod"
            fi
            if mknod -m 0400 "$DEVDIR/$cnod" c $MAJOR $MINOR ; then
                echo "created $DEVDIR/$cnod"
            else
                RETVAL=1
            fi
        fi
    done
}

# provoke a discovery action once the module is loaded
# so we won't have to wait for the aoe devices to
# make their periodic availability notifications
# Sets RETVAL=1 when modprobe fails.  Waits (forever, by design) for
# $DEVDIR to appear after a successful modprobe.
load_aoe_module () {
    if test -n "$AOEINTERFACES" ; then
        MODULEOPTIONS="aoe_iflist=\"$AOEINTERFACES\""
    fi
    echo "loading ATA-over-Ethernet kernel module... "
    # $MODULEOPTIONS is intentionally unquoted: it may carry several
    # space-separated options for modprobe.
    if modprobe aoe $MODULEOPTIONS ; then
        while ! test -d "$DEVDIR" ; do
            sleep "$DEVSLEEP"
            RETVAL=0
        done
        test -w "$DEVDIR/discover" && echo > "$DEVDIR/discover"
    else
        RETVAL=1
    fi
}

# bring up any required interfaces and set frame size. If this host
# is part of a cluster, and cluster management is using the same
# interfaces, they will already be lit up
# Sets RETVAL=0 on entry and RETVAL=1 if any ifconfig call fails.
set_if_mtu () {
    local Ifn
    RETVAL=0
    if test -n "$AOEINTERFACES" ; then
        for Ifn in $AOEINTERFACES ; do
            # 0 means the link is not both UP and already at the wanted MTU
            LINKSTATE=$(ip link show "$Ifn" 2>&1 | grep -c ",UP.*mtu $IFMTU")
            if test "$LINKSTATE" = "0" ; then
                echo -n "Bringing interface $Ifn online.."
                if ifconfig "$Ifn" mtu "$IFMTU" up ; then
                    # poll until the link reports UP
                    while test "$LINKSTATE" = "0" ; do
                        echo -n .
                        sleep "$ETHSLEEP"
                        LINKSTATE=$(ip link show "$Ifn" 2>&1 | grep -c ',UP')
                    done
                else
                    RETVAL=1
                fi
                echo
            fi
        done
    else
        echo "WARNING: No interface restrictions found in $OPTFILE"
        echo "use of AOE on general-purpose networks is not recommended."
        echo "Jumbo frames not enabled."
    fi
}

# Create device nodes/rules, bring up the interfaces, load (or restrict)
# the aoe module, then activate AoE volume groups and mount every
# non-GFS AoE file system listed in /etc/fstab.
start () {
    local mtp
    MountList=""

    $MKNODES
    set_if_mtu
    CURMOD=$(lsmod | grep aoe)
    if test -z "$CURMOD" ; then
        load_aoe_module
    elif test -n "$AOEINTERFACES" ; then
        echo "Setting ATA-over-Ethernet interface restrictions... "
        if echo "$AOEINTERFACES" > "$DEVDIR/interfaces" ; then
            RETVAL=0
        else
            RETVAL=1
        fi
    fi

    # The linux kernel scanned for LVM volumes at boot time, but AOE was
    # not available then so it couldn't see any AOE devices. We need to
    # do it again so we will be able to mount any AOE-attached LVM stuff
    # I could shotgun "vgchange -a y" here but I think that is dangerous
    # in a shared hardware/unshared software scenario (like, two systems
    # on one coraid shelf that do not share gfs volumes) because there is
    # no inherent I/O fencing to prevent one system from trying to rebuild
    # another system's RAID devices. So we'll explicitly vgchange each vg.
    # The complexity here is due to Red Hat's unusual decision to move most
    # of the LVM stuff into /usr/sbin but keep a separate static linked
    # version in /sbin for use when /usr is not mounted.
    LVMACT="echo Cannot find LVM device"
    if test -x /sbin/vgchange ; then
        LVMACT="/sbin/vgchange -a y"                    # RHEL6 all runlevels
    elif test -d /usr/sbin ; then
        if /usr/sbin/vgscan > /dev/null 2>&1 ; then
            LVMACT="/usr/sbin/vgchange -a y"
        fi
    elif test -x /sbin/lvm.static ; then
        if /sbin/lvm.static vgscan > /dev/null 2>&1 ; then
            LVMACT="/sbin/lvm.static vgchange -a y"     # RHEL 3/4/5
        fi
    else
        echo "Logical Volume Management not available"
        echo -n "Any ATA-over-Ethernet volumes that require"
        echo " LVM will not be mounted"
        LVMACT="echo No lvm toolset found, cannot activate"
    fi

    #################################################################
    #                                                               #
    # WARNING: To avoid unnecessary complexity, this code assumes   #
    # that LVM Volume Groups that physically reside on AOE attached #
    # devices will have the string AoE (CASE SENSITIVE) in the VG's #
    # name. It looks in the first column of /etc/fstab for strings  #
    # containing "AoE" and strings that begin with the AoE device   #
    # directory (usually "/dev/etherd/"). I strongly recommend you  #
    # use this naming convention, but if you want to plow your own  #
    # furrow, you will need to do your fancy LVM commands here.     #
    # If you're using AoE JBOD under linux soft RAID, ???           #
    # Don't forget to manipulate RETVAL appropriately               #
    #                                                               #
    #################################################################
    CURMOD=$(lsmod | grep aoe)
    if test -z "$CURMOD" ; then
        echo "ERROR: ATA-over-ethernet kernel module is not loaded."
        RETVAL=1
    else
        echo "loading AOE devices from /etc/fstab... "
        while read -r BlockDev MountPoint fsType MntOpt fsFreq fsPass ; do
            # We are only going to mount non-GFS volumes because there's another
            # init script that makes sure any cluster management stuff that GFS
            # and GFS2 volumes require is working before doing GFS mounts --CTB
            if test "${fsType:0:3}" != "gfs" ; then
                case "$BlockDev" in
                # We have to wait for all aoe devices to become available or
                # the mounts will fail and that can cause cascading failures
                # in other software including the clustering code. During a
                # RHEL5 boot up sequence these waits can be quite lengthy,
                # for reasons that are poorly understood as of 2007-09-09
                # still poorly understood 2012-04-15
                "$DEVDIR"*)
                    echo -n "waiting for $BlockDev to come on line.."
                    while ! test -e "$BlockDev" ; do
                        sleep "$VOLSLEEP"
                        echo -n .
                    done
                    MountList="${MountList}${MountPoint} "
                    echo
                    ;;
                /dev/*AoE*/*)
                    VOLGRP=$(echo "$BlockDev" | cut -d/ -f3)
                    echo -n "waiting for $VOLGRP to come on line.."
                    while ! vgs | grep -q "^[[:space:]]*$VOLGRP" ; do
                        sleep "$VOLSLEEP"
                        echo -n .
                    done
                    echo
                    # $LVMACT holds a command plus its arguments, so it
                    # must stay unquoted to be split into words.
                    $LVMACT "$VOLGRP"
                    MountList="${MountList}${MountPoint} "
                    ;;
                LABEL=*)
                    echo "ignoring $BlockDev, non-portable fstab syntax"
                    ;;
                UUID=*)
                    echo "ignoring $BlockDev, non-portable fstab syntax"
                    ;;
                *)
                    ;;
                esac
            fi
        done < /etc/fstab

        # All that bumpf was just to build the mount list. Mount the volumes now
        if test -n "$MountList" ; then
            for mtp in $MountList ; do
                if mount "$mtp" ; then
                    echo "$mtp mounted"
                else
                    echo "Failed to mount $mtp"
                    RETVAL=1
                fi
            done
        fi
    fi

    if test "$RETVAL" = "0" ; then
        test -d "$LOCKDIR" && touch "$LOCKDIR/aoe"
        echo -n "ATA-over-ethernet subsystem startup" ; success
    else
        echo -n $"ATA-over-ethernet subsystem startup" ; failure
    fi
    echo
}

# aoe volumes must be unmounted in reverse order in case any
# of them are mounted on each other, and the aoe kernel module
# should only be unloaded if no other modules are using it.
stop () {
    local mtd actv
    RETVAL=0

    typeset -i MtCt=0
    # tac reverses /etc/mtab so the most recent mounts are unmounted first
    MOUNTED=$(tac /etc/mtab \
        | gawk '/^\/dev\/(etherd\/|mapper\/[^\/]*AoE)/{printf "%s ",$2}')
    if test -z "$MOUNTED" ; then
        echo "INFO: No ATA-over-ethernet attached volumes were found"
    else
        for mtd in $MOUNTED ; do
            MtCt=$((MtCt + 1))
            if umount "$mtd" ; then
                echo "$mtd unmounted"
            else
                echo "Failed to unmount $mtd"
                RETVAL=1
            fi
        done
        if test "$MtCt" != 0 ; then
            if test "$RETVAL" = 0 ; then
                echo "$MtCt ATA-over-ethernet volumes successfully unmounted"
            else
                echo "Attempted to unmount $MtCt volumes"
            fi
        fi
    fi

    typeset -i VgCt=0
    VGS=$(vgdisplay -A -c 2>/dev/null | gawk -F: '($1~/AoE/){print $1}')
    if test -z "$VGS" ; then
        echo "INFO: No active ATA-over-ethernet volume groups were found"
    else
        for actv in $VGS ; do
            VgCt=$((VgCt + 1))
            if vgchange -a n "$actv" ; then
                echo "$actv deactivated"
            else
                echo "failed to deactivate $actv"
                RETVAL=1
            fi
        done
        if test "$VgCt" != 0 ; then
            if test "$RETVAL" = 0 ; then
                echo "$VgCt ATA-over-ethernet volume groups deactivated"
            else
                echo "Attempted to deactivate $VgCt ATA-over-ethernet volume groups"
            fi
        fi
    fi

    # third lsmod column is the module's use count
    MODHOOKS=$(lsmod | gawk '/^aoe/{print $3}')
    if test -z "$MODHOOKS" ; then
        echo "INFO: No ATA-over-ethernet kernel module loaded, aoe unload skipped"
    elif test "$MODHOOKS" = "0" ; then
        if rmmod aoe ; then
            echo "INFO: ATA-over-Ethernet kernel module unloaded"
        else
            echo "WARNING: Failed to unload aoe kernel module"
        fi
    else
        echo "ERROR: Cannot unload aoe module because it is still in use"
        RETVAL=1
    fi

    if test "$RETVAL" = "0" ; then
        test -d "$LOCKDIR" && rm -f "$LOCKDIR/aoe"
        echo -n "ATA-over-ethernet subsystem shutdown" ; success
    else
        echo -n "ATA-over-ethernet subsystem shutdown" ; failure
    fi
    echo
}

# This routine only works if you have sysfs, obviously. It is quite
# similar to the aoe-stat distributed with later versions of aoe-tools
# but hopefully a bit faster and more informative. If there is no
# sysfs to be found, we will drop back to the aoe-tools command, and
# hopefully that toolset will include the appropriate aoe-stat binary.
display_sysfs_stats () {
    local dev sysd devd minor m_node sectors psize UnitLevel Unit format
    # Disk drive vendors want one K to equal 1000 bytes
    #typeset -i ONEK=1000
    # bits Bytes K Megs Gigs TeraBytes PetaBytes ExaBytes ZettaBytes YottaBytes
    #declare -a UnitTaxonomy=(b B KB MB GB TB PB EB ZB YB)
    # but all Real Computer Programmers know a K is really 1024.
    typeset -i ONEK=1024
    # we will grudgingly use the new style (IEC 1998) taxonomy
    declare -a UnitTaxonomy=(b B KiB MiB GiB TiB PiB EiB ZiB YiB)
    # bits Bytes Kibi Mebi Gibi TebiBytes PebiBytes ExbiBytes ZebiBytes YobiBytes
    # at this time BrontoBytes, GeopBytes, and HellaBytes are not yet
    # recognized by the IEC, IEEE, or the International System of Units (SI)
    # bash uses base zero arrays so subtract one from array element count
    UnitDepth=$(( ${#UnitTaxonomy[*]} - 1 ))

    if test -d "/sys/block" ; then
        find_bdevs
        if test -z "$BDEVS" ; then
            echo "INFO: No ATA-over-ethernet block devices found in /sys/block"
            RETVAL=0
        else
            format="%10s %15s %-14s %-18s %-16s\n"
            printf "$format" "device" "size " " state " " MAC" " if"
            for dev in $BDEVS ; do
                sysd="/sys/block/etherd!$dev"
                devd="$DEVDIR/$dev"
                # sysfs "dev" holds major:minor
                minor=$(awk -F: '{print $2}' "$sysd/dev")
                if test -b "$devd" ; then
                    # sixth "ls -l" column of a device node is its minor number
                    m_node=$(ls -l "$devd" | awk '{print $6}')
                    if test "$minor" != "$m_node" ; then
                        echo "WARNING: device node $devd has bad minor device number"
                    fi
                else
                    echo "WARNING: no device node $devd found for sysfs device $sysd"
                fi
                # Calculate volume size and format it all purty like
                # just add more taxons if you need something bigger than a yottabyte
                sectors=$(cat "$sysd/size")
                # 512-byte sectors, scaled by 1000 so that the last three
                # digits become the decimal fraction inserted by sed below
                psize=$(( 512000 * sectors ))
                UnitLevel=1
                while test "${#psize}" -gt 6 && test "$UnitLevel" -lt "$UnitDepth" ; do
                    psize=$(( psize / ONEK ))
                    UnitLevel=$(( UnitLevel + 1 ))
                done
                Unit=${UnitTaxonomy[UnitLevel]}
                psize=$(printf "%04d\n" "$psize" | sed 's!\(...\)$!.\1!')
                printf "$format" \
                    "$dev" \
                    "${psize}${Unit}" \
                    "$(cat "$sysd/state")" \
                    "$(sed 's/../&:/g;s/:$//' "$sysd/mac")" \
                    "$(cat "$sysd/netif")"
            done | sort
        fi
    else
        if test -d /sys ; then
            echo "ERROR: cannot read /sys/block folder. Is sysfs mounted?"
            RETVAL=1
        elif ! aoe-stat ; then
            RETVAL=1
        fi
    fi
}

# Report the lock flag, the module state, the allowed interfaces and the
# known aoe block devices.  RETVAL is 0 when the module is loaded and
# the device report ran without error; it stays non-zero otherwise.
status () {
    if test -e "$LOCKDIR/aoe" ; then
        echo -e "\nATA-over-ethernet service flag exists in $LOCKDIR/aoe"
    else
        echo -e "\nNo ATA-over-ethernet service flag found in $LOCKDIR"
    fi

    MODLS=$(lsmod | grep aoe)
    if test -z "$MODLS" ; then
        echo "The aoe kernel module is not loaded."
    else
        # Module is loaded: report success unless display_sysfs_stats
        # flags an error below.
        RETVAL=0
        if test -d "$SYSDIR" ; then
            if test -r "$SYSDIR/aoe_iflist" ; then                  # RHEL4
                IFLIST=$(cat "$SYSDIR/aoe_iflist")
            elif test -r "$SYSDIR/parameters/aoe_iflist" ; then
                IFLIST=$(cat "$SYSDIR/parameters/aoe_iflist")
            else
                echo -e "\nWARNING: Unable to read aoe_iflist."
            fi
        else
            echo -e "\nNo aoe kernel module folder found in $SYSDIR."
        fi
        # no restriction list means every interface that is up is allowed
        if test -z "$IFLIST" ; then
            find_ethn
            IFLIST=$ETHUP
        fi
        echo -e "\nATA-over-ethernet allowed interfaces: $IFLIST\n"
        display_sysfs_stats
        echo -e "\n"
    fi
}

# If you change the size of an AOE device from the device itself
# (for example, if you shrink or stretch a CoRAID array) the
# changes will not be picked up until the device is remounted or
# a "revalidate" operation is performed. Keep in mind that you
# can break running applications by reducing the size of an
# array while IO is still pending -- use with caution!
#
# Drivers newer than version 14 are revalidated through
# $DEVDIR/revalidate; older ones are reloaded, which is only possible
# while nothing else is using the module.
revalidate () {
    if test -d "$SYSDIR" ; then
        find_version
        # numeric comparison; a bare ">" inside test would be parsed by
        # the shell as an output redirection
        if aoe_version_gt 14 ; then
            if ls -axl "$DEVDIR" \
                | gawk '/^brw.+e[0-9]+\.[0-9]+(p[0-9]+)?$/ { print $10; system("sync") }' \
                > "$DEVDIR/revalidate" ; then
                RETVAL=0
            else
                RETVAL=1
            fi
        else
            MODHOOKS=$(lsmod | gawk '/^aoe/{print $3}')
            if test "$MODHOOKS" = "0" ; then
                if rmmod aoe ; then
                    # reload through load_aoe_module so the aoe_iflist
                    # interface restrictions are applied again
                    RETVAL=0
                    load_aoe_module
                else
                    RETVAL=1
                fi
            else
                echo -e "\tVersion $AOEVERS of the aoe kernel module does not"
                echo -e "\tsupport dynamic revalidation. Revalidation can be"
                echo -e "\tforced by loading and unloading the module itself,"
                echo -e "\tbut there are currently $MODHOOKS other modules"
                echo -e "\tactively using the aoe module."
                echo -n "ATA-over-ethernet subsystem revalidate" ; failure
                echo
            fi
        fi
    else
        start
    fi
}

# Ask the driver to forget every device that is no longer reachable by
# writing "all" to $DEVDIR/flush.  Nothing is written unless that path
# is a writable character device.
flush () {
    if test -d "$SYSDIR" ; then
        find_version
        if aoe_version_gt 39 ; then
            if ! test -w "$DEVDIR/flush" ; then
                echo "ERROR: $DEVDIR/flush does not exist or is not writeable."
                RETVAL=1
            elif ! test -c "$DEVDIR/flush" ; then
                echo "ERROR: $DEVDIR/flush is not a character device file"
                RETVAL=1
            elif echo "all" > "$DEVDIR/flush" ; then
                RETVAL=0
            else
                echo "ERROR: Unable to flush existing devices."
                RETVAL=1
            fi
        else
            echo "AOE drivers older than version 39 do not support flushing."
        fi
    else
        start
    fi
}

# AOE devices send out periodic announcements so that attached
# hosts know they are available. You can also provoke these
# announcements with a "query config" (aoe protocol command 1)
# broadcast from the host - very handy to speed things up.
discover () {
    if test -d "$SYSDIR" ; then
        if test -w "$DEVDIR/discover" ; then
            if echo 1 > "$DEVDIR/discover" ; then
                RETVAL=0
            else
                echo "ERROR: unable to provoke AOE device discovery!"
                RETVAL=1
            fi
        else
            echo "ERROR: $DEVDIR/discover is not writable."
            RETVAL=1
        fi
    else
        # Module not loaded yet: loading it triggers a discovery.
        # load_aoe_module sets RETVAL=1 if modprobe fails.
        RETVAL=0
        load_aoe_module
    fi
}

case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    status)
        status
        ;;
    revalidate)
        revalidate
        ;;
    flush)
        flush
        ;;
    discover)
        discover
        ;;
    *)
        usage
        ;;
esac

exit $RETVAL