MFS high availability: avoiding a single point of failure


1. Machines

W1  192.168.37.21/24 (drbd + mfsmaster)

W2  192.168.37.22/24 (drbd + mfsmaster)

VIP 192.168.37.200

W3  192.168.37.23/24 (metalogger server)

W4  192.168.37.24/24 (chunk server)

W5  192.168.37.25/24 (chunk server)

W6  192.168.37.26/24 (client)

##Put all of these hostnames in /etc/hosts on every machine so the hosts can reach each other by name
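
For example, something like this on every machine (a sketch matching the address list above):

cat >> /etc/hosts << EOF
192.168.37.21 w1
192.168.37.22 w2
192.168.37.23 w3
192.168.37.24 w4
192.168.37.25 w5
192.168.37.26 w6
EOF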



2. Install and configure DRBD

rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm
yum install -y kmod-drbd84 drbd84-utils

vim /etc/drbd.d/global_common.conf
# DRBD is the result of over a decade of development by LINBIT.
# In case you need professional services for DRBD or have
# feature requests visit http://www.linbit.com
global {
    usage-count no;
    udev-always-use-vnr; # treat implicit the same as explicit volumes
}
common {
    protocol C;
    handlers {
        pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
        pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
        local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
    }
    startup {
    }
    options {
    }
    disk {
        on-io-error detach;
    }
    net {
    }
    syncer {
        rate 1024M;
    }
}

vim /etc/drbd.d/mfs.res
resource mfs {
    protocol C;
    meta-disk internal;
    device /dev/drbd1;
    syncer {
        verify-alg sha1;
    }
    net {
        allow-two-primaries;
    }
    on w1 {
        disk /dev/sdb;
        address 192.168.37.21:7789;
    }
    on w2 {
        disk /dev/sdb;
        address 192.168.37.22:7789;
    }
}

drbdadm create-md mfs

##Create the DRBD metadata for the resource

modprobe drbd

##Manually load the kernel module

[root@w1 ~]# lsmod | grep drbd
drbd                  396875  0 
libcrc32c              12644  2 xfs,drbd

##Verify that the kernel module is loaded
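
modprobe only loads the module for the current boot. To have it load automatically after a reboot, one option on CentOS 7 is a modules-load.d entry (a small sketch):

echo drbd > /etc/modules-load.d/drbd.conf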


[root@w1 ~]# drbdadm up mfs
[root@w1 ~]# drbdadm -- --force primary mfs
[root@w1 ~]# drbd-overview 
NOTE: drbd-overview will be deprecated soon.
Please consider using drbdtop.
 1:mfs/0  WFConnection Primary/Unknown UpToDate/DUnknown
##Bring up the DRBD resource and force w1 to primary


On the peer node w2, run:

[root@w2 ~]# drbdadm create-md mfs
[root@w2 ~]# modprobe drbd
[root@w2 ~]# drbdadm up mfs
[root@w2 ~]# cat /proc/drbd
version: 8.4.10-1 (api:1/proto:86-101)
GIT-hash: a4d5de01fffd7e4cde48a080e2c686f9e8cebf4c build by mockbuild@, 2017-09-15 14:23:22
 1: cs:SyncTarget ro:Secondary/Primary ds:Inconsistent/UpToDate C r-----
    ns:0 nr:1808384 dw:1808384 dr:0 al:8 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:8677020
    [==>.................] sync'ed: 17.3% (8472/10236)M
    finish: 0:03:39 speed: 39,448 (39,312) want: 102,400 K/sec

##Check the data synchronisation status


[root@w1 ~]# mkfs.xfs /dev/drbd1
meta-data=/dev/drbd1             isize=512    agcount=4, agsize=655338 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=0, sparse=0
data     =                       bsize=4096   blocks=2621351, imaxpct=25
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0 ftype=1
log      =internal log           bsize=4096   blocks=2560, version=2
         =                       sectsz=512   sunit=0 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0

##Format the device


mkdir /usr/local/mfs
chown -R mfs:mfs mfs/
[root@w1 local]# mount /dev/drbd1 /usr/local/mfs/
[root@w1 local]# df -h
Filesystem           Size  Used Avail Use% Mounted on
/dev/drbd1            10G   33M   10G   1% /usr/local/mfs
##Create the directory, set its ownership, and mount the device





3. MooseFS deployment

Download and install the base software

yum install zlib-devel gcc -y    ## on every machine
useradd mfs                      ## the mfs user's uid and gid must be identical on every machine
cd /usr/local/src
wget https://github.com/moosefs/moosefs/archive/v3.0.96.tar.gz
scp src/v3.0.96.tar.gz w3:/usr/local/src    ## scp v3.0.96.tar.gz to every host except w2
tar xvf v3.0.96.tar.gz
cd moosefs-3.0.96/
## the unpack/cd steps are the same on the other hosts and are not repeated below
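
Since the uid/gid of the mfs user has to match on every machine, one way to guarantee it is to pick the ids explicitly (a sketch; 1001 is just an assumed free id, use whatever is available on all hosts):

groupadd -g 1001 mfs
useradd -u 1001 -g mfs -s /sbin/nologin -M mfs
id mfs    # run on every host and confirm the output matches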

## Master hosts w1 && w2 (MFS is installed onto the shared DRBD storage, so installing on one node is enough)

[root@w1 moosefs-3.0.96]# ./configure --prefix=/usr/local/mfs --with-default-user=mfs --with-default-group=mfs --disable-mfschunkserver --disable-mfsmount
[root@w1 moosefs-3.0.96]# make && make install
[root@w1 mfs]# pwd
/usr/local/mfs/etc/mfs
[root@w1 mfs]# cp mfsexports.cfg.sample mfsexports.cfg
[root@w1 mfs]# cp mfsmaster.cfg.sample mfsmaster.cfg
[root@w1 mfs]# vim mfsmaster.cfg
##The stock defaults in mfsmaster.cfg are fine as-is

[root@w1 mfs]# vim mfsexports.cfg
*                       /       rw,alldirs,mapall=mfs:mfs,password=000000
*                       .       rw
##Edit the export control file
[root@w1 mfs]# pwd
/usr/local/mfs/var/mfs
[root@w1 mfs]# cp metadata.mfs.empty metadata.mfs

##The metadata file has to be initialised by hand (copy metadata.mfs.empty to metadata.mfs)


[root@w1 system]# pwd
/usr/lib/systemd/system
[root@w1 system]# cat mfsmaster.service 
[Unit]
Description=mfs
After=network.target

[Service]
Type=forking
ExecStart=/usr/local/mfs/sbin/mfsmaster start
ExecStop=/usr/local/mfs/sbin/mfsmaster stop
PrivateTmp=true

[Install]
WantedBy=multi-user.target

##Write the startup unit; remember to give it execute permission

##scp the unit file to the other master as well
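
For example (a sketch, assuming the same path on w2):

scp /usr/lib/systemd/system/mfsmaster.service w2:/usr/lib/systemd/system/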

systemctl enable drbd mfsmaster
systemctl stop drbd mfsmaster

##The services are stopped because crm will take them over, but they are enabled at boot so that crm can find them as systemd resources



Metalogger host (w3)

[root@w3 moosefs-3.0.96]# ./configure --prefix=/usr/local/mfs --with-default-user=mfs --with-default-group=mfs --disable-mfschunkserver --disable-mfsmount
[root@w3 moosefs-3.0.96]# make && make install
[root@w3 ~]# cd /usr/local/mfs/etc/mfs/
[root@w3 mfs]# cp mfsmetalogger.cfg.sample mfsmetalogger.cfg
[root@w3 mfs]# cat mfsmetalogger.cfg | grep MASTER_HOST
MASTER_HOST = 192.168.37.200

##MASTER_HOST is set straight to the VIP that the HA setup will use later

[root@w3 ~]# /usr/local/mfs/sbin/mfsmetalogger start
[root@w3 ~]# netstat -ntlp | grep metalogger
[root@w3 ~]# netstat -ntlp | grep 9419

##If you start the Metalogger server now it points at the master VIP, which is not configured yet, so these checks show nothing; I simply leave it stopped for the moment. If you want to verify it right away, point MASTER_HOST at one of the masters' real IPs instead.
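
If you do want that temporary check, a sketch (pointing at w1's real IP, then reverting afterwards):

sed -i 's/^MASTER_HOST.*/MASTER_HOST = 192.168.37.21/' /usr/local/mfs/etc/mfs/mfsmetalogger.cfg
/usr/local/mfs/sbin/mfsmetalogger start
ps -ef | grep mfsmetalogger        # the process should be up and pulling metadata from w1
/usr/local/mfs/sbin/mfsmetalogger stop
sed -i 's/^MASTER_HOST.*/MASTER_HOST = 192.168.37.200/' /usr/local/mfs/etc/mfs/mfsmetalogger.cfg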






Chunk server hosts (w4, w5)

cd moosefs-3.0.96/
./configure --prefix=/usr/local/mfs --with-default-user=mfs --with-default-group=mfs --disable-mfsmaster --disable-mfsmount
make && make install
cd /usr/local/mfs/etc/mfs
cp mfschunkserver.cfg.sample mfschunkserver.cfg
cat mfschunkserver.cfg | grep MASTER_HOST
MASTER_HOST = 192.168.37.200
cat mfshdd.cfg | grep html
/html

##The storage directory name in mfshdd.cfg is up to you
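
If mfshdd.cfg does not exist yet, copy it from the sample and add the storage directory (a sketch, using the /html path chosen above):

cd /usr/local/mfs/etc/mfs
cp mfshdd.cfg.sample mfshdd.cfg
echo "/html" >> mfshdd.cfg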

mkdir /html
chown -R mfs:mfs /html/
/usr/local/mfs/sbin/mfschunkserver start
netstat -lantp | grep 9420

##Again, the VIP is not configured yet, so the chunk server cannot reach the master for now




Client (w6)

[root@w6 ~]# yum install -y fuse fuse-devel
[root@w6 ~]# modprobe fuse
[root@w6 ~]# lsmod | grep fuse
fuse                   91874  1
[root@w6 moosefs-3.0.96]# ./configure --prefix=/usr/local/mfs --with-default-user=mfs --with-default-group=mfs --disable-mfsmaster --disable-mfschunkserver --enable-mfsmount
[root@w6 moosefs-3.0.96]# make && make install
[root@w6 ~]# mkdir /test
[root@w6 ~]# chown -R mfs:mfs /test
[root@w6 ~]# /usr/local/mfs/bin/mfsmount /test -H 192.168.37.200 -p

##Again, without the VIP there is nothing to verify yet (df -h shows nothing); if you forget the password, check mfsexports.cfg on the master

##In other words, that final mfsmount command cannot be used yet either




4. Building the pcs cluster (w1, w2)

yum install -y pacemaker pcs psmisc policycoreutils-python
systemctl enable pcsd
systemctl start pcsd
echo 000000 | passwd --stdin hacluster

##Set the hacluster user's password

 

The following only needs to be run on w1:

[root@w1 ~]# pcs cluster auth w1 w2
Username: hacluster
Password: 000000
w2: Authorized
w1: Authorized

##Authenticate the pcs cluster hosts

[root@w1 ~]# pcs cluster setup --name mycluster w1 w2 --force

##Create the cluster from the two nodes

[root@w1 corosync]# pcs cluster start --all
w1: Starting Cluster...
w2: Starting Cluster...

##Start the cluster

ps -ef | grep corosync
ps -ef | grep pacemaker
corosync-cfgtool -s

##Check the cluster state

corosync-cmapctl | grep members
runtime.totem.pg.mrp.srp.members.1.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.1.ip (str) = r(0) ip(192.168.37.21) 
runtime.totem.pg.mrp.srp.members.1.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.1.status (str) = joined
runtime.totem.pg.mrp.srp.members.2.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.2.ip (str) = r(0) ip(192.168.37.22) 
runtime.totem.pg.mrp.srp.members.2.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.2.status (str) = joined

##Show member node information for the cluster

pcs status
Cluster name: mycluster
WARNING: no stonith devices and stonith-enabled is not false   ##warns that no fencing (STONITH) device is configured
Stack: corosync                                                 ##which layer passes the cluster messages
Current DC: w1 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
##DC = Designated Coordinator, the global arbitration node elected by all nodes
Last updated: Sat Oct 28 23:04:17 2017
Last change: Sat Oct 28 22:45:43 2017 by hacluster via crmd on w1
2 nodes configured
0 resources configured
Online: [ w1 w2 ]
No resources      ##no resources defined yet, so this is empty
Daemon Status:    ##all daemons are running normally
  corosync: active/disabled
  pacemaker: active/disabled
  pcsd: active/enabled

crm_verify -L -V
   error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
   error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
   error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
Errors found during check: config not valid

##Check the configuration for errors; they are all caused by the missing STONITH device

[root@w1 corosync]# pcs property set stonith-enabled=false
crm_verify -L -V

##With STONITH disabled, crm_verify reports no errors on either node






5. crmsh (only needs to be installed on one node)

##Before starting, make sure drbd and mfsmaster are already stopped; to be safe, restart corosync and pacemaker as well, otherwise you can run into the situation below:

crm(live)# status
Stack: corosync
Current DC: w2 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
Last updated: Sun Oct 29 08:09:14 2017
Last change: Sun Oct 29 08:08:18 2017 by root via cibadmin on w1
2 nodes configured
2 resources configured
Online: [ w1 w2 ]
Full list of resources:
 Master/Slave Set: ms_mfs_drbd [mfs_drbd]
     Stopped: [ w1 w2 ]
Failed Actions:
* mfs_drbd_monitor_0 on w2 'not configured' (6): call=12, status=complete, exitreason='meta parameter misconfigured, expected clone-max -le 2, but found unset.',
    last-rc-change='Sun Oct 29 08:07:31 2017', queued=0ms, exec=77ms
* mfs_drbd_monitor_0 on w1 'not configured' (6): call=12, status=complete, exitreason='meta parameter misconfigured, expected clone-max -le 2, but found unset.',
    last-rc-change='Sun Oct 29 08:07:31 2017', queued=1ms, exec=62ms


[root@w1 src]# cd crmsh-2.3.2
[root@w1 crmsh-2.3.2]# python setup.py install

##crmsh is not in the default yum repositories, so I downloaded it myself

##It is available on GitHub: https://github.com/ClusterLabs/crmsh
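
For example, fetching the 2.3.2 tag from GitHub (a sketch; the URL follows GitHub's tag-tarball convention, adjust it if the tag name differs):

cd /usr/local/src
wget https://github.com/ClusterLabs/crmsh/archive/2.3.2.tar.gz -O crmsh-2.3.2.tar.gz
tar xvf crmsh-2.3.2.tar.gz
cd crmsh-2.3.2
python setup.py install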

systemctl start corosync pacemaker
systemctl enable corosync pacemaker




6. DRBD + mount resource configuration

##Open the configuration tool

[root@w1 ~]# crm
##start crm and configure it so the cluster takes over the services
crm(live)# configure
crm(live)configure# primitive mfs_drbd ocf:linbit:drbd params drbd_resource=mfs op monitor role=Master interval=10 timeout=20 op monitor role=Slave interval=20 timeout=20 op start timeout=240 op stop timeout=100
##the primitive name is the cluster resource name, drbd_resource is the DRBD resource name, the rest are monitor/start/stop operations
crm(live)configure# verify
##syntax check
crm(live)configure# master ms_mfs_drbd mfs_drbd meta master-max=1 master-node-max=2 clone-max=2 clone-node-max=1 notify="True"
crm(live)configure# verify
crm(live)configure# commit
##define the master/slave resource on top of the DRBD primitive
crm(live)configure# primitive mfsmount ocf:heartbeat:Filesystem params device=/dev/drbd1 directory=/usr/local/mfs fstype=xfs op start interval=0 timeout=60s op stop interval=0 timeout=60s
crm(live)configure# verify
crm(live)configure# colocation ms_mfs_drbd_mfsmount inf: mfsmount ms_mfs_drbd
##colocation constraint: these resources are tied together and run on the same node
crm(live)configure# order ms_mfs_drbd_before_mfsmount Mandatory: ms_mfs_drbd:promote mfsmount:start
##order constraint: the start order of the colocated resources (note it is written in the opposite direction to the colocation line)
crm(live)configure# verify
crm(live)configure# commit
##define the shared filesystem resource so the mount follows the master automatically on failover
##(a location constraint would specify which nodes a resource prefers to run on; none is needed here)
crm(live)# status
Stack: corosync
Current DC: w1 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
Last updated: Sun Oct 29 08:46:54 2017
Last change: Sun Oct 29 08:38:44 2017 by root via cibadmin on w1
2 nodes configured
3 resources configured
Online: [ w1 w2 ]
Full list of resources:
 Master/Slave Set: ms_mfs_drbd [mfs_drbd]
     Masters: [ w1 ]
     Slaves: [ w2 ]
 mfsmount   (ocf::heartbeat:Filesystem):    Started w1
##the DRBD + shared-mount part of the configuration ends here




7. MFS + VIP resource configuration

crm(live)configure# primitive mfs systemd:mfsmaster op monitor interval=30s timeout=100s op start interval=0 timeout=100s op stop interval=0 timeout=100s
##if you set a timeout below 100s you will get warnings like the following, which can be ignored:
## WARNING: mfs: specified timeout 30s for start is smaller than the advised 100
## WARNING: mfs: specified timeout 30s for stop is smaller than the advised 100
crm(live)configure# verify
crm(live)configure# colocation mfs_with_mfsmount inf: mfsmount mfs
##affinity binding: mfs must run where the mount is
crm(live)configure# order mfsmount_before_mfs Mandatory: mfsmount mfs
##start mfsmount first, then mfs
crm(live)configure# commit
##note: get the colocation and ordering right; the MFS installation lives on the DRBD-backed mount,
##so if the mount ended up on w1 while mfs ran on w2, it would obviously break
crm(live)configure# primitive vip ocf:heartbeat:IPaddr params ip=192.168.37.200
crm(live)configure# colocation vip_with_mfs inf: mfs vip
crm(live)configure# order mfs_before_vip Mandatory: mfs vip
crm(live)configure# verify
crm(live)configure# commit
crm(live)# status
Stack: corosync
Current DC: w2 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
Last updated: Sun Oct 29 09:26:59 2017
Last change: Sun Oct 29 09:26:46 2017 by root via cibadmin on w1
2 nodes configured
5 resources configured
Online: [ w1 w2 ]
Full list of resources:
 Master/Slave Set: ms_mfs_drbd [mfs_drbd]
     Masters: [ w1 ]
     Slaves: [ w2 ]
 mfsmount   (ocf::heartbeat:Filesystem):    Started w1
 mfs        (systemd:mfsmaster):            Started w1
 vip        (ocf::heartbeat:IPaddr):        Started w1

##All of the resources are now running on w1


[root@w1 ~]# ip addr | grep 192.168
    inet 192.168.37.21/24 brd 192.168.37.255 scope global ens33
    inet 192.168.37.200/24 brd 192.168.37.255 scope global secondary ens33

##The VIP is visible on w1; w2 does not have it
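
The same check on w2 should come back empty (a sketch):

[root@w2 ~]# ip addr | grep 192.168.37.200
##no output: the VIP lives only on w1 for now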



8. Verification

Start the Metalogger server

[root@w3 ~]# cat /usr/lib/systemd/system/mfsmetalogger.service
[Unit]
Description=mfs
After=network.target

[Service]
Type=forking
ExecStart=/usr/local/mfs/sbin/mfsmetalogger start
ExecStop=/usr/local/mfs/sbin/mfsmetalogger stop
PrivateTmp=true

[Install]
WantedBy=multi-user.target

[root@w3 ~]# systemctl enable mfsmetalogger
[root@w3 ~]# systemctl start mfsmetalogger

Start the chunk servers (w4, w5)

cat /usr/lib/systemd/system/mfschuck.service 
[Unit]
Description=mfs
After=network.target

[Service]
Type=forking
ExecStart=/usr/local/mfs/sbin/mfschunkserver start
ExecStop=/usr/local/mfs/sbin/mfschunkserver stop
PrivateTmp=true

[Install]
WantedBy=multi-user.target

systemctl enable mfschuck
systemctl start mfschuck


Client mount

[root@w6 ~]# /usr/local/mfs/bin/mfsmount /test -H 192.168.37.200 -p
MFS Password:     ##the password is 000000
mfsmaster accepted connection with parameters: read-write,restricted_ip,map_all ; root mapped to mfs:mfs ; users mapped to mfs:mfs
[root@w6 ~]# df -h | grep test
192.168.37.200:9421   16G  3.6G   13G  23% /test
##mounted successfully
[root@w6 test]# touch 1.txt
[root@w6 test]# echo "I am WSL" > 1.txt 
[root@w6 test]# cat 1.txt 
I am WSL
##reads and writes work without any problem


cd /usr/local/mfs/var/mfs/

##On both the master and the metalogger this path contains files that record your operations
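
For instance, something like the following (a sketch; the exact file names depend on log rotation and version):

[root@w1 mfs]# ls /usr/local/mfs/var/mfs/
changelog.0.mfs  metadata.mfs.back  metadata.mfs.back.1  stats.mfs
[root@w3 mfs]# ls /usr/local/mfs/var/mfs/
changelog_ml.0.mfs  metadata_ml.mfs.back  sessions_ml.mfs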



9. Master failure test
[root@w1 ~]# pcs status
Cluster name: mycluster
Stack: corosync
Current DC: w2 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
Last updated: Sun Oct 29 10:06:24 2017
Last change: Sun Oct 29 09:26:46 2017 by root via cibadmin on w1
2 nodes configured
5 resources configured
Online: [ w1 w2 ]
Full list of resources:
 Master/Slave Set: ms_mfs_drbd [mfs_drbd]
     Masters: [ w1 ]
     Slaves: [ w2 ]
 mfsmount   (ocf::heartbeat:Filesystem):    Started w1
 mfs        (systemd:mfsmaster):            Started w1
 vip        (ocf::heartbeat:IPaddr):        Started w1
Daemon Status:
  corosync: active/enabled
  pacemaker: active/enabled
  pcsd: active/enabled

##This is the state before w1 is shut down
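
The test here simply powers w1 off. If you would rather not shut the node down, putting it in standby triggers the same failover (a sketch; on newer pcs releases the command is pcs node standby):

[root@w1 ~]# pcs cluster standby w1       # resources migrate to w2
[root@w1 ~]# pcs status                   # watch the resources move
[root@w1 ~]# pcs cluster unstandby w1     # bring w1 back as a normal node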




[root@w2 ~]# pcs status
Cluster name: mycluster
Stack: corosync
Current DC: w2 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
Last updated: Sun Oct 29 10:08:26 2017
Last change: Sun Oct 29 09:26:46 2017 by root via cibadmin on w1
2 nodes configured
5 resources configured
Online: [ w2 ]
OFFLINE: [ w1 ]
Full list of resources:
 Master/Slave Set: ms_mfs_drbd [mfs_drbd]
     Masters: [ w2 ]
     Stopped: [ w1 ]
 mfsmount   (ocf::heartbeat:Filesystem):    Started w2
 mfs        (systemd:mfsmaster):            Started w2
 vip        (ocf::heartbeat:IPaddr):        Started w2
Daemon Status:
  corosync: active/enabled
  pacemaker: active/enabled
  pcsd: active/enabled

##Keep refreshing pcs status on w2 and you can watch the resources stop one by one and then start on w2 in the order defined earlier



[root@w6 test]# echo 123 > test.txt
[root@w6 test]# cat test.txt 
123

##The client mount keeps working as before

[root@w2 mfs]# cat changelog.0.mfs 
2: 1509286097|CSDBOP(0,3232245016,9422,1)
3: 1509286097|CHUNKADD(1,1,1509890897)
4: 1509286100|CSDBOP(0,3232245017,9422,2)
5: 1509286101|SESADD(#9773763645954448363,1,16,0000,1000,1000,1000,1000,1,9,0,4294967295,3232245018,/test):1
6: 1509286124|ACQUIRE(1,1)
7: 1509286124|ACQUIRE(1,2)
8: 1509286202|CREATE(1,test.txt,1,438,18,1000,1000,0):2
9: 1509286203|WRITE(2,0,1,0):2
10: 1509286204|AMTIME(2,1509286202,1509286203,1509286203)
11: 1509286204|LENGTH(2,4,0)
12: 1509286204|UNLOCK(2)
13: 1509286208|ACCESS(1)
14: 1509286209|AMTIME(2,1509286208,1509286203,1509286203)


10. A hypothetical

If your luck is truly terrible and both mfsmaster hosts die, don't panic: the metalogger server is still there, and it can be promoted to mfsmaster.

The exact procedure is not demonstrated here. Roughly: point MASTER_HOST on the chunk servers (and on the metalogger host itself) at the metalogger's own address, start mfsmaster on that host, and restart the chunk server services. Look up the details of promoting a metalogger to mfsmaster if you need them; a rough sketch follows below.
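
A rough, untested sketch of that promotion on w3, assuming the master binaries exist there (they do, since w3 was built with the same configure flags as the masters) and assuming MooseFS 3.x metadata recovery with mfsmaster -a:

## on w3: prepare the master config files from the installed samples
cd /usr/local/mfs/etc/mfs
cp mfsmaster.cfg.sample mfsmaster.cfg
cp mfsexports.cfg.sample mfsexports.cfg

## on w3: reuse the metalogger's copies of the metadata and changelogs (the _ml variants)
cd /usr/local/mfs/var/mfs
cp metadata_ml.mfs.back metadata.mfs.back
for f in changelog_ml.*.mfs; do cp "$f" "changelog.${f#changelog_ml.}"; done

## on w3: recover the metadata from the changelogs and start the master
/usr/local/mfs/sbin/mfsmaster -a

## on w4 and w5: point the chunk servers at w3 instead of the dead VIP and restart them
sed -i 's/^MASTER_HOST.*/MASTER_HOST = 192.168.37.23/' /usr/local/mfs/etc/mfs/mfschunkserver.cfg
/usr/local/mfs/sbin/mfschunkserver restart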