# Probleme mit DRBD, OCFS2 und Vmware-Server

## simp

Guten Abend zusammen,

nach langem Probieren und Googeln komme ich hier nicht weiter. Ich habe zwei identische Maxdata-Server mit RAID 1. Beide Server sind mit 2x 36 GB SCSI-HDDs ausgestattet und haben 2 GB RAM und 2 LAN-Karten mit jeweils 1 GBit. Meine Stage4-Installation beinhaltet hardened-kernel 2.6.24-hardened-r3 sowie dhcpcd, dnsmasq, openntp, syslog-ng und fcron. Beide Server, Node1 und Node2, haben folgende Partitionen:

```

Disk /dev/sda: 36.6 GB, 36660072960 bytes

255 heads, 63 sectors/track, 4457 cylinders

Units = cylinders of 16065 * 512 = 8225280 bytes

Disk identifier: 0x000f2571

   Device Boot      Start         End      Blocks   Id  System

/dev/sda1   *           1          13      104391   83  Linux

/dev/sda2              14         257     1959930   82  Linux swap / Solaris

/dev/sda3             258        1474     9775552+  83  Linux

/dev/sda4            1475        4457    23960947+  8e  Linux LVM

```

/boot und / sind mit ext2 bzw. ext3 formatiert. Auf beiden Nodes laufen drbd-8.0.12 und drbd-kernel-8.0.12. Die Partition sda4 habe ich für DRBD freigegeben.

/etc/drbd.conf

```

global {

        usage-count no;

}

common {

}

resource "drbd0" {

        # transfer protocol to use.

        # C: write IO is reported as completed, if we know it has

        #    reached _both_ local and remote DISK.

        #    * for critical transactional data.

        # B: write IO is reported as completed, if it has reached

        #    local DISK and remote buffer cache.

        #    * for most cases.

        # A: write IO is reported as completed, if it has reached

        #    local DISK and local tcp send buffer. (see also sndbuf-size)

        #    * for high latency networks

        #

        protocol C;

        handlers {

                # what should be done in case the cluster starts up in

                # degraded mode, but knows it has inconsistent data.

                # pri-on-incon-degr "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f";

                # pri-on-incon-degr "echo 'DRBD: primary requested but inconsistent!' | wall; /etc/init.d/heartbeat stop"; #"halt -f";

                # pri-lost-after-sb "echo 'DRBD: primary requested but lost!' | wall; /etc/init.d/heartbeat stop"; #"halt -f";

                # pri-on-incon-degr "echo o > /proc/sysrq-trigger";

                # pri-lost-after-sb "echo o > /proc/sysrq-trigger";

                # local-io-error "echo o > /proc/sysrq-trigger";

        }

        startup {

                #The init script drbd(8) blocks the boot process until the DRBD resources are connected.  When the  cluster  manager

                #starts later, it does not see a resource with internal split-brain.  In case you want to limit the wait time, do it

                #here.  Default is 0, which means unlimited. The unit is seconds.

                wfc-timeout 0;  # 0 = unlimited (wait indefinitely for the peer)

                # Wait for connection timeout if this node was a degraded cluster.

                # In case a degraded cluster (= cluster with only one node left)

                # is rebooted, this timeout value is used.

                #

                degr-wfc-timeout 120;    # 2 minutes.

                become-primary-on both;  # drbdadm primary all

        }

        syncer {

                rate 100M;

                # This is now expressed with "after res-name"

                #group 1;

                al-extents 257;

        }

        net {

                # TODO: Should these timeouts be relative to some heartbeat settings?

                # timeout       60;    #  6 seconds  (unit = 0.1 seconds)

                # connect-int   10;    # 10 seconds  (unit = 1 second)

                # ping-int      10;    # 10 seconds  (unit = 1 second)

                # if the connection to the peer is lost you have the choice of

                #  "reconnect"   -> Try to reconnect (AKA WFConnection state)

                #  "stand_alone" -> Do not reconnect (AKA StandAlone state)

                #  "freeze_io"   -> Try to reconnect but freeze all IO until

                #                   the connection is established again.

                # FIXME This appears to be obsolete

                # on-disconnect reconnect;

                # FIXME Experimental Crap

                #cram-hmac-alg "sha256";

                #shared-secret "secretPassword555";

                #after-sb-0pri discard-younger-primary;

                #after-sb-1pri consensus;

                #after-sb-2pri disconnect;

                #rr-conflict disconnect;

                allow-two-primaries;

        }

        disk {

                # if the lower level device reports io-error you have the choice of

                #  "pass_on"  ->  Report the io-error to the upper layers.

                #                 Primary   -> report it to the mounted file system.

                #                 Secondary -> ignore it.

                #  "panic"    ->  The node leaves the cluster by doing a kernel panic.

                #  "detach"   ->  The node drops its backing storage device, and

                #                 continues in disk less mode.

                #

                on-io-error   pass_on;

                # Under  fencing  we understand preventive measures to avoid situations where both nodes are

                # primary and disconnected (AKA split brain).

                fencing dont-care;

                # In case you only want to use a fraction of the available space

                # you might use the "size" option here.

                #

                # size 10G;

        }

        on node1 {

                device          /dev/drbd0;

                disk            /dev/sda4;

                address         192.168.0.1:7788;

                meta-disk       internal;

        }

        on node2 {

                device          /dev/drbd0;

                disk            /dev/sda4;

                address         192.168.0.2:7788;

                meta-disk       internal;

        }

}

```

Zur besseren Lastverteilung habe ich den normalen Traffic (eth0) vom DRBD-Netz (eth1) getrennt. Alle Dienste laufen und eine Synchronisation ist möglich. Beide Systeme zeigen mir Folgendes:

```

version: 8.0.12 (api:86/proto:86)

GIT-hash: 5c9f89594553e32adb87d9638dce591782f947e3 build by root@node1, 2008-08-04 01:11:49

 0: cs:Connected st:Primary/Primary ds:UpToDate/UpToDate C r---

    ns:2425980 nr:336994 dw:338914 dr:2576160 al:84 bm:551 lo:0 pe:0 ua:0 ap:0

        resync: used:0/61 hits:65659 misses:147 starving:0 dirty:0 changed:147

        act_log: used:0/257 hits:990 misses:84 starving:0 dirty:0 changed:84

```

Als weiteren Schritt für meinen HA-Cluster habe ich mich nach langem Testen gegen GFS2 entschieden und verwende OCFS2. Die Konfiguration von OCFS2 fiel mir wesentlich leichter und brachte mich schneller zum Ziel, obwohl ich beides ausprobiert und in Verbindung mit DRBD getestet habe. Hier noch schnell die OCFS2-Konfiguration (sys-fs/ocfs2-tools-1.3.9):

/etc/ocfs2/cluster:

```

node:

        ip_port = 7777

        ip_address = 10.0.2.101

        number = 0

        name = node1

        cluster = ocfs2

node:

        ip_port = 7777

        ip_address = 10.0.2.102

        number = 1

        name = node2

        cluster = ocfs2

cluster:

        node_count = 2

        name = ocfs2

```

Beide Dienste laufen auch ohne Probleme. Nun ist es möglich, auf beiden Servern das gewünschte gesharte Verzeichnis zu mounten.

Voraussetzung dafür ist das Formatieren mit mkfs.ocfs2 /dev/drbd0.

Nun kann ich das Verzeichnis auf beiden Servern gleichzeitig ansprechen und mit Daten füllen. Alles funktioniert problemlos. Sobald ich aber VMware Server starte, die VMs in das gesharte Verzeichnis lege und eine VM auf einem der beiden Server starte bzw. Windows installieren will, bricht während der Installation die DRBD-Verbindung zusammen und das komplette System friert ein. Ich kann mich nicht einmal mehr an der Konsole anmelden.

```

drbd0: Resync done (total 53 sec; paused 0 sec; 25888 K/sec)

drbd0: conn( SyncSource -> Connected ) pdsk( Inconsistent -> UpToDate )

drbd0: Writing meta data super block now.

ocfs2_dlm: Nodes in domain ("E1F4ABD93CB94CB7A3FEEA43E919174B"): 0

kjournald starting.  Commit interval 5 seconds

ocfs2: Mounting device (147,0) on (node 0, slot 0) with ordered data mode.

o2net: accepted connection from node node2 (num 1) at 10.0.2.102:7777

ocfs2_dlm: Node 1 joins domain E1F4ABD93CB94CB7A3FEEA43E919174B

ocfs2_dlm: Nodes in domain ("E1F4ABD93CB94CB7A3FEEA43E919174B"): 0 1

[5660]: Module vmmon: unloaded

bridge-eth0: down

bridge-eth0: detached

[5750]: VMCI: Driver initialized.

[5750]: Module vmmon: registered with major=10 minor=165

[5750]: Module vmmon: initialized

/dev/vmnet: open called by PID 5792 (vmnet-bridge)

/dev/vmnet: hub 0 does not exist, allocating memory.

/dev/vmnet: port on hub 0 successfully opened

bridge-eth0: enabling the bridge

bridge-eth0: up

bridge-eth0: already up

bridge-eth0: attached

drbd0: PingAck did not arrive in time.

drbd0: peer( Primary -> Unknown ) conn( Connected -> NetworkFailure ) pdsk( UpToDate -> DUnknown )

drbd0: asender terminated

drbd0: Terminating asender thread

drbd0: Creating new current UUID

drbd0: Writing meta data super block now.

drbd0: short read expecting header on sock: r=-512

drbd0: tl_clear()

drbd0: Connection closed

drbd0: conn( NetworkFailure -> Unconnected )

drbd0: receiver terminated

drbd0: receiver (re)started

drbd0: conn( Unconnected -> WFConnection )

o2net: no longer connected to node node2 (num 1) at 10.0.2.102:7777

(6063,2):dlm_get_lock_resource:913 E1F4ABD93CB94CB7A3FEEA43E919174B:M0000000000000000000019987a2e83: at least one node (1) to recover before lock mastery can begin

(6063,2):dlm_get_lock_resource:967 E1F4ABD93CB94CB7A3FEEA43E919174B:M0000000000000000000019987a2e83: at least one node (1) to recover before lock mastery can begin

(5602,0):dlm_get_lock_resource:913 E1F4ABD93CB94CB7A3FEEA43E919174B:$RECOVERY: at least one node (1) to recover before lock mastery can begin

(5602,0):dlm_get_lock_resource:947 E1F4ABD93CB94CB7A3FEEA43E919174B: recovery map is not empty, but must master $RECOVERY lock now

(6063,2):ocfs2_replay_journal:996 Recovering node 1 from slot 1 on device (147,0)

kjournald starting.  Commit interval 5 seconds

r8169: eth1: link down

r8169: eth1: link up

drbd0: Handshake successful: DRBD Network Protocol version 86

drbd0: conn( WFConnection -> WFReportParams )

drbd0: Starting asender thread (from drbd0_receiver [4976])

drbd0: peer( Unknown -> Secondary ) conn( WFReportParams -> WFBitMapS ) pdsk( DUnknown -> UpToDate )

drbd0: Writing meta data super block now.

drbd0: conn( WFBitMapS -> SyncSource ) pdsk( UpToDate -> Inconsistent )

drbd0: Began resync as SyncSource (will sync 1052700 KB [263175 bits set]).

drbd0: Writing meta data super block now.

drbd0: peer( Secondary -> Primary )

drbd0: Resync done (total 39 sec; paused 0 sec; 26992 K/sec)

drbd0: conn( SyncSource -> Connected ) pdsk( Inconsistent -> UpToDate )

drbd0: Writing meta data super block now.

```

Ich kann das Problem nicht nachvollziehen, denn ich kann beide Server normal ansprechen und auf beiden Servern auf das gesharte Verzeichnis zugreifen, und das auch mit 34 Mb/s. Nur VMware will nicht. Aber warum?

Keines der Logs gibt etwas her, weder syslog noch kernel.log.

Hat jemand eine Idee?

Vielen Dank für eure Hilfe   :Very Happy: 

PS: Ich habe auch schon die Netzwerkkarten und den Switch getauscht – nichts, gleiches Problem. Das Gleiche passiert auch mit GFS2.

```

Portage 2.1.4.4 (hardened/x86/2.6, gcc-3.4.6, glibc-2.6.1-r0, 2.6.24-hardened-r3 i686)

=================================================================

System uname: 2.6.24-hardened-r3 i686 Intel(R) Xeon(TM) CPU 2.80GHz

Timestamp of tree: Sun, 03 Aug 2008 21:30:01 +0000

app-shells/bash:     3.2_p33

dev-lang/python:     2.5.2-r6

sys-apps/baselayout: 1.12.11.1

sys-apps/sandbox:    1.2.18.1-r2

sys-devel/autoconf:  2.61-r2

sys-devel/automake:  1.10.1

sys-devel/binutils:  2.18-r3

sys-devel/gcc-config: 1.4.0-r4

sys-devel/libtool:   1.5.26

virtual/os-headers:  2.6.23-r3

ACCEPT_KEYWORDS="x86"

CBUILD="i686-pc-linux-gnu"

CFLAGS="-march=i686 -O2 -pipe -fomit-frame-pointer -fforce-addr -mno-tls-direct-seg-refs"

CHOST="i686-pc-linux-gnu"

CONFIG_PROTECT="/etc"

CONFIG_PROTECT_MASK="/etc/env.d /etc/fonts/fonts.conf /etc/gconf /etc/revdep-rebuild /etc/terminfo /etc/udev/rules.d"

CXXFLAGS="-march=i686 -O2 -pipe -fomit-frame-pointer -fforce-addr -mno-tls-direct-seg-refs"

DISTDIR="/usr/portage/distfiles"

FEATURES="autoconfig candy distlocks fixpackages metadata-transfer parallel-fetch sandbox sfperms strict unmerge-orphans userfetch"

GENTOO_MIRRORS="http://distfiles.gentoo.org http://distro.ibiblio.org/pub/linux/distributions/gentoo"

LANG="en_US.utf8"

LC_ALL="en_US.utf8"

LINGUAS="de"

MAKEOPTS="-j2"

PKGDIR="/usr/portage/packages"

PORTAGE_RSYNC_OPTS="--recursive --links --safe-links --perms --times --compress --force --whole-file --delete --stats --timeout=180 --exclude=/distfiles --exclude=/local --exclude=/packages"

PORTAGE_TMPDIR="/var/tmp"

PORTDIR="/usr/portage"

PORTDIR_OVERLAY="/usr/portage/local"

SYNC="rsync://rsync.gentoo.org/gentoo-portage"

USE="3dnow 3dnowext berkdb bzip2 clamav cracklib crypt gmp hardened iproute2 logrotate mmx mmxext ncurses nls nptl nptlonly pam perl pic python readline screen slang sse sse2 ssl tcpd threads unicode urandom x86 xml zlib" ALSA_CARDS="ali5451 als4000 atiixp atiixp-modem bt87x ca0106 cmipci emu10k1  emu10k1x ens1370 ens1371 es1938 es1968 fm801 hda-intel intel8x0 intel8x0m  maestro3 trident usb-audio via82xx via82xx-modem ymfpci" ALSA_PCM_PLUGINS="adpcm alaw asym copy dmix dshare dsnoop empty extplug file hooks iec958 ioplug ladspa lfloat linear meter mmap_emul mulaw multi null plug rate route share shm softvol" APACHE2_MODULES="actions alias auth_basic authn_alias authn_anon authn_dbm authn_default authn_file authz_dbm authz_default authz_groupfile authz_host authz_owner authz_user autoindex cache dav dav_fs dav_lock deflate dir disk_cache env expires ext_filter file_cache filter headers include info log_config logio mem_cache mime mime_magic negotiation rewrite setenvif speling status unique_id userdir usertrack vhost_alias" ELIBC="glibc" INPUT_DEVICES="mouse keyboard" KERNEL="linux" LCD_DEVICES="bayrad cfontz cfontz633 glk hd44780 lb216 lcdm001 mtxorb ncurses text" LINGUAS="de" USERLAND="GNU" VIDEO_CARDS="apm ark chips cirrus cyrix dummy fbdev glint i128 i740 i810 imstt      mach64 mga neomagic nsc nv r128 radeon rendition s3 s3virge savage      siliconmotion sis sisusb tdfx tga trident tseng v4l vesa vga via vmware    voodoo"

Unset:  CPPFLAGS, CTARGET, EMERGE_DEFAULT_OPTS, INSTALL_MASK, LDFLAGS, PORTAGE_COMPRESS, PORTAGE_COMPRESS_FLAGS, PORTAGE_RSYNC_EXTRA_OPTS

```

----------

