diff mbox

make mkcephfs and init-ceph osd filesystem handling more flexible

Message ID 1344526971-17035-1-git-send-email-danny.kukawka@bisect.de (mailing list archive)
State New, archived
Headers show

Commit Message

Danny Kukawka Aug. 9, 2012, 3:42 p.m. UTC
Remove btrfs specific keys and replace them by more generic
keys to be able to replace btrfs with e.g. xfs or ext4 easily.

Add new key to define the osd fs type: 'fstype', which can get
defined in the [osd] section for all OSDs.

Replace:
- 'btrfs devs' -> 'devs'
- 'btrfs path' -> 'fs path'
- 'btrfs options' -> 'fs options'
- mkcephfs: replace --mkbtrfs with --mkfs
- init-ceph: replace --btrfs with --fsmount, --nobtrfs
with --nofsmount, --btrfsumount with --fsumount

Update documentation, manpage and example config files.

Signed-off-by: Danny Kukawka <danny.kukawka@bisect.de>
---
 doc/man/8/mkcephfs.rst                      |   17 +++-----
 man/mkcephfs.8                              |   15 +++----
 src/ceph.conf.twoosds                       |    7 ++--
 src/init-ceph.in                            |   50 +++++++++++++---------
 src/mkcephfs.in                             |   60 +++++++++++++++++----------
 src/sample.ceph.conf                        |   15 ++++---
 src/test/cli/osdmaptool/ceph.conf.withracks |    3 +-
 7 Dateien geändert, 95 Zeilen hinzugefügt(+), 72 Zeilen entfernt(-)

Comments

Tommi Virtanen Aug. 9, 2012, 4:26 p.m. UTC | #1
On Thu, Aug 9, 2012 at 8:42 AM, Danny Kukawka <danny.kukawka@bisect.de> wrote:
> Remove btrfs specific keys and replace them by more generic
> keys to be able to replace btrfs with e.g. xfs or ext4 easily.
>
> Add new key to define the osd fs type: 'fstype', which can get
> defined in the [osd] section for all OSDs.

I'm going to say let's not do this. In fact, I've wanted to remove the
"btrfs devs" option for a while now, as it keeps leading people down
the wrong path:
https://github.com/ceph/ceph/commits/kill-btrfs-devs

mkcephfs is not a viable route forward. For example, it is unable to
expand a pre-existing cluster.

The new "OSD hotplugging" style init is much, much nicer. And does
more than just mkfs & mount.

>
> Replace:
> - 'btrfs devs' -> 'devs'
> - 'btrfs path' -> 'fs path'
> - 'btrfs options' -> 'fs options'
> - mkcephfs: replace --mkbtrfs with --mkfs
> - init-ceph: replace --btrfs with --fsmount, --nobtrfs
> with --nofsmount, --btrfsumount with --fsumount
>
> Update documentation, manpage and example config files.
>
> Signed-off-by: Danny Kukawka <danny.kukawka@bisect.de>
> ---
>  doc/man/8/mkcephfs.rst                      |   17 +++-----
>  man/mkcephfs.8                              |   15 +++----
>  src/ceph.conf.twoosds                       |    7 ++--
>  src/init-ceph.in                            |   50 +++++++++++++---------
>  src/mkcephfs.in                             |   60 +++++++++++++++++----------
>  src/sample.ceph.conf                        |   15 ++++---
>  src/test/cli/osdmaptool/ceph.conf.withracks |    3 +-
>  7 Dateien geändert, 95 Zeilen hinzugefügt(+), 72 Zeilen entfernt(-)
>
> diff --git a/doc/man/8/mkcephfs.rst b/doc/man/8/mkcephfs.rst
> index ddc378a..dd3fbd5 100644
> --- a/doc/man/8/mkcephfs.rst
> +++ b/doc/man/8/mkcephfs.rst
> @@ -70,20 +70,15 @@ Options
>     default is ``/etc/ceph/keyring`` (or whatever is specified in the
>     config file).
>
> -.. option:: --mkbtrfs
> +.. option:: --mkfs
>
> -   Create and mount the any btrfs file systems specified in the
> -   ceph.conf for OSD data storage using mkfs.btrfs. The "btrfs devs"
> -   and (if it differs from "osd data") "btrfs path" options must be
> -   defined.
> +   Create and mount any file system specified in the ceph.conf for
> +   OSD data storage using mkfs. The "devs" and (if it differs from
> +   "osd data") "fs path" options must be defined.
>
>     **NOTE** Btrfs is still considered experimental.  This option
> -   can ease some configuration pain, but is the use of btrfs is not
> -   required when ``osd data`` directories are mounted manually by the
> -   adminstrator.
> -
> -   **NOTE** This option is deprecated and will be removed in a future
> -   release.
> +   can ease some configuration pain, but is not required when
> +   ``osd data`` directories are mounted manually by the adminstrator.
>
>  .. option:: --no-copy-conf
>
> diff --git a/man/mkcephfs.8 b/man/mkcephfs.8
> index 8544a01..22a5335 100644
> --- a/man/mkcephfs.8
> +++ b/man/mkcephfs.8
> @@ -32,7 +32,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
>  .
>  .SH SYNOPSIS
>  .nf
> -\fBmkcephfs\fP [ \-c \fIceph.conf\fP ] [ \-\-mkbtrfs ] [ \-a, \-\-all\-hosts [ \-k
> +\fBmkcephfs\fP [ \-c \fIceph.conf\fP ] [ \-\-mkfs ] [ \-a, \-\-all\-hosts [ \-k
>  \fI/path/to/admin.keyring\fP ] ]
>  .fi
>  .sp
> @@ -111,19 +111,16 @@ config file).
>  .UNINDENT
>  .INDENT 0.0
>  .TP
> -.B \-\-mkbtrfs
> -Create and mount the any btrfs file systems specified in the
> -ceph.conf for OSD data storage using mkfs.btrfs. The "btrfs devs"
> -and (if it differs from "osd data") "btrfs path" options must be
> +.B \-\-mkfs
> +Create and mount any file systems specified in the
> +ceph.conf for OSD data storage using mkfs.*. The "devs"
> +and (if it differs from "osd data") "fs path" options must be
>  defined.
>  .sp
>  \fBNOTE\fP Btrfs is still considered experimental.  This option
> -can ease some configuration pain, but is the use of btrfs is not
> +can ease some configuration pain, but the use of this option is not
>  required when \fBosd data\fP directories are mounted manually by the
>  adminstrator.
> -.sp
> -\fBNOTE\fP This option is deprecated and will be removed in a future
> -release.
>  .UNINDENT
>  .INDENT 0.0
>  .TP
> diff --git a/src/ceph.conf.twoosds b/src/ceph.conf.twoosds
> index c0cfc68..05ca754 100644
> --- a/src/ceph.conf.twoosds
> +++ b/src/ceph.conf.twoosds
> @@ -67,7 +67,8 @@
>         debug journal = 20
>         log dir = /data/cosd$id
>         osd data = /mnt/osd$id
> -       btrfs options = "flushoncommit,usertrans"
> +       fs options = "flushoncommit,usertrans"
> +       fstype = btrfs
>  ;      user = root
>
>  ;      osd journal = /mnt/osd$id/journal
> @@ -75,8 +76,8 @@
>         osd journal = "/dev/disk/by-path/pci-0000:05:02.0-scsi-6:0:0:0"
>  ;      filestore max sync interval = 1
>
> -       btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0"
> -;      btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0 \
> +       devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0"
> +;      devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0 \
>  ;                 /dev/disk/by-path/pci-0000:05:01.0-scsi-3:0:0:0 \
>  ;                 /dev/disk/by-path/pci-0000:05:01.0-scsi-4:0:0:0 \
>  ;                 /dev/disk/by-path/pci-0000:05:01.0-scsi-5:0:0:0 \
> diff --git a/src/init-ceph.in b/src/init-ceph.in
> index a8c5a29..32bcc9a 100644
> --- a/src/init-ceph.in
> +++ b/src/init-ceph.in
> @@ -100,8 +100,8 @@ docrun=
>  allhosts=0
>  debug=0
>  monaddr=
> -dobtrfs=1
> -dobtrfsumount=0
> +dofsmount=1
> +dofsumount=0
>  verbose=0
>
>  while echo $1 | grep -q '^-'; do     # FIXME: why not '^-'?
> @@ -130,14 +130,14 @@ case $1 in
>             shift
>             MON_ADDR=$1
>             ;;
> -    --btrfs)
> -           dobtrfs=1
> +    --fsmount)
> +           dofsmount=1
>             ;;
> -    --nobtrfs)
> -           dobtrfs=0
> +    --nofsmount)
> +           dofsmount=0
>             ;;
> -    --btrfsumount)
> -           dobtrfsumount=1
> +    --fsumount)
> +           dofsumount=1
>             ;;
>      --conf | -c)
>             [ -z "$2" ] && usage_exit
> @@ -222,9 +222,9 @@ for name in $what; do
>
>      if echo $name | grep -q ^osd; then
>         get_conf osd_data "" "osd data"
> -       get_conf btrfs_path "$osd_data" "btrfs path"  # mount point defaults so osd data
> -       get_conf btrfs_devs "" "btrfs devs"
> -       first_dev=`echo $btrfs_devs | cut '-d ' -f 1`
> +       get_conf fs_path "$osd_data" "fs path"  # mount point defaults so osd data
> +       get_conf fs_devs "" "devs"
> +       first_dev=`echo $fs_devs | cut '-d ' -f 1`
>      fi
>
>      # do lockfile, if RH
> @@ -262,13 +262,25 @@ for name in $what; do
>
>             cmd="$wrap $cmd $runmode"
>
> -           if [ $dobtrfs -eq 1 ] && [ -n "$btrfs_devs" ]; then
> +           if [ $dofsmount -eq 1 ] && [ -n "$fs_devs" ]; then
>                 get_conf pre_mount "true" "pre mount command"
> -               get_conf btrfs_opt "noatime" "btrfs options"
> -               [ -n "$btrfs_opt" ] && btrfs_opt="-o $btrfs_opt"
> +               get_conf fs_opt "noatime" "fs options"
> +               get_conf fs_type "" "fstype"
> +
> +               if [ -z "$fs_type" ]; then
> +                   echo No filesystem type defined!
> +                   exit 0
> +                fi
> +
> +               [ -n "$fs_opt" ] && fs_opt="-o $fs_opt"
>                 [ -n "$pre_mount" ] && do_cmd "$pre_mount"
> -               echo Mounting Btrfs on $host:$btrfs_path
> -               do_root_cmd "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $btrfs_path' /proc/mounts || mount -t btrfs $btrfs_opt $first_dev $btrfs_path"
> +
> +               if [ "$fs_type" == "btrfs" ]; then
> +                   echo Mounting Btrfs on $host:$fs_path
> +                   do_root_cmd "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t btrfs $fs_opt $first_dev $fs_path"
> +               else
> +                   do_root_cmd "modprobe $fs_type ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t $fs_type $fs_opt $first_dev $fs_path"
> +               fi
>             fi
>             echo Starting Ceph $name on $host...
>             mkdir -p $RUN_DIR
> @@ -289,9 +301,9 @@ for name in $what; do
>             stop_daemon $name ceph-$type $pid_file
>             [ -n "$post_stop" ] && do_cmd "$post_stop"
>             [ -n "$lockfile" ] && [ "$?" -eq 0 ] && rm -f $lockfile
> -           if [ $dobtrfsumount -eq 1 ] && [ -n "$btrfs_devs" ]; then
> -               echo Unmounting Btrfs on $host:$btrfs_path
> -               do_root_cmd "umount $btrfs_path || true"
> +           if [ $dofsumount -eq 1 ] && [ -n "$fs_devs" ]; then
> +               echo Unmounting OSD volume on $host:$fs_path
> +               do_root_cmd "umount $fs_path || true"
>             fi
>             ;;
>
> diff --git a/src/mkcephfs.in b/src/mkcephfs.in
> index c507709..492d4b6 100644
> --- a/src/mkcephfs.in
> +++ b/src/mkcephfs.in
> @@ -60,7 +60,7 @@ else
>  fi
>
>  usage_exit() {
> -    echo "usage: $0 -a -c ceph.conf [-k adminkeyring] [--mkbtrfs]"
> +    echo "usage: $0 -a -c ceph.conf [-k adminkeyring] [--mkfs]"
>      echo "   to generate a new ceph cluster on all nodes; for advanced usage see man page"
>      echo "   ** be careful, this WILL clobber old data; check your ceph.conf carefully **"
>      exit
> @@ -70,7 +70,7 @@ usage_exit() {
>
>
>  allhosts=0
> -mkbtrfs=0
> +mkfs=0
>  preparemonmap=0
>  prepareosdfs=""
>  initdaemon=""
> @@ -130,8 +130,8 @@ case $1 in
>             preparemon=1
>              manual_action=1
>             ;;
> -    --mkbtrfs)
> -           mkbtrfs=1
> +    --mkfs)
> +           mkfs=1
>             ;;
>      --no-copy-conf)
>             nocopyconf=1
> @@ -306,21 +306,26 @@ if [ -n "$prepareosdfs" ]; then
>
>      get_conf osd_data "/var/lib/ceph/osd/ceph-$id" "osd data"
>      get_conf osd_journal "$osd_data/journal" "osd journal"
> -    get_conf btrfs_path "$osd_data" "btrfs path"  # mount point defaults so osd data
> -    get_conf btrfs_devs "" "btrfs devs"
> +    get_conf fs_path "$osd_data" "fs path"  # mount point defaults so osd data
> +    get_conf fs_devs "" "devs"
> +    get_conf fs_type "" "fstype"
>
> -    if [ -z "$btrfs_devs" ]; then
> -       echo "no btrfs devs defined for $name"
> +    if [ -z "$fs_devs" ]; then
> +       echo "no devs defined for $name"
> +       exit 0
> +    fi
> +    if [ -z "$fs_type" ]; then
> +       echo "no filesystem type defined for $name"
>         exit 0
>      fi
>
> -    first_dev=`echo $btrfs_devs | cut '-d ' -f 1`
> -    get_conf btrfs_opt "noatime" "btrfs options"
> -    [ -n "$btrfs_opt" ] && btrfs_opt="-o $btrfs_opt"
> +    first_dev=`echo $fs_devs | cut '-d ' -f 1`
> +    get_conf fs_opt "noatime" "fs options"
> +    [ -n "$fs_opt" ] && fs_opt="-o $fs_opt"
>      get_conf osd_user "root" "user"
>
> -    if [ -n "$osd_journal" ] && echo "$btrfs_devs" | grep -q -w "$osd_journal" ; then
> -       echo "ERROR: osd journal device ($osd_journal) also used by btrfs devs ($btrfs_devs)"
> +    if [ -n "$osd_journal" ] && echo "fs_devs" | grep -q -w "$osd_journal" ; then
> +       echo "ERROR: osd journal device ($osd_journal) also used by devs ($fs_devs)"
>         exit 1
>      fi
>
> @@ -330,18 +335,27 @@ if [ -n "$prepareosdfs" ]; then
>         test -d $osd_journal || mkdir -p `dirname $osd_journal`
>      fi
>
> -    umount $btrfs_path || true
> -    for f in $btrfs_devs ; do
> +    umount $fs_path || true
> +    for f in $fs_devs ; do
>         umount $f || true
>      done
>
> -    modprobe btrfs || true
> -    mkfs.btrfs $btrfs_devs
> -    btrfs device scan || btrfsctl -a
> -    sync   # seems to fix problems for some people...
> -    mount -t btrfs $btrfs_opt $first_dev $btrfs_path
> -    chown $osd_user $btrfs_path
> -    chmod +w $btrfs_path
> +    if [ "$fs_type" == "btrfs" ]; then
> +        modprobe btrfs || true
> +        mkfs.btrfs $fs_devs
> +        btrfs device scan || btrfsctl -a
> +       sync # seems to fix problems for some people...
> +    elif [ "$fs_type" == "xfs" ]; then
> +        modprobe xfs || true
> +       mkfs.xfs -f $fs_devs
> +    else
> +       modprobe $fs_type || true
> +       mkfs.$fs_type $fs_devs
> +    fi
> +
> +    mount -t $fs_type $fs_opt $first_dev $fs_path
> +    chown $osd_user $fs_path
> +    chmod +w $fs_path
>
>      exit 0
>  fi
> @@ -459,7 +473,7 @@ if [ $allhosts -eq 1 ]; then
>             fi
>         fi
>
> -       if [ $mkbtrfs -eq 1 ] && [ "$type" = "osd" ]; then
> +       if [ $mkfs -eq 1 ] && [ "$type" = "osd" ]; then
>             do_root_cmd "$0 -d $rdir --prepare-osdfs $name"
>         fi
>
> diff --git a/src/sample.ceph.conf b/src/sample.ceph.conf
> index 88f7f02..147777d 100644
> --- a/src/sample.ceph.conf
> +++ b/src/sample.ceph.conf
> @@ -131,27 +131,30 @@
>         ;debug filestore = 20
>         ;debug journal = 20
>
> +       ; The filesystem used on the volumes
> +       fstype = btrfs
> +
>  [osd.0]
>         host = delta
>
> -       ; if 'btrfs devs' is not specified, you're responsible for
> +       ; if 'devs' is not specified, you're responsible for
>         ; setting up the 'osd data' dir.  if it is not btrfs, things
>         ; will behave up until you try to recover from a crash (which
>         ; usually fine for basic testing).
> -       btrfs devs = /dev/sdx
> +       devs = /dev/sdx
>
>          ; If you want to specify some other mount options, you can do so.
>          ; The default values are rw,noatime
> -        ;btrfs options = rw,noatime
> +        ; options = rw,noatime
>
>  [osd.1]
>         host = epsilon
> -       btrfs devs = /dev/sdy
> +       devs = /dev/sdy
>
>  [osd.2]
>         host = zeta
> -       btrfs devs = /dev/sdx
> +       devs = /dev/sdx
>
>  [osd.3]
>         host = eta
> -       btrfs devs = /dev/sdy
> +       devs = /dev/sdy
> diff --git a/src/test/cli/osdmaptool/ceph.conf.withracks b/src/test/cli/osdmaptool/ceph.conf.withracks
> index 1e14411..87b0716 100644
> --- a/src/test/cli/osdmaptool/ceph.conf.withracks
> +++ b/src/test/cli/osdmaptool/ceph.conf.withracks
> @@ -42,7 +42,8 @@
>    keyring = /mnt/osd.$id/keyring
>    osd data = /mnt/osd.$id
>    osd journal = /dev/disk/by-label/osd.$id.journal
> -  btrfs devs = /dev/disk/by-label/osd.$id.data
> +  devs = /dev/disk/by-label/osd.$id.data
> +  fstype = btrfs
>  ; temp sage
>    debug osd = 20
>    debug ms = 1
> --
> 1.7.10.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jim Schutt Aug. 9, 2012, 4:46 p.m. UTC | #2
On 08/09/2012 10:26 AM, Tommi Virtanen wrote:
> mkcephfs is not a viable route forward. For example, it is unable to
> expand a pre-existing cluster.
>
> The new "OSD hotplugging" style init is much, much nicer. And does
> more than just mkfs & mount.

I'm embarrassed to admit I haven't been keeping up with this,
but I seem to recall that early versions didn't handle a
journal on a partition.  Did I get that wrong, or maybe that
capability exists now?  In the past I've found it to have a
small performance benefit, and would hate to lose it.

Thanks -- Jim

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Danny Kukawka Aug. 9, 2012, 4:49 p.m. UTC | #3
Am 09.08.2012 18:26, schrieb Tommi Virtanen:
> On Thu, Aug 9, 2012 at 8:42 AM, Danny Kukawka <danny.kukawka@bisect.de> wrote:
>> Remove btrfs specific keys and replace them by more generic
>> keys to be able to replace btrfs with e.g. xfs or ext4 easily.
>>
>> Add new key to define the osd fs type: 'fstype', which can get
>> defined in the [osd] section for all OSDs.
> 
> I'm going to say let's not do this. In fact, I've wanted to remove the
> "btrfs devs" option for a while now, as it keeps leading people down
> the wrong path:
> https://github.com/ceph/ceph/commits/kill-btrfs-devs
> 
> mkcephfs is not a viable route forward. For example, it is unable to
> expand a pre-existing cluster.
> 
> The new "OSD hotplugging" style init is much, much nicer. And does
> more than just mkfs & mount.

And where can I find this new "OSD hotplugging" style init? Is there any
documentation?

Danny
Tommi Virtanen Aug. 9, 2012, 4:53 p.m. UTC | #4
On Thu, Aug 9, 2012 at 9:46 AM, Jim Schutt <jaschut@sandia.gov> wrote:
> I'm embarrassed to admit I haven't been keeping up with this,
> but I seem to recall that early versions didn't handle a
> journal on a partition.  Did I get that wrong, or maybe that
> capability exists now?  In the past I've found it to have a
> small performance benefit, and would hate to lose it.

It's still not quite ready for prime time. Journal placement was cut
out to allow us to focus on the distributed aspects of it first; those
are now working pretty well. Putting much effort on the alternative is
probably not a good bet.

This ticket needs to get fixed (and defined better, first!) to have
more flexibility with journals:
http://tracker.newdream.net/issues/2398
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tommi Virtanen Aug. 9, 2012, 4:54 p.m. UTC | #5
On Thu, Aug 9, 2012 at 9:49 AM, Danny Kukawka <danny.kukawka@bisect.de> wrote:
> And where can I find this new "OSD hotplugging" style init? Is there any
> documentation?

mkcephfs does not use the new style. The Chef cookbooks we have do. If
you use the Juju Charms that Canonical has been working on, those
should also use the new way now.

Here's the installation documents using Chef:
http://ceph.com/docs/master/install/chef/
http://ceph.com/docs/master/config-cluster/chef/
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Danny Kukawka Aug. 9, 2012, 5:03 p.m. UTC | #6
Am 09.08.2012 18:53, schrieb Tommi Virtanen:
> On Thu, Aug 9, 2012 at 9:46 AM, Jim Schutt <jaschut@sandia.gov> wrote:
>> I'm embarrassed to admit I haven't been keeping up with this,
>> but I seem to recall that early versions didn't handle a
>> journal on a partition.  Did I get that wrong, or maybe that
>> capability exists now?  In the past I've found it to have a
>> small performance benefit, and would hate to lose it.
> 
> It's still not quite ready for prime time. Journal placement was cut
> out to allow us to focus on the distributed aspects of it first; those
> are now working pretty well. Putting much effort on the alternative is
> probably not a good bet.
> 
> This ticket needs to get fixed (and defined better, first!) to have
> more flexibility with journals:
> http://tracker.newdream.net/issues/2398

So you mean chef?! Will there be an alternative to simply setup a
cluster from console?

We (SUSE) are already working on an own chef ceph cookbook. But from
what I've seen till now it's really hard and more laborious to initially
setup a cluster with chef than with mkcephfs.

Danny
Tommi Virtanen Aug. 9, 2012, 5:12 p.m. UTC | #7
On Thu, Aug 9, 2012 at 10:03 AM, Danny Kukawka <danny.kukawka@bisect.de> wrote:
> So you mean chef?! Will there be an alternative to simply setup a
> cluster from console?
>
> We (SUSE) are already working on an own chef ceph cookbook. But from
> what I've seen till now it's really hard and more laborious to initially
> setup a cluster with chef than with mkcephfs.

I've written about this on the mailing list several times. We see a
lot of demand for Chef, but don't want to tie our hands -- Canonical
is working on Juju Charms, and I would like to see a mkcephfs
replacement that relies on just SSH connections from a workstation
node. We've made an explicit effort to improve the product as a whole,
and to make the Chef cookbook as thin as possible.

For some reason, the threading is broken on the archive, but this is a
fragment of the most recent thread that talked about this:

http://thread.gmane.org/gmane.comp.file-systems.ceph.devel/8263/focus=8265
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Aug. 10, 2012, 1:53 a.m. UTC | #8
On Thu, 9 Aug 2012, Tommi Virtanen wrote:
> On Thu, Aug 9, 2012 at 10:03 AM, Danny Kukawka <danny.kukawka@bisect.de> wrote:
> > So you mean chef?! Will there be an alternative to simply setup a
> > cluster from console?
> >
> > We (SUSE) are already working on an own chef ceph cookbook. But from
> > what I've seen till now it's really hard and more laborious to initially
> > setup a cluster with chef than with mkcephfs.
> 
> I've written about this on the mailing list several times. We see a
> lot of demand for Chef, but don't want to tie our hands -- Canonical
> is working on Juju Charms, and I would like to see a mkcephfs
> replacement that relies on just SSH connections from a workstation
> node. We've made an explicit effort to improve the product as a whole,
> and to make the Chef cookbook as thin as possible.
> 
> For some reason, the threading is broken on the archive, but this is a
> fragment of the most recent thread that talked about this:
> 
> http://thread.gmane.org/gmane.comp.file-systems.ceph.devel/8263/focus=8265

I think the real question is whether the planned "mkcephfs 2.0" is going 
to capture the equivalent functionality of being able to enumerate up 
front which osds will exist and which disks/journals they will use, and to 
bring them up.  Assuming it will (IMHO it needs to), can we make that 
compatible with the current way that mkcephfs is invoked (i.e., a 
ceph.conf file and a few command line args)?  Not all users (and I daresay
probably only a minority) will be using Chef/Juju/Puppet/whatever,
regardless of whether or not we feel that is the right way to do things 
and try to push them in that direction.

In any case, since the new osd hotplugging stuff isn't available now and 
is probably still a sprint or two off, I think we should consider applying 
this patch.  We aren't recommending btrfs across the board, and cluster 
bringup is currently painful on ext4/xfs/etc.  And, Danny already did the 
work.  :)

sage
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiaopong Tran Aug. 10, 2012, 2:01 a.m. UTC | #9
On 08/10/2012 09:53 AM, Sage Weil wrote:
> On Thu, 9 Aug 2012, Tommi Virtanen wrote:
>> On Thu, Aug 9, 2012 at 10:03 AM, Danny Kukawka <danny.kukawka@bisect.de> wrote:
>>> So you mean chef?! Will there be an alternative to simply setup a
>>> cluster from console?
>>>
>>> We (SUSE) are already working on an own chef ceph cookbook. But from
>>> what I've seen till now it's really hard and more laborious to initially
>>> setup a cluster with chef than with mkcephfs.
>>
>> I've written about this on the mailing list several times. We see a
>> lot of demand for Chef, but don't want to tie our hands -- Canonical
>> is working on Juju Charms, and I would like to see a mkcephfs
>> replacement that relies on just SSH connections from a workstation
>> node. We've made an explicit effort to improve the product as a whole,
>> and to make the Chef cookbook as thin as possible.
>>
>> For some reason, the threading is broken on the archive, but this is a
>> fragment of the most recent thread that talked about this:
>>
>> http://thread.gmane.org/gmane.comp.file-systems.ceph.devel/8263/focus=8265
>
> I think the real question is whether the planned "mkcephfs 2.0" is going
> to capture the equivalent functionality of being able to enumerate up
> front which osds will exist and which disks/journals they will use, and to
> bring them up.  Assuming it will (IMHO it needs to), can we make that
> compatible with the current way that mkcephfs is invoked (i.e., a
> ceph.conf file and a few command line args)?  Not everyone (and I daresay
> probably a minority) of users will be using Chef/Juju/Puppet/whatever,
> regardless of whether or not we feel that is the right way to do things
> and try to push them in that direction.
>
> In any case, since the new osd hotplugging stuff isn't available now and
> is probably still a sprint or two off, I think we should consider applying
> this patch.  We aren't recommending btrfs across the board, and cluster
> bringup is currently painful on ext4/xfs/etc.  And, Danny already did the
> work.  :)
>
> sage
> --

+1.

Rgds,

Xiaopong


--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Aug. 10, 2012, 3:54 p.m. UTC | #10
On Thu, 9 Aug 2012, Danny Kukawka wrote:
> Remove btrfs specific keys and replace them by more generic
> keys to be able to replace btrfs with e.g. xfs or ext4 easily.
> 
> Add new key to define the osd fs type: 'fstype', which can get
> defined in the [osd] section for all OSDs.
> 
> Replace:
> - 'btrfs devs' -> 'devs'
> - 'btrfs path' -> 'fs path'
> - 'btrfs options' -> 'fs options'
> - mkcephfs: replace --mkbtrfs with --mkfs
> - init-ceph: replace --btrfs with --fsmount, --nobtrfs
> with --nofsmount, --btrfsumount with --fsumount
> 
> Update documentation, manpage and example config files.

Maybe this should keep the old options as well, so that --mkbtrfs is an 
alias for --mkfs --btrfs...

Tommi, is this kind of invocation compatible with your notion of what 
mkcephfs 2.0 should be?  If we can jump to the target interface and 
rewrite the implementation in terms of the new tools that would capture 
the best of both worlds.

Thanks!
sage


> 
> Signed-off-by: Danny Kukawka <danny.kukawka@bisect.de>
> ---
>  doc/man/8/mkcephfs.rst                      |   17 +++-----
>  man/mkcephfs.8                              |   15 +++----
>  src/ceph.conf.twoosds                       |    7 ++--
>  src/init-ceph.in                            |   50 +++++++++++++---------
>  src/mkcephfs.in                             |   60 +++++++++++++++++----------
>  src/sample.ceph.conf                        |   15 ++++---
>  src/test/cli/osdmaptool/ceph.conf.withracks |    3 +-
>  7 Dateien geändert, 95 Zeilen hinzugefügt(+), 72 Zeilen entfernt(-)
> 
> diff --git a/doc/man/8/mkcephfs.rst b/doc/man/8/mkcephfs.rst
> index ddc378a..dd3fbd5 100644
> --- a/doc/man/8/mkcephfs.rst
> +++ b/doc/man/8/mkcephfs.rst
> @@ -70,20 +70,15 @@ Options
>     default is ``/etc/ceph/keyring`` (or whatever is specified in the
>     config file).
>  
> -.. option:: --mkbtrfs
> +.. option:: --mkfs
>  
> -   Create and mount the any btrfs file systems specified in the
> -   ceph.conf for OSD data storage using mkfs.btrfs. The "btrfs devs"
> -   and (if it differs from "osd data") "btrfs path" options must be
> -   defined.
> +   Create and mount any file system specified in the ceph.conf for 
> +   OSD data storage using mkfs. The "devs" and (if it differs from 
> +   "osd data") "fs path" options must be defined.
>  
>     **NOTE** Btrfs is still considered experimental.  This option
> -   can ease some configuration pain, but is the use of btrfs is not
> -   required when ``osd data`` directories are mounted manually by the
> -   adminstrator.
> -
> -   **NOTE** This option is deprecated and will be removed in a future
> -   release.
> +   can ease some configuration pain, but is not required when 
> +   ``osd data`` directories are mounted manually by the adminstrator.
>  
>  .. option:: --no-copy-conf
>  
> diff --git a/man/mkcephfs.8 b/man/mkcephfs.8
> index 8544a01..22a5335 100644
> --- a/man/mkcephfs.8
> +++ b/man/mkcephfs.8
> @@ -32,7 +32,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
>  .
>  .SH SYNOPSIS
>  .nf
> -\fBmkcephfs\fP [ \-c \fIceph.conf\fP ] [ \-\-mkbtrfs ] [ \-a, \-\-all\-hosts [ \-k
> +\fBmkcephfs\fP [ \-c \fIceph.conf\fP ] [ \-\-mkfs ] [ \-a, \-\-all\-hosts [ \-k
>  \fI/path/to/admin.keyring\fP ] ]
>  .fi
>  .sp
> @@ -111,19 +111,16 @@ config file).
>  .UNINDENT
>  .INDENT 0.0
>  .TP
> -.B \-\-mkbtrfs
> -Create and mount the any btrfs file systems specified in the
> -ceph.conf for OSD data storage using mkfs.btrfs. The "btrfs devs"
> -and (if it differs from "osd data") "btrfs path" options must be
> +.B \-\-mkfs
> +Create and mount any file systems specified in the
> +ceph.conf for OSD data storage using mkfs.*. The "devs"
> +and (if it differs from "osd data") "fs path" options must be
>  defined.
>  .sp
>  \fBNOTE\fP Btrfs is still considered experimental.  This option
> -can ease some configuration pain, but is the use of btrfs is not
> +can ease some configuration pain, but the use of this option is not
>  required when \fBosd data\fP directories are mounted manually by the
>  adminstrator.
> -.sp
> -\fBNOTE\fP This option is deprecated and will be removed in a future
> -release.
>  .UNINDENT
>  .INDENT 0.0
>  .TP
> diff --git a/src/ceph.conf.twoosds b/src/ceph.conf.twoosds
> index c0cfc68..05ca754 100644
> --- a/src/ceph.conf.twoosds
> +++ b/src/ceph.conf.twoosds
> @@ -67,7 +67,8 @@
>  	debug journal = 20
>  	log dir = /data/cosd$id
>  	osd data = /mnt/osd$id
> -	btrfs options = "flushoncommit,usertrans"
> +	fs options = "flushoncommit,usertrans"
> +	fstype = btrfs
>  ;	user = root
>  
>  ;	osd journal = /mnt/osd$id/journal
> @@ -75,8 +76,8 @@
>  	osd journal = "/dev/disk/by-path/pci-0000:05:02.0-scsi-6:0:0:0"
>  ;	filestore max sync interval = 1
>  
> -	btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0"
> -;	btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0 \
> +	devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0"
> +;	devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0 \
>  ;	      	   /dev/disk/by-path/pci-0000:05:01.0-scsi-3:0:0:0 \
>  ;		   /dev/disk/by-path/pci-0000:05:01.0-scsi-4:0:0:0 \
>  ;		   /dev/disk/by-path/pci-0000:05:01.0-scsi-5:0:0:0 \
> diff --git a/src/init-ceph.in b/src/init-ceph.in
> index a8c5a29..32bcc9a 100644
> --- a/src/init-ceph.in
> +++ b/src/init-ceph.in
> @@ -100,8 +100,8 @@ docrun=
>  allhosts=0
>  debug=0
>  monaddr=
> -dobtrfs=1
> -dobtrfsumount=0
> +dofsmount=1
> +dofsumount=0
>  verbose=0
>  
>  while echo $1 | grep -q '^-'; do     # FIXME: why not '^-'?
> @@ -130,14 +130,14 @@ case $1 in
>  	    shift
>  	    MON_ADDR=$1
>  	    ;;
> -    --btrfs)
> -	    dobtrfs=1
> +    --fsmount)
> +	    dofsmount=1
>  	    ;;
> -    --nobtrfs)
> -	    dobtrfs=0
> +    --nofsmount)
> +	    dofsmount=0
>  	    ;;
> -    --btrfsumount)
> -	    dobtrfsumount=1
> +    --fsumount)
> +	    dofsumount=1
>  	    ;;
>      --conf | -c)
>  	    [ -z "$2" ] && usage_exit
> @@ -222,9 +222,9 @@ for name in $what; do
>  
>      if echo $name | grep -q ^osd; then
>  	get_conf osd_data "" "osd data"
> -	get_conf btrfs_path "$osd_data" "btrfs path"  # mount point defaults so osd data
> -	get_conf btrfs_devs "" "btrfs devs"
> -	first_dev=`echo $btrfs_devs | cut '-d ' -f 1`
> +	get_conf fs_path "$osd_data" "fs path"  # mount point defaults so osd data
> +	get_conf fs_devs "" "devs"
> +	first_dev=`echo $fs_devs | cut '-d ' -f 1`
>      fi
>  
>      # do lockfile, if RH
> @@ -262,13 +262,25 @@ for name in $what; do
>  
>  	    cmd="$wrap $cmd $runmode"
>  	    
> -	    if [ $dobtrfs -eq 1 ] && [ -n "$btrfs_devs" ]; then
> +	    if [ $dofsmount -eq 1 ] && [ -n "$fs_devs" ]; then
>  		get_conf pre_mount "true" "pre mount command"
> -		get_conf btrfs_opt "noatime" "btrfs options"
> -		[ -n "$btrfs_opt" ] && btrfs_opt="-o $btrfs_opt"
> +		get_conf fs_opt "noatime" "fs options"
> +		get_conf fs_type "" "fstype"
> +
> +		if [ -z "$fs_type" ]; then
> +		    echo No filesystem type defined!
> +		    exit 0
> +                fi 
> +
> +		[ -n "$fs_opt" ] && fs_opt="-o $fs_opt"
>  		[ -n "$pre_mount" ] && do_cmd "$pre_mount"
> -		echo Mounting Btrfs on $host:$btrfs_path
> -		do_root_cmd "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $btrfs_path' /proc/mounts || mount -t btrfs $btrfs_opt $first_dev $btrfs_path"
> +
> +		if [ "$fs_type" = "btrfs" ]; then
> +		    echo Mounting Btrfs on $host:$fs_path
> +		    do_root_cmd "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t btrfs $fs_opt $first_dev $fs_path"
> +		else
> +		    do_root_cmd "modprobe $fs_type ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t $fs_type $fs_opt $first_dev $fs_path"
> +		fi
>  	    fi
>  	    echo Starting Ceph $name on $host...
>  	    mkdir -p $RUN_DIR
> @@ -289,9 +301,9 @@ for name in $what; do
>  	    stop_daemon $name ceph-$type $pid_file
>  	    [ -n "$post_stop" ] && do_cmd "$post_stop"
>  	    [ -n "$lockfile" ] && [ "$?" -eq 0 ] && rm -f $lockfile
> -	    if [ $dobtrfsumount -eq 1 ] && [ -n "$btrfs_devs" ]; then
> -		echo Unmounting Btrfs on $host:$btrfs_path
> -		do_root_cmd "umount $btrfs_path || true"
> +	    if [ $dofsumount -eq 1 ] && [ -n "$fs_devs" ]; then
> +		echo Unmounting OSD volume on $host:$fs_path
> +		do_root_cmd "umount $fs_path || true"
>  	    fi
>  	    ;;
>  
> diff --git a/src/mkcephfs.in b/src/mkcephfs.in
> index c507709..492d4b6 100644
> --- a/src/mkcephfs.in
> +++ b/src/mkcephfs.in
> @@ -60,7 +60,7 @@ else
>  fi
>  
>  usage_exit() {
> -    echo "usage: $0 -a -c ceph.conf [-k adminkeyring] [--mkbtrfs]"
> +    echo "usage: $0 -a -c ceph.conf [-k adminkeyring] [--mkfs]"
>      echo "   to generate a new ceph cluster on all nodes; for advanced usage see man page"
>      echo "   ** be careful, this WILL clobber old data; check your ceph.conf carefully **"
>      exit
> @@ -70,7 +70,7 @@ usage_exit() {
>  
>  
>  allhosts=0
> -mkbtrfs=0
> +mkfs=0
>  preparemonmap=0
>  prepareosdfs=""
>  initdaemon=""
> @@ -130,8 +130,8 @@ case $1 in
>  	    preparemon=1
>              manual_action=1
>  	    ;;
> -    --mkbtrfs)
> -	    mkbtrfs=1
> +    --mkfs)
> +	    mkfs=1
>  	    ;;
>      --no-copy-conf)
>  	    nocopyconf=1
> @@ -306,21 +306,26 @@ if [ -n "$prepareosdfs" ]; then
>  
>      get_conf osd_data "/var/lib/ceph/osd/ceph-$id" "osd data"
>      get_conf osd_journal "$osd_data/journal" "osd journal"
> -    get_conf btrfs_path "$osd_data" "btrfs path"  # mount point defaults so osd data
> -    get_conf btrfs_devs "" "btrfs devs"
> +    get_conf fs_path "$osd_data" "fs path"  # mount point defaults so osd data
> +    get_conf fs_devs "" "devs"
> +    get_conf fs_type "" "fstype"
>  
> -    if [ -z "$btrfs_devs" ]; then
> -	echo "no btrfs devs defined for $name"
> +    if [ -z "$fs_devs" ]; then
> +	echo "no devs defined for $name"
> +	exit 0
> +    fi
> +    if [ -z "$fs_type" ]; then
> +	echo "no filesystem type defined for $name"
>  	exit 0
>      fi
>  
> -    first_dev=`echo $btrfs_devs | cut '-d ' -f 1`
> -    get_conf btrfs_opt "noatime" "btrfs options"
> -    [ -n "$btrfs_opt" ] && btrfs_opt="-o $btrfs_opt"
> +    first_dev=`echo $fs_devs | cut '-d ' -f 1`
> +    get_conf fs_opt "noatime" "fs options"
> +    [ -n "$fs_opt" ] && fs_opt="-o $fs_opt"
>      get_conf osd_user "root" "user"
>      
> -    if [ -n "$osd_journal" ] && echo "$btrfs_devs" | grep -q -w "$osd_journal" ; then
> -	echo "ERROR: osd journal device ($osd_journal) also used by btrfs devs ($btrfs_devs)"
> +    if [ -n "$osd_journal" ] && echo "$fs_devs" | grep -q -w "$osd_journal" ; then
> +	echo "ERROR: osd journal device ($osd_journal) also used by devs ($fs_devs)"
>  	exit 1
>      fi
>      
> @@ -330,18 +335,27 @@ if [ -n "$prepareosdfs" ]; then
>  	test -d $osd_journal || mkdir -p `dirname $osd_journal`
>      fi
>  
> -    umount $btrfs_path || true
> -    for f in $btrfs_devs ; do
> +    umount $fs_path || true
> +    for f in $fs_devs ; do
>  	umount $f || true
>      done
>  
> -    modprobe btrfs || true
> -    mkfs.btrfs $btrfs_devs
> -    btrfs device scan || btrfsctl -a
> -    sync   # seems to fix problems for some people...
> -    mount -t btrfs $btrfs_opt $first_dev $btrfs_path
> -    chown $osd_user $btrfs_path
> -    chmod +w $btrfs_path
> +    if [ "$fs_type" = "btrfs" ]; then
> +        modprobe btrfs || true
> +        mkfs.btrfs $fs_devs
> +        btrfs device scan || btrfsctl -a
> +	sync # seems to fix problems for some people...
> +    elif [ "$fs_type" = "xfs" ]; then
> +        modprobe xfs || true
> +	mkfs.xfs -f $fs_devs
> +    else
> +	modprobe $fs_type || true
> +	mkfs.$fs_type $fs_devs
> +    fi
> +
> +    mount -t $fs_type $fs_opt $first_dev $fs_path
> +    chown $osd_user $fs_path
> +    chmod +w $fs_path
>      
>      exit 0
>  fi
> @@ -459,7 +473,7 @@ if [ $allhosts -eq 1 ]; then
>  	    fi
>  	fi
>  	
> -	if [ $mkbtrfs -eq 1 ] && [ "$type" = "osd" ]; then
> +	if [ $mkfs -eq 1 ] && [ "$type" = "osd" ]; then
>  	    do_root_cmd "$0 -d $rdir --prepare-osdfs $name"
>  	fi
>  
> diff --git a/src/sample.ceph.conf b/src/sample.ceph.conf
> index 88f7f02..147777d 100644
> --- a/src/sample.ceph.conf
> +++ b/src/sample.ceph.conf
> @@ -131,27 +131,30 @@
>  	;debug filestore = 20
>  	;debug journal = 20
>  
> +	; The filesystem used on the volumes
> +	fstype = btrfs
> +
>  [osd.0]
>  	host = delta
>  
> -	; if 'btrfs devs' is not specified, you're responsible for
> +	; if 'devs' is not specified, you're responsible for
>  	; setting up the 'osd data' dir.  if it is not btrfs, things
>  	; will behave up until you try to recover from a crash (which
>  	; usually fine for basic testing).
> -	btrfs devs = /dev/sdx
> +	devs = /dev/sdx
>  
>          ; If you want to specify some other mount options, you can do so.
>          ; The default values are rw,noatime
> -        ;btrfs options = rw,noatime
> +        ;fs options = rw,noatime
>  
>  [osd.1]
>  	host = epsilon
> -	btrfs devs = /dev/sdy
> +	devs = /dev/sdy
>  
>  [osd.2]
>  	host = zeta
> -	btrfs devs = /dev/sdx
> +	devs = /dev/sdx
>  
>  [osd.3]
>  	host = eta
> -	btrfs devs = /dev/sdy
> +	devs = /dev/sdy
> diff --git a/src/test/cli/osdmaptool/ceph.conf.withracks b/src/test/cli/osdmaptool/ceph.conf.withracks
> index 1e14411..87b0716 100644
> --- a/src/test/cli/osdmaptool/ceph.conf.withracks
> +++ b/src/test/cli/osdmaptool/ceph.conf.withracks
> @@ -42,7 +42,8 @@
>    keyring = /mnt/osd.$id/keyring
>    osd data = /mnt/osd.$id
>    osd journal = /dev/disk/by-label/osd.$id.journal
> -  btrfs devs = /dev/disk/by-label/osd.$id.data
> +  devs = /dev/disk/by-label/osd.$id.data
> +  fstype = btrfs
>  ; temp sage
>    debug osd = 20
>    debug ms = 1
> -- 
> 1.7.10.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Danny Kukawka Aug. 10, 2012, 4:03 p.m. UTC | #11
Am 10.08.2012 17:54, schrieb Sage Weil:
> On Thu, 9 Aug 2012, Danny Kukawka wrote:
>> Remove btrfs specific keys and replace them by more generic
>> keys to be able to replace btrfs with e.g. xfs or ext4 easily.
>>
>> Add new key to define the osd fs type: 'fstype', which can get
>> defined in the [osd] section for all OSDs.
>>
>> Replace:
>> - 'btrfs devs' -> 'devs'
>> - 'btrfs path' -> 'fs path'
>> - 'btrfs options' -> 'fs options'
>> - mkcephfs: replace --mkbtrfs with --mkfs
>> - init-ceph: replace --btrfs with --fsmount, --nobtrfs
>> with --nofsmount, --btrfsumount with --fsumount
>>
>> Update documentation, manpage and example config files.
> 
> Maybe this should keep the old options as well, so that --mkbtrfs is an 
> alias for --mkfs --btrfs...

I can add this to the patch, no problem!

> Tommi, is this kind of invocation compatible with your notion of what 
> mkcephfs 2.0 should be?  If we can jump to the target interface and 
> rewrite the implementation in terms of the new tools that would capture 
> the best of both worlds.

If you can point me to some documentation how the target interface
works, I could take a look at adapting mkcephfs to the new tools as soon
as the new interface/workflow is ready.

Danny
Sage Weil Aug. 10, 2012, 4:12 p.m. UTC | #12
On Fri, 10 Aug 2012, Danny Kukawka wrote:
> Am 10.08.2012 17:54, schrieb Sage Weil:
> > On Thu, 9 Aug 2012, Danny Kukawka wrote:
> >> Remove btrfs specific keys and replace them by more generic
> >> keys to be able to replace btrfs with e.g. xfs or ext4 easily.
> >>
> >> Add new key to define the osd fs type: 'fstype', which can get
> >> defined in the [osd] section for all OSDs.
> >>
> >> Replace:
> >> - 'btrfs devs' -> 'devs'
> >> - 'btrfs path' -> 'fs path'
> >> - 'btrfs options' -> 'fs options'
> >> - mkcephfs: replace --mkbtrfs with --mkfs
> >> - init-ceph: replace --btrfs with --fsmount, --nobtrfs
> >> with --nofsmount, --btrfsumount with --fsumount
> >>
> >> Update documentation, manpage and example config files.
> > 
> > Maybe this should keep the old options as well, so that --mkbtrfs is an 
> > alias for --mkfs --btrfs...
> 
> I can add this to the patch, no problem!
> 
> > Tommi, is this kind of invocation compatible with your notion of what 
> > mkcephfs 2.0 should be?  If we can jump to the target interface and 
> > rewrite the implementation in terms of the new tools that would capture 
> > the best of both worlds.
> 
> If you can point me to some documentation how the target interface
> works, I could take a look at adapting mkcephfs to the new tools as soon
> as the new interface/workflow is ready.

It's not defined yet.  I know Tommi has something in his head, but I'm not 
sure if it's something similar to the current one (a cluster-wide 
ceph.conf) or something else.  He's off today, so we probably have to wait 
to find out.

sage
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mandell Degerness Aug. 10, 2012, 4:57 p.m. UTC | #13
Comment below in-line:

On Fri, Aug 10, 2012 at 9:12 AM, Sage Weil <sage@inktank.com> wrote:
> On Fri, 10 Aug 2012, Danny Kukawka wrote:
>> Am 10.08.2012 17:54, schrieb Sage Weil:
>> > On Thu, 9 Aug 2012, Danny Kukawka wrote:
>> >> Remove btrfs specific keys and replace them by more generic
>> >> keys to be able to replace btrfs with e.g. xfs or ext4 easily.
>> >>
>> >> Add new key to define the osd fs type: 'fstype', which can get
>> >> defined in the [osd] section for all OSDs.
>> >>
>> >> Replace:
>> >> - 'btrfs devs' -> 'devs'
>> >> - 'btrfs path' -> 'fs path'
>> >> - 'btrfs options' -> 'fs options'
>> >> - mkcephfs: replace --mkbtrfs with --mkfs

I'm just a little concerned about this as --mkfs in other parts of the
project refers to the on-file-system changes to prepare the directory
structure for use by the daemons.  I actually prefer the usage here, I
only want to point out the confusing CLI parameter collision.

>> >> - init-ceph: replace --btrfs with --fsmount, --nobtrfs
>> >> with --nofsmount, --btrfsumount with --fsumount
>> >>
>> >> Update documentation, manpage and example config files.
>> >
>> > Maybe this should keep the old options as well, so that --mkbtrfs is an
>> > alias for --mkfs --btrfs...
>>
>> I can add this to the patch, no problem!
>>
>> > Tommi, is this kind of invocation compatible with your notion of what
>> > mkcephfs 2.0 should be?  If we can jump to the target interface and
>> > rewrite the implementation in terms of the new tools that would capture
>> > the best of both worlds.
>>
>> If you can point me to some documentation how the target interface
>> works, I could take a look at adapting mkcephfs to the new tools as soon
>> as the new interface/workflow is ready.
>
> It's not defined yet.  I know Tommi has something in his head, but I'm not
> sure if it's something similar to the current one (a cluster-wide
> ceph.conf) or something else.  He's off today, so we probably have to wait
> to find out.
>
> sage
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Aug. 16, 2012, 10:32 p.m. UTC | #14
On Fri, 10 Aug 2012, Sage Weil wrote:
> On Fri, 10 Aug 2012, Danny Kukawka wrote:
> > Am 10.08.2012 17:54, schrieb Sage Weil:
> > > On Thu, 9 Aug 2012, Danny Kukawka wrote:
> > >> Remove btrfs specific keys and replace them by more generic
> > >> keys to be able to replace btrfs with e.g. xfs or ext4 easily.
> > >>
> > >> Add new key to define the osd fs type: 'fstype', which can get
> > >> defined in the [osd] section for all OSDs.
> > >>
> > >> Replace:
> > >> - 'btrfs devs' -> 'devs'
> > >> - 'btrfs path' -> 'fs path'
> > >> - 'btrfs options' -> 'fs options'
> > >> - mkcephfs: replace --mkbtrfs with --mkfs
> > >> - init-ceph: replace --btrfs with --fsmount, --nobtrfs
> > >> with --nofsmount, --btrfsumount with --fsumount
> > >>
> > >> Update documentation, manpage and example config files.
> > > 
> > > Maybe this should keep the old options as well, so that --mkbtrfs is an 
> > > alias for --mkfs --btrfs...
> > 
> > I can add this to the patch, no problem!
> > 
> > > Tommi, is this kind of invocation compatible with your notion of what 
> > > mkcephfs 2.0 should be?  If we can jump to the target interface and 
> > > rewrite the implementation in terms of the new tools that would capture 
> > > the best of both worlds.
> > 
> > If you can point me to some documentation how the target interface
> > works, I could take a look at adapting mkcephfs to the new tools as soon
> > as the new interface/workflow is ready.
> 
> It's not defined yet.  I know Tommi has something in his head, but I'm not 
> sure if it's something similar to the current one (a cluster-wide 
> ceph.conf) or something else.  He's off today, so we probably have to wait 
> to find out.

Okay, I just synced up with Tommi.  The goal is for the new 'mkcephfs' 
functionality to generalize to both cluster setup and expansion, and to 
avoid the pain associated with explicitly defining osd ids, paths, and 
devices for each osd.  (IDs will be dynamic, hotplugging will be possible, 
etc.).  The result will be the ability to define a file specifying 
information like

 - hosts that will be monitors, osds, and/or mons
 - config options to put in the ceph.conf, or config file fragment

To instantiate osds, you could either:

 - explicitly run ceph-disk-prepare on the appropriate nodes, devices 
   (specifying fs type etc)
 - plug in a properly tagged disk that will be magically consumed
 - possibly provide a list of devices/paths in the file above and let the 
   mkcephfs replacement run it.

Point being, it won't be a drop-in replacement for mkcephfs (with a 
ceph.conf as input) because the current approach makes you allocate osd 
ids and such, and the new approach won't require that.

That being the case, I'm all for generalizing the current mkcephfs to 
other file systems as an interim step.  I think the requirements there are 
just that the old options continue to work, and pick good names.  That is,

 * conf: make 'btrfs ...' options alias to new options
 * mkcephfs: make --mkbtrfs an alias for '--mkfs' (or --mkosdfs?)
 * init-ceph: make the --btrfs alias --fsmount, etc.

As for the new options, I suggest:

 * osd fs type
 * osd fs devs   (will work for mkcephfs, not for new stuff)
 * osd fs path
 * osd fs options

The new stuff can definitely use 'osd fs type' and 'osd fs option' (e.g., 
in [osd] section).  It's less clear whether the others will remain useful, 
since they are generally tied to specific osd instances, which live in 
[osd.$id] sections, which the new stuff won't require of you.

My hope is for both methods to be present in bobtail, and mkcephfs to 
be deprecated by cuttlefish.  Does that sound reasonable?

Thanks!
sage
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tommi Virtanen Aug. 16, 2012, 10:52 p.m. UTC | #15
On Thu, Aug 16, 2012 at 3:32 PM, Sage Weil <sage@inktank.com> wrote:
> As for the new options, I suggest:
>
>  * osd fs type
>  * osd fs devs   (will work for mkcephfs, not for new stuff)
>  * osd fs path
>  * osd fs options

What does osd_fs_path mean, and how is it different from the osd_data dir?

I'm expecting to need both mkfs-time options (btrfs metadata block
size etc) and mount-time options (noatime etc).

It would be nice if there was a way to set the options for all
fstypes, and then just toggle which one is used (by default). That
avoids bugs like trying to mkfs/mount btrfs with xfs-specific options,
and vice versa.

I'm not sure how well our config system will handle dynamic variable
names -- ceph-authtool was fine with me just putting data in
osd_crush_location, and we don't need to access these variables from
C++, so it should be fine. If you really wanted to, you could probably
cram them into a single variable, with ad hoc structured data in
the string value, but that's ugly.. Or just hardcode the list of
possible filesystems, and then it's not dynamic variable names
anymore.

So I'm dreaming of something like:

[osd]
# what mount options will be passed when an osd data disk is using
# one of these filesystems; these are passed to mount -o
osd mount options btrfs = herp,foo=bar
osd mount options xfs = noatime,derp

# what mkfs options are used when creating new osd data disk
# filesystems
osd mkfs options btrfs = --hur
osd mkfs options xfs = --dur

# what fstype to use by default when mkfs'ing; mounting will detect
# what's there (with blkid) and work with anything
osd mkfs type = btrfs

# this may go away with "mkcephfs 2.0", and it will have to get more
# complex if we provide something for journals too, etc, because you
# may want to pair specific data disks to specific journals (DH has
# this need).. haven't had time to think it through, which is why i'm
# leaning toward "and here's a hook where you run something on the
# host that calls ceph-disk-prepare etc on all the disks you want",
# and using uuids to match journals to data disks -- this work has
# not yet started)
osd fs devs = /dev/sdb /dev/sdc
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tommi Virtanen Aug. 16, 2012, 10:55 p.m. UTC | #16
On Thu, Aug 16, 2012 at 3:52 PM, Tommi Virtanen <tv@inktank.com> wrote:
> I'm not sure how well our config system will handle dynamic variable
> names -- ceph-authtool was fine with me just putting data in

*ceph-conf
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Aug. 16, 2012, 11:40 p.m. UTC | #17
On Thu, 16 Aug 2012, Tommi Virtanen wrote:
> On Thu, Aug 16, 2012 at 3:32 PM, Sage Weil <sage@inktank.com> wrote:
> > As for the new options, I suggest:
> >
> >  * osd fs type
> >  * osd fs devs   (will work for mkcephfs, not for new stuff)
> >  * osd fs path
> >  * osd fs options
> 
> What does osd_fs_path mean, and how is it different from the osd_data dir?

The idea was that you might want the fs mounted somewhere other than 
osd_data.  I'm not sure it's useful; we may as well drop that...

> I'm expecting to need both mkfs-time options (btrfs metadata block
> size etc) and mount-time options (noatime etc).
> 
> It would be nice if there was a way to set the options for all
> fstypes, and then just toggle which one is used (by default). That
> avoids bugs like trying to mkfs/mount btrfs with xfs-specific options,
> and vice versa.
> 
> I'm not sure how well our config system will handle dynamic variable
> names -- ceph-authtool was fine with me just putting data in
> osd_crush_location, and we don't need to access these variables from
> C++, so it should be fine. If you really wanted to, you could probably
> cram the them into a single variable, with ad hoc structured data in
> the string value, but that's ugly.. Or just hardcode the list of
> possible filesystems, and then it's not dynamic variable names
> anymore.

Yeah, ceph-conf will happily take anything.  The C++ code has to do 
slightly more work to get arbitrary config fields, but that's not an 
issue.

> So I'm dreaming of something like:
> 
> [osd]
> # what mount options will be passed when an osd data disk is using
> # one of these filesystems; these are passed to mount -o
> osd mount options btrfs = herp,foo=bar
> osd mount options xfs = noatime,derp
> 
> # what mkfs options are used when creating new osd data disk
> # filesystems
> osd mkfs options btrfs = --hur
> osd mkfs options xfs = --dur
> 
> # what fstype to use by default when mkfs'ing; mounting will detect
> # what's there (with blkid) and work with anything
> osd mkfs type = btrfs
> 
> # this may go away with "mkcephfs 2.0", and it will have to get more
> # complex if we provide something for journals too, etc, because you
> # may want to pair specific data disks to specific journals (DH has
> # this need).. haven't had time to think it through, which is why i'm
> # leaning toward "and here's a hook where you run something on the
> # host that calls ceph-disk-prepare etc on all the disks you want",
> # and using uuids to match journals to data disks -- this work has
> # not yet started)
> osd fs devs = /dev/sdb /dev/sdc

This all looks good to me.  What do you think, Danny?

sage

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Danny Al-Gaaf Nov. 2, 2012, 9:58 a.m. UTC | #18
Hi Sage,

sorry for the late reply, was absent some weeks and busy with other issues.

Am 17.08.2012 01:40, schrieb Sage Weil:
> On Thu, 16 Aug 2012, Tommi Virtanen wrote:
>> On Thu, Aug 16, 2012 at 3:32 PM, Sage Weil <sage@inktank.com> wrote:
>>> As for the new options, I suggest:
>>>
>>>  * osd fs type
>>>  * osd fs devs   (will work for mkcephfs, not for new stuff)
>>>  * osd fs path
>>>  * osd fs options
>>
>> What does osd_fs_path mean, and how is it different from the osd_data dir?
> 
> The idea was that you might wand the fs mounted somewhere other that 
> osd_data.  I'm not sure it's useful; we may as well drop that...
> 
>> I'm expecting to need both mkfs-time options (btrfs metadata block
>> size etc) and mount-time options (noatime etc).
>>
>> It would be nice if there was a way to set the options for all
>> fstypes, and then just toggle which one is used (by default). That
>> avoids bugs like trying to mkfs/mount btrfs with xfs-specific options,
>> and vice versa.
>>
>> I'm not sure how well our config system will handle dynamic variable
>> names -- ceph-authtool was fine with me just putting data in
>> osd_crush_location, and we don't need to access these variables from
>> C++, so it should be fine. If you really wanted to, you could probably
>> cram the them into a single variable, with ad hoc structured data in
>> the string value, but that's ugly.. Or just hardcode the list of
>> possible filesystems, and then it's not dynamic variable names
>> anymore.
> 
> Yeah, ceph-conf will happily take anything.  The C++ code has to do 
> slightly more work to get arbitrary config fields, but that's not an 
> issue.
> 
>> So I'm dreaming of something like:
>>
>> [osd]
>> # what mount options will be passed when an osd data disk is using
>> # one of these filesystems; these are passed to mount -o
>> osd mount options btrfs = herp,foo=bar
>> osd mount options xfs = noatime,derp
>>
>> # what mkfs options are used when creating new osd data disk
>> # filesystems
>> osd mkfs options btrfs = --hur
>> osd mkfs options xfs = --dur
>>
>> # what fstype to use by default when mkfs'ing; mounting will detect
>> # what's there (with blkid) and work with anything
>> osd mkfs type = btrfs

I will prepare a patch with these for the current mkcephfs and init-ceph
incl. aliases for the old keys and cmdline options where possible.

>> # this may go away with "mkcephfs 2.0", and it will have to get more
>> # complex if we provide something for journals too, etc, because you
>> # may want to pair specific data disks to specific journals (DH has
>> # this need).. haven't had time to think it through, which is why i'm
>> # leaning toward "and here's a hook where you run something on the
>> # host that calls ceph-disk-prepare etc on all the disks you want",
>> # and using uuids to match journals to data disks -- this work has
>> # not yet started)
>> osd fs devs = /dev/sdb /dev/sdc
> 
> This all looks good to me.  What do you think, Danny?

This part (osd fs devs) is for a new mkcephfs 2.0 if I understand you
correctly. Sounds okay for me atm. (Tommi: Is there any new information
on this? Did you already start to work on this?)

Danny

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Nov. 2, 2012, 11:13 a.m. UTC | #19
On Fri, 2 Nov 2012, Danny Al-Gaaf wrote:
> Hi Sage,
> 
> sorry for the late reply, was absent some weeks and busy with other issues.
> 
> Am 17.08.2012 01:40, schrieb Sage Weil:
> > On Thu, 16 Aug 2012, Tommi Virtanen wrote:
> >> On Thu, Aug 16, 2012 at 3:32 PM, Sage Weil <sage@inktank.com> wrote:
> >>> As for the new options, I suggest:
> >>>
> >>>  * osd fs type
> >>>  * osd fs devs   (will work for mkcephfs, not for new stuff)
> >>>  * osd fs path
> >>>  * osd fs options
> >>
> >> What does osd_fs_path mean, and how is it different from the osd_data dir?
> > 
> > The idea was that you might wand the fs mounted somewhere other that 
> > osd_data.  I'm not sure it's useful; we may as well drop that...
> > 
> >> I'm expecting to need both mkfs-time options (btrfs metadata block
> >> size etc) and mount-time options (noatime etc).
> >>
> >> It would be nice if there was a way to set the options for all
> >> fstypes, and then just toggle which one is used (by default). That
> >> avoids bugs like trying to mkfs/mount btrfs with xfs-specific options,
> >> and vice versa.
> >>
> >> I'm not sure how well our config system will handle dynamic variable
> >> names -- ceph-authtool was fine with me just putting data in
> >> osd_crush_location, and we don't need to access these variables from
> >> C++, so it should be fine. If you really wanted to, you could probably
> >> cram the them into a single variable, with ad hoc structured data in
> >> the string value, but that's ugly.. Or just hardcode the list of
> >> possible filesystems, and then it's not dynamic variable names
> >> anymore.
> > 
> > Yeah, ceph-conf will happily take anything.  The C++ code has to do 
> > slightly more work to get arbitrary config fields, but that's not an 
> > issue.
> > 
> >> So I'm dreaming of something like:
> >>
> >> [osd]
> >> # what mount options will be passed when an osd data disk is using
> >> # one of these filesystems; these are passed to mount -o
> >> osd mount options btrfs = herp,foo=bar
> >> osd mount options xfs = noatime,derp
> >>
> >> # what mkfs options are used when creating new osd data disk
> >> # filesystems
> >> osd mkfs options btrfs = --hur
> >> osd mkfs options xfs = --dur
> >>
> >> # what fstype to use by default when mkfs'ing; mounting will detect
> >> # what's there (with blkid) and work with anything
> >> osd mkfs type = btrfs
> 
> I will prepare a patch with these for the current mkcephfs and init-ceph
> incl. aliases for the old keys and cmdline options where possible.

We ended up using slightly different option names for ceph-disk-prepare.  
Let's make them match!  They are:

 osd fs type = <fstype>
 osd fs mkfs arguments <fstype> = ...
 osd fs mount options <fstype> = ...

fstype would be ext4, xfs, btrfs.

Sorry, I lost track of this thread as well... :)

sage


> 
> >> # this may go away with "mkcephfs 2.0", and it will have to get more
> >> # complex if we provide something for journals too, etc, because you
> >> # may want to pair specific data disks to specific journals (DH has
> >> # this need).. haven't had time to think it through, which is why i'm
> >> # leaning toward "and here's a hook where you run something on the
> >> # host that calls ceph-disk-prepare etc on all the disks you want",
> >> # and using uuids to match journals to data disks -- this work has
> >> # not yet started)
> >> osd fs devs = /dev/sdb /dev/sdc
> > 
> > This all looks good to me.  What do you think, Danny?
> 
> This part (osd fs devs) is for a new mkcepfs.2.0 if I understand you
> correctly. Sounds okay for me atm. (Tommi: Are there any new information
> on this? Did you already start to work on this?)
> 
> Danny
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Nov. 15, 2012, 12:12 a.m. UTC | #20
Hi Danny,

Have you had a chance to work on this?  I'd like to include this 
in bobtail.  If you don't have time we can go ahead and implement it, but 
I'd like to avoid duplicating effort.

Thanks!
sage


On Fri, 2 Nov 2012, Danny Al-Gaaf wrote:
> Hi Sage,
> 
> sorry for the late reply, was absent some weeks and busy with other issues.
> 
> Am 17.08.2012 01:40, schrieb Sage Weil:
> > On Thu, 16 Aug 2012, Tommi Virtanen wrote:
> >> On Thu, Aug 16, 2012 at 3:32 PM, Sage Weil <sage@inktank.com> wrote:
> >>> As for the new options, I suggest:
> >>>
> >>>  * osd fs type
> >>>  * osd fs devs   (will work for mkcephfs, not for new stuff)
> >>>  * osd fs path
> >>>  * osd fs options
> >>
> >> What does osd_fs_path mean, and how is it different from the osd_data dir?
> > 
> > The idea was that you might want the fs mounted somewhere other than 
> > osd_data.  I'm not sure it's useful; we may as well drop that...
> > 
> >> I'm expecting to need both mkfs-time options (btrfs metadata block
> >> size etc) and mount-time options (noatime etc).
> >>
> >> It would be nice if there was a way to set the options for all
> >> fstypes, and then just toggle which one is used (by default). That
> >> avoids bugs like trying to mkfs/mount btrfs with xfs-specific options,
> >> and vice versa.
> >>
> >> I'm not sure how well our config system will handle dynamic variable
> >> names -- ceph-authtool was fine with me just putting data in
> >> osd_crush_location, and we don't need to access these variables from
> >> C++, so it should be fine. If you really wanted to, you could probably
> >> cram them into a single variable, with ad hoc structured data in
> >> the string value, but that's ugly.. Or just hardcode the list of
> >> possible filesystems, and then it's not dynamic variable names
> >> anymore.
> > 
> > Yeah, ceph-conf will happily take anything.  The C++ code has to do 
> > slightly more work to get arbitrary config fields, but that's not an 
> > issue.
> > 
> >> So I'm dreaming of something like:
> >>
> >> [osd]
> >> # what mount options will be passed when an osd data disk is using
> >> # one of these filesystems; these are passed to mount -o
> >> osd mount options btrfs = herp,foo=bar
> >> osd mount options xfs = noatime,derp
> >>
> >> # what mkfs options are used when creating new osd data disk
> >> # filesystems
> >> osd mkfs options btrfs = --hur
> >> osd mkfs options xfs = --dur
> >>
> >> # what fstype to use by default when mkfs'ing; mounting will detect
> >> # what's there (with blkid) and work with anything
> >> osd mkfs type = btrfs
> 
> I will prepare a patch with these for the current mkcephfs and init-ceph
> incl. aliases for the old keys and cmdline options where possible.
> 
> >> # this may go away with "mkcephfs 2.0", and it will have to get more
> >> # complex if we provide something for journals too, etc, because you
> >> # may want to pair specific data disks to specific journals (DH has
> >> # this need).. haven't had time to think it through, which is why i'm
> >> # leaning toward "and here's a hook where you run something on the
> >> # host that calls ceph-disk-prepare etc on all the disks you want",
> >> # and using uuids to match journals to data disks -- this work has
> >> # not yet started)
> >> osd fs devs = /dev/sdb /dev/sdc
> > 
> > This all looks good to me.  What do you think, Danny?
> 
> This part (osd fs devs) is for a new mkcephfs 2.0 if I understand you
> correctly. Sounds okay for me atm. (Tommi: Is there any new information
> on this? Did you already start to work on this?)
> 
> Danny
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Danny Al-Gaaf Nov. 15, 2012, 10:32 a.m. UTC | #21
Hi Sage,

Am 15.11.2012 01:12, schrieb Sage Weil:
> Hi Danny,
> 
> Have you had a chance to work on this?  I'd like to include this 
> in bobtail.  If you don't have time we can go ahead and implement it, but 
> I'd like to avoid duplicating effort.

I'm already working on it. Do you have a deadline for bobtail?

Danny

> Thanks!
> sage

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Nov. 15, 2012, 3:05 p.m. UTC | #22
On Thu, 15 Nov 2012, Danny Al-Gaaf wrote:
> Hi Sage,
> 
> Am 15.11.2012 01:12, schrieb Sage Weil:
> > Hi Danny,
> > 
> > Have you had a chance to work on this?  I'd like to include this 
> > in bobtail.  If you don't have time we can go ahead and implement it, but 
> > I'd like to avoid duplicating effort.
> 
> I'm already working on it. Do you have a deadline for bobtail?

Release is ~3 weeks off, but it is technically frozen.  This week would be 
best so that we can make sure it is well tested.

Thanks!
sage
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Nov. 21, 2012, 12:23 a.m. UTC | #23
If you haven't gotten to this yet, I'll go ahead and jump on it.. let me 
know!

Thanks-
sage


On Thu, 9 Aug 2012, Danny Kukawka wrote:

> Remove btrfs specific keys and replace them by more generic
> keys to be able to replace btrfs with e.g. xfs or ext4 easily.
> 
> Add new key to define the osd fs type: 'fstype', which can get
> defined in the [osd] section for all OSDs.
> 
> Replace:
> - 'btrfs devs' -> 'devs'
> - 'btrfs path' -> 'fs path'
> - 'btrfs options' -> 'fs options'
> - mkcephfs: replace --mkbtrfs with --mkfs
> - init-ceph: replace --btrfs with --fsmount, --nobtrfs
> with --nofsmount, --btrfsumount with --fsumount
> 
> Update documentation, manpage and example config files.
> 
> Signed-off-by: Danny Kukawka <danny.kukawka@bisect.de>
> ---
>  doc/man/8/mkcephfs.rst                      |   17 +++-----
>  man/mkcephfs.8                              |   15 +++----
>  src/ceph.conf.twoosds                       |    7 ++--
>  src/init-ceph.in                            |   50 +++++++++++++---------
>  src/mkcephfs.in                             |   60 +++++++++++++++++----------
>  src/sample.ceph.conf                        |   15 ++++---
>  src/test/cli/osdmaptool/ceph.conf.withracks |    3 +-
>  7 Dateien geändert, 95 Zeilen hinzugefügt(+), 72 Zeilen entfernt(-)
> 
> diff --git a/doc/man/8/mkcephfs.rst b/doc/man/8/mkcephfs.rst
> index ddc378a..dd3fbd5 100644
> --- a/doc/man/8/mkcephfs.rst
> +++ b/doc/man/8/mkcephfs.rst
> @@ -70,20 +70,15 @@ Options
>     default is ``/etc/ceph/keyring`` (or whatever is specified in the
>     config file).
>  
> -.. option:: --mkbtrfs
> +.. option:: --mkfs
>  
> -   Create and mount the any btrfs file systems specified in the
> -   ceph.conf for OSD data storage using mkfs.btrfs. The "btrfs devs"
> -   and (if it differs from "osd data") "btrfs path" options must be
> -   defined.
> +   Create and mount any file system specified in the ceph.conf for 
> +   OSD data storage using mkfs. The "devs" and (if it differs from 
> +   "osd data") "fs path" options must be defined.
>  
>     **NOTE** Btrfs is still considered experimental.  This option
> -   can ease some configuration pain, but is the use of btrfs is not
> -   required when ``osd data`` directories are mounted manually by the
> -   adminstrator.
> -
> -   **NOTE** This option is deprecated and will be removed in a future
> -   release.
> +   can ease some configuration pain, but is not required when 
> +   ``osd data`` directories are mounted manually by the adminstrator.
>  
>  .. option:: --no-copy-conf
>  
> diff --git a/man/mkcephfs.8 b/man/mkcephfs.8
> index 8544a01..22a5335 100644
> --- a/man/mkcephfs.8
> +++ b/man/mkcephfs.8
> @@ -32,7 +32,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
>  .
>  .SH SYNOPSIS
>  .nf
> -\fBmkcephfs\fP [ \-c \fIceph.conf\fP ] [ \-\-mkbtrfs ] [ \-a, \-\-all\-hosts [ \-k
> +\fBmkcephfs\fP [ \-c \fIceph.conf\fP ] [ \-\-mkfs ] [ \-a, \-\-all\-hosts [ \-k
>  \fI/path/to/admin.keyring\fP ] ]
>  .fi
>  .sp
> @@ -111,19 +111,16 @@ config file).
>  .UNINDENT
>  .INDENT 0.0
>  .TP
> -.B \-\-mkbtrfs
> -Create and mount the any btrfs file systems specified in the
> -ceph.conf for OSD data storage using mkfs.btrfs. The "btrfs devs"
> -and (if it differs from "osd data") "btrfs path" options must be
> +.B \-\-mkfs
> +Create and mount any file systems specified in the
> +ceph.conf for OSD data storage using mkfs.*. The "devs"
> +and (if it differs from "osd data") "fs path" options must be
>  defined.
>  .sp
>  \fBNOTE\fP Btrfs is still considered experimental.  This option
> -can ease some configuration pain, but is the use of btrfs is not
> +can ease some configuration pain, but the use of this option is not
>  required when \fBosd data\fP directories are mounted manually by the
>  adminstrator.
> -.sp
> -\fBNOTE\fP This option is deprecated and will be removed in a future
> -release.
>  .UNINDENT
>  .INDENT 0.0
>  .TP
> diff --git a/src/ceph.conf.twoosds b/src/ceph.conf.twoosds
> index c0cfc68..05ca754 100644
> --- a/src/ceph.conf.twoosds
> +++ b/src/ceph.conf.twoosds
> @@ -67,7 +67,8 @@
>  	debug journal = 20
>  	log dir = /data/cosd$id
>  	osd data = /mnt/osd$id
> -	btrfs options = "flushoncommit,usertrans"
> +	fs options = "flushoncommit,usertrans"
> +	fstype = btrfs
>  ;	user = root
>  
>  ;	osd journal = /mnt/osd$id/journal
> @@ -75,8 +76,8 @@
>  	osd journal = "/dev/disk/by-path/pci-0000:05:02.0-scsi-6:0:0:0"
>  ;	filestore max sync interval = 1
>  
> -	btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0"
> -;	btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0 \
> +	devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0"
> +;	devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0 \
>  ;	      	   /dev/disk/by-path/pci-0000:05:01.0-scsi-3:0:0:0 \
>  ;		   /dev/disk/by-path/pci-0000:05:01.0-scsi-4:0:0:0 \
>  ;		   /dev/disk/by-path/pci-0000:05:01.0-scsi-5:0:0:0 \
> diff --git a/src/init-ceph.in b/src/init-ceph.in
> index a8c5a29..32bcc9a 100644
> --- a/src/init-ceph.in
> +++ b/src/init-ceph.in
> @@ -100,8 +100,8 @@ docrun=
>  allhosts=0
>  debug=0
>  monaddr=
> -dobtrfs=1
> -dobtrfsumount=0
> +dofsmount=1
> +dofsumount=0
>  verbose=0
>  
>  while echo $1 | grep -q '^-'; do     # FIXME: why not '^-'?
> @@ -130,14 +130,14 @@ case $1 in
>  	    shift
>  	    MON_ADDR=$1
>  	    ;;
> -    --btrfs)
> -	    dobtrfs=1
> +    --fsmount)
> +	    dofsmount=1
>  	    ;;
> -    --nobtrfs)
> -	    dobtrfs=0
> +    --nofsmount)
> +	    dofsmount=0
>  	    ;;
> -    --btrfsumount)
> -	    dobtrfsumount=1
> +    --fsumount)
> +	    dofsumount=1
>  	    ;;
>      --conf | -c)
>  	    [ -z "$2" ] && usage_exit
> @@ -222,9 +222,9 @@ for name in $what; do
>  
>      if echo $name | grep -q ^osd; then
>  	get_conf osd_data "" "osd data"
> -	get_conf btrfs_path "$osd_data" "btrfs path"  # mount point defaults so osd data
> -	get_conf btrfs_devs "" "btrfs devs"
> -	first_dev=`echo $btrfs_devs | cut '-d ' -f 1`
> +	get_conf fs_path "$osd_data" "fs path"  # mount point defaults so osd data
> +	get_conf fs_devs "" "devs"
> +	first_dev=`echo $fs_devs | cut '-d ' -f 1`
>      fi
>  
>      # do lockfile, if RH
> @@ -262,13 +262,25 @@ for name in $what; do
>  
>  	    cmd="$wrap $cmd $runmode"
>  	    
> -	    if [ $dobtrfs -eq 1 ] && [ -n "$btrfs_devs" ]; then
> +	    if [ $dofsmount -eq 1 ] && [ -n "$fs_devs" ]; then
>  		get_conf pre_mount "true" "pre mount command"
> -		get_conf btrfs_opt "noatime" "btrfs options"
> -		[ -n "$btrfs_opt" ] && btrfs_opt="-o $btrfs_opt"
> +		get_conf fs_opt "noatime" "fs options"
> +		get_conf fs_type "" "fstype"
> +
> +		if [ -z "$fs_type" ]; then
> +		    echo No filesystem type defined!
> +		    exit 0
> +                fi 
> +
> +		[ -n "$fs_opt" ] && fs_opt="-o $fs_opt"
>  		[ -n "$pre_mount" ] && do_cmd "$pre_mount"
> -		echo Mounting Btrfs on $host:$btrfs_path
> -		do_root_cmd "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $btrfs_path' /proc/mounts || mount -t btrfs $btrfs_opt $first_dev $btrfs_path"
> +
> +		if [ "$fs_type" == "btrfs" ]; then
> +		    echo Mounting Btrfs on $host:$fs_path
> +		    do_root_cmd "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t btrfs $fs_opt $first_dev $fs_path"
> +		else
> +		    do_root_cmd "modprobe $fs_type ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t $fs_type $fs_opt $first_dev $fs_path"
> +		fi
>  	    fi
>  	    echo Starting Ceph $name on $host...
>  	    mkdir -p $RUN_DIR
> @@ -289,9 +301,9 @@ for name in $what; do
>  	    stop_daemon $name ceph-$type $pid_file
>  	    [ -n "$post_stop" ] && do_cmd "$post_stop"
>  	    [ -n "$lockfile" ] && [ "$?" -eq 0 ] && rm -f $lockfile
> -	    if [ $dobtrfsumount -eq 1 ] && [ -n "$btrfs_devs" ]; then
> -		echo Unmounting Btrfs on $host:$btrfs_path
> -		do_root_cmd "umount $btrfs_path || true"
> +	    if [ $dofsumount -eq 1 ] && [ -n "$fs_devs" ]; then
> +		echo Unmounting OSD volume on $host:$fs_path
> +		do_root_cmd "umount $fs_path || true"
>  	    fi
>  	    ;;
>  
> diff --git a/src/mkcephfs.in b/src/mkcephfs.in
> index c507709..492d4b6 100644
> --- a/src/mkcephfs.in
> +++ b/src/mkcephfs.in
> @@ -60,7 +60,7 @@ else
>  fi
>  
>  usage_exit() {
> -    echo "usage: $0 -a -c ceph.conf [-k adminkeyring] [--mkbtrfs]"
> +    echo "usage: $0 -a -c ceph.conf [-k adminkeyring] [--mkfs]"
>      echo "   to generate a new ceph cluster on all nodes; for advanced usage see man page"
>      echo "   ** be careful, this WILL clobber old data; check your ceph.conf carefully **"
>      exit
> @@ -70,7 +70,7 @@ usage_exit() {
>  
>  
>  allhosts=0
> -mkbtrfs=0
> +mkfs=0
>  preparemonmap=0
>  prepareosdfs=""
>  initdaemon=""
> @@ -130,8 +130,8 @@ case $1 in
>  	    preparemon=1
>              manual_action=1
>  	    ;;
> -    --mkbtrfs)
> -	    mkbtrfs=1
> +    --mkfs)
> +	    mkfs=1
>  	    ;;
>      --no-copy-conf)
>  	    nocopyconf=1
> @@ -306,21 +306,26 @@ if [ -n "$prepareosdfs" ]; then
>  
>      get_conf osd_data "/var/lib/ceph/osd/ceph-$id" "osd data"
>      get_conf osd_journal "$osd_data/journal" "osd journal"
> -    get_conf btrfs_path "$osd_data" "btrfs path"  # mount point defaults so osd data
> -    get_conf btrfs_devs "" "btrfs devs"
> +    get_conf fs_path "$osd_data" "fs path"  # mount point defaults so osd data
> +    get_conf fs_devs "" "devs"
> +    get_conf fs_type "" "fstype"
>  
> -    if [ -z "$btrfs_devs" ]; then
> -	echo "no btrfs devs defined for $name"
> +    if [ -z "$fs_devs" ]; then
> +	echo "no devs defined for $name"
> +	exit 0
> +    fi
> +    if [ -z "$fs_type" ]; then
> +	echo "no filesystem type defined for $name"
>  	exit 0
>      fi
>  
> -    first_dev=`echo $btrfs_devs | cut '-d ' -f 1`
> -    get_conf btrfs_opt "noatime" "btrfs options"
> -    [ -n "$btrfs_opt" ] && btrfs_opt="-o $btrfs_opt"
> +    first_dev=`echo $fs_devs | cut '-d ' -f 1`
> +    get_conf fs_opt "noatime" "fs options"
> +    [ -n "$fs_opt" ] && fs_opt="-o $fs_opt"
>      get_conf osd_user "root" "user"
>      
> -    if [ -n "$osd_journal" ] && echo "$btrfs_devs" | grep -q -w "$osd_journal" ; then
> -	echo "ERROR: osd journal device ($osd_journal) also used by btrfs devs ($btrfs_devs)"
> +    if [ -n "$osd_journal" ] && echo "fs_devs" | grep -q -w "$osd_journal" ; then
> +	echo "ERROR: osd journal device ($osd_journal) also used by devs ($fs_devs)"
>  	exit 1
>      fi
>      
> @@ -330,18 +335,27 @@ if [ -n "$prepareosdfs" ]; then
>  	test -d $osd_journal || mkdir -p `dirname $osd_journal`
>      fi
>  
> -    umount $btrfs_path || true
> -    for f in $btrfs_devs ; do
> +    umount $fs_path || true
> +    for f in $fs_devs ; do
>  	umount $f || true
>      done
>  
> -    modprobe btrfs || true
> -    mkfs.btrfs $btrfs_devs
> -    btrfs device scan || btrfsctl -a
> -    sync   # seems to fix problems for some people...
> -    mount -t btrfs $btrfs_opt $first_dev $btrfs_path
> -    chown $osd_user $btrfs_path
> -    chmod +w $btrfs_path
> +    if [ "$fs_type" == "btrfs" ]; then 
> +        modprobe btrfs || true
> +        mkfs.btrfs $fs_devs
> +        btrfs device scan || btrfsctl -a
> +	sync # seems to fix problems for some people...
> +    elif [ "$fs_type" == "xfs" ]; then
> +        modprobe xfs || true
> +	mkfs.xfs -f $fs_devs
> +    else
> +	modprobe $fs_type || true
> +	mkfs.$fs_type $fs_devs
> +    fi
> +
> +    mount -t $fs_type $fs_opt $first_dev $fs_path
> +    chown $osd_user $fs_path
> +    chmod +w $fs_path
>      
>      exit 0
>  fi
> @@ -459,7 +473,7 @@ if [ $allhosts -eq 1 ]; then
>  	    fi
>  	fi
>  	
> -	if [ $mkbtrfs -eq 1 ] && [ "$type" = "osd" ]; then
> +	if [ $mkfs -eq 1 ] && [ "$type" = "osd" ]; then
>  	    do_root_cmd "$0 -d $rdir --prepare-osdfs $name"
>  	fi
>  
> diff --git a/src/sample.ceph.conf b/src/sample.ceph.conf
> index 88f7f02..147777d 100644
> --- a/src/sample.ceph.conf
> +++ b/src/sample.ceph.conf
> @@ -131,27 +131,30 @@
>  	;debug filestore = 20
>  	;debug journal = 20
>  
> +	; The filesystem used on the volumes
> +	fstype = btrfs
> +
>  [osd.0]
>  	host = delta
>  
> -	; if 'btrfs devs' is not specified, you're responsible for
> +	; if 'devs' is not specified, you're responsible for
>  	; setting up the 'osd data' dir.  if it is not btrfs, things
>  	; will behave up until you try to recover from a crash (which
>  	; usually fine for basic testing).
> -	btrfs devs = /dev/sdx
> +	devs = /dev/sdx
>  
>          ; If you want to specify some other mount options, you can do so.
>          ; The default values are rw,noatime
> -        ;btrfs options = rw,noatime
> +        ; options = rw,noatime
>  
>  [osd.1]
>  	host = epsilon
> -	btrfs devs = /dev/sdy
> +	devs = /dev/sdy
>  
>  [osd.2]
>  	host = zeta
> -	btrfs devs = /dev/sdx
> +	devs = /dev/sdx
>  
>  [osd.3]
>  	host = eta
> -	btrfs devs = /dev/sdy
> +	devs = /dev/sdy
> diff --git a/src/test/cli/osdmaptool/ceph.conf.withracks b/src/test/cli/osdmaptool/ceph.conf.withracks
> index 1e14411..87b0716 100644
> --- a/src/test/cli/osdmaptool/ceph.conf.withracks
> +++ b/src/test/cli/osdmaptool/ceph.conf.withracks
> @@ -42,7 +42,8 @@
>    keyring = /mnt/osd.$id/keyring
>    osd data = /mnt/osd.$id
>    osd journal = /dev/disk/by-label/osd.$id.journal
> -  btrfs devs = /dev/disk/by-label/osd.$id.data
> +  devs = /dev/disk/by-label/osd.$id.data
> +  fstype = btrfs
>  ; temp sage
>    debug osd = 20
>    debug ms = 1
> -- 
> 1.7.10.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Danny Al-Gaaf Nov. 21, 2012, 10:20 a.m. UTC | #24
Hi,

no, I have it basically ready but I have to run some tests before.
You'll have it in the next days!

Danny

Am 21.11.2012 01:23, schrieb Sage Weil:
> If you haven't gotten to this yet, I'll go ahead and jump on it..
> let me know!
> 
> Thanks- sage
> 
> 
> On Thu, 9 Aug 2012, Danny Kukawka wrote:
> 
>> Remove btrfs specific keys and replace them by more generic keys
>> to be able to replace btrfs with e.g. xfs or ext4 easily.
>> 
>> Add new key to define the osd fs type: 'fstype', which can get 
>> defined in the [osd] section for all OSDs.
>> 
>> Replace: - 'btrfs devs' -> 'devs' - 'btrfs path' -> 'fs path' -
>> 'btrfs options' -> 'fs options' - mkcephfs: replace --mkbtrfs
>> with --mkfs - init-ceph: replace --btrfs with --fsmount,
>> --nobtrfs with --nofsmount, --btrfsumount with --fsumount
>> 
>> Update documentation, manpage and example config files.
>> 
>> Signed-off-by: Danny Kukawka <danny.kukawka@bisect.de> --- 
>> doc/man/8/mkcephfs.rst                      |   17 +++----- 
>> man/mkcephfs.8                              |   15 +++---- 
>> src/ceph.conf.twoosds                       |    7 ++-- 
>> src/init-ceph.in                            |   50
>> +++++++++++++--------- src/mkcephfs.in
>> |   60 +++++++++++++++++---------- src/sample.ceph.conf
>> |   15 ++++--- src/test/cli/osdmaptool/ceph.conf.withracks |    3
>> +- 7 Dateien geändert, 95 Zeilen hinzugefügt(+), 72 Zeilen
>> entfernt(-)
>> 
>> diff --git a/doc/man/8/mkcephfs.rst b/doc/man/8/mkcephfs.rst 
>> index ddc378a..dd3fbd5 100644 --- a/doc/man/8/mkcephfs.rst +++
>> b/doc/man/8/mkcephfs.rst @@ -70,20 +70,15 @@ Options default is
>> ``/etc/ceph/keyring`` (or whatever is specified in the config
>> file).
>> 
>> -.. option:: --mkbtrfs +.. option:: --mkfs
>> 
>> -   Create and mount the any btrfs file systems specified in the 
>> -   ceph.conf for OSD data storage using mkfs.btrfs. The "btrfs
>> devs" -   and (if it differs from "osd data") "btrfs path"
>> options must be -   defined. +   Create and mount any file system
>> specified in the ceph.conf for +   OSD data storage using mkfs.
>> The "devs" and (if it differs from +   "osd data") "fs path"
>> options must be defined.
>> 
>> **NOTE** Btrfs is still considered experimental.  This option -
>> can ease some configuration pain, but is the use of btrfs is not 
>> -   required when ``osd data`` directories are mounted manually
>> by the -   adminstrator. - -   **NOTE** This option is deprecated
>> and will be removed in a future -   release. +   can ease some
>> configuration pain, but is not required when +   ``osd data``
>> directories are mounted manually by the adminstrator.
>> 
>> .. option:: --no-copy-conf
>> 
>> diff --git a/man/mkcephfs.8 b/man/mkcephfs.8 index
>> 8544a01..22a5335 100644 --- a/man/mkcephfs.8 +++
>> b/man/mkcephfs.8 @@ -32,7 +32,7 @@ level margin:
>> \\n[rst2man-indent\\n[rst2man-indent-level]] . .SH SYNOPSIS .nf 
>> -\fBmkcephfs\fP [ \-c \fIceph.conf\fP ] [ \-\-mkbtrfs ] [ \-a,
>> \-\-all\-hosts [ \-k +\fBmkcephfs\fP [ \-c \fIceph.conf\fP ] [
>> \-\-mkfs ] [ \-a, \-\-all\-hosts [ \-k 
>> \fI/path/to/admin.keyring\fP ] ] .fi .sp @@ -111,19 +111,16 @@
>> config file). .UNINDENT .INDENT 0.0 .TP -.B \-\-mkbtrfs -Create
>> and mount the any btrfs file systems specified in the -ceph.conf
>> for OSD data storage using mkfs.btrfs. The "btrfs devs" -and (if
>> it differs from "osd data") "btrfs path" options must be +.B
>> \-\-mkfs +Create and mount any file systems specified in the 
>> +ceph.conf for OSD data storage using mkfs.*. The "devs" +and (if
>> it differs from "osd data") "fs path" options must be defined. 
>> .sp \fBNOTE\fP Btrfs is still considered experimental.  This
>> option -can ease some configuration pain, but is the use of btrfs
>> is not +can ease some configuration pain, but the use of this
>> option is not required when \fBosd data\fP directories are
>> mounted manually by the adminstrator. -.sp -\fBNOTE\fP This
>> option is deprecated and will be removed in a future -release. 
>> .UNINDENT .INDENT 0.0 .TP diff --git a/src/ceph.conf.twoosds
>> b/src/ceph.conf.twoosds index c0cfc68..05ca754 100644 ---
>> a/src/ceph.conf.twoosds +++ b/src/ceph.conf.twoosds @@ -67,7
>> +67,8 @@ debug journal = 20 log dir = /data/cosd$id osd data =
>> /mnt/osd$id -	btrfs options = "flushoncommit,usertrans" +	fs
>> options = "flushoncommit,usertrans" +	fstype = btrfs ;	user =
>> root
>> 
>> ;	osd journal = /mnt/osd$id/journal @@ -75,8 +76,8 @@ osd journal
>> = "/dev/disk/by-path/pci-0000:05:02.0-scsi-6:0:0:0" ;	filestore
>> max sync interval = 1
>> 
>> -	btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0" 
>> -;	btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0
>> \ +	devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0" +;
>> devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0 \ ;
>> /dev/disk/by-path/pci-0000:05:01.0-scsi-3:0:0:0 \ ;
>> /dev/disk/by-path/pci-0000:05:01.0-scsi-4:0:0:0 \ ;
>> /dev/disk/by-path/pci-0000:05:01.0-scsi-5:0:0:0 \ diff --git
>> a/src/init-ceph.in b/src/init-ceph.in index a8c5a29..32bcc9a
>> 100644 --- a/src/init-ceph.in +++ b/src/init-ceph.in @@ -100,8
>> +100,8 @@ docrun= allhosts=0 debug=0 monaddr= -dobtrfs=1 
>> -dobtrfsumount=0 +dofsmount=1 +dofsumount=0 verbose=0
>> 
>> while echo $1 | grep -q '^-'; do     # FIXME: why not '^-'? @@
>> -130,14 +130,14 @@ case $1 in shift MON_ADDR=$1 ;; -    --btrfs) 
>> -	    dobtrfs=1 +    --fsmount) +	    dofsmount=1 ;; -
>> --nobtrfs) -	    dobtrfs=0 +    --nofsmount) +	    dofsmount=0 
>> ;; -    --btrfsumount) -	    dobtrfsumount=1 +    --fsumount) +
>> dofsumount=1 ;; --conf | -c) [ -z "$2" ] && usage_exit @@ -222,9
>> +222,9 @@ for name in $what; do
>> 
>> if echo $name | grep -q ^osd; then get_conf osd_data "" "osd
>> data" -	get_conf btrfs_path "$osd_data" "btrfs path"  # mount
>> point defaults so osd data -	get_conf btrfs_devs "" "btrfs devs" 
>> -	first_dev=`echo $btrfs_devs | cut '-d ' -f 1` +	get_conf
>> fs_path "$osd_data" "fs path"  # mount point defaults so osd
>> data +	get_conf fs_devs "" "devs" +	first_dev=`echo $fs_devs |
>> cut '-d ' -f 1` fi
>> 
>> # do lockfile, if RH @@ -262,13 +262,25 @@ for name in $what; do
>> 
>> cmd="$wrap $cmd $runmode"  -	    if [ $dobtrfs -eq 1 ] && [ -n
>> "$btrfs_devs" ]; then +	    if [ $dofsmount -eq 1 ] && [ -n
>> "$fs_devs" ]; then get_conf pre_mount "true" "pre mount command" 
>> -		get_conf btrfs_opt "noatime" "btrfs options" -		[ -n
>> "$btrfs_opt" ] && btrfs_opt="-o $btrfs_opt" +		get_conf fs_opt
>> "noatime" "fs options" +		get_conf fs_type "" "fstype" + +		if [
>> -z "$fs_type" ]; then +		    echo No filesystem type defined! +
>> exit 0 +                fi + +		[ -n "$fs_opt" ] && fs_opt="-o
>> $fs_opt" [ -n "$pre_mount" ] && do_cmd "$pre_mount" -		echo
>> Mounting Btrfs on $host:$btrfs_path -		do_root_cmd "modprobe
>> btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+
>> $btrfs_path' /proc/mounts || mount -t btrfs $btrfs_opt $first_dev
>> $btrfs_path" + +		if [ "$fs_type" == "btrfs" ]; then +		    echo
>> Mounting Btrfs on $host:$fs_path +		    do_root_cmd "modprobe
>> btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+
>> $fs_path' /proc/mounts || mount -t btrfs $fs_opt $first_dev
>> $fs_path" +		else +		    do_root_cmd "modprobe $fs_type ; egrep
>> -q '^[^ ]+ $fs_path' /proc/mounts || mount -t $fs_type $fs_opt
>> $first_dev $fs_path" +		fi fi echo Starting Ceph $name on
>> $host... mkdir -p $RUN_DIR @@ -289,9 +301,9 @@ for name in $what;
>> do stop_daemon $name ceph-$type $pid_file [ -n "$post_stop" ] &&
>> do_cmd "$post_stop" [ -n "$lockfile" ] && [ "$?" -eq 0 ] && rm -f
>> $lockfile -	    if [ $dobtrfsumount -eq 1 ] && [ -n "$btrfs_devs"
>> ]; then -		echo Unmounting Btrfs on $host:$btrfs_path -
>> do_root_cmd "umount $btrfs_path || true" +	    if [ $dofsumount
>> -eq 1 ] && [ -n "$fs_devs" ]; then +		echo Unmounting OSD volume
>> on $host:$fs_path +		do_root_cmd "umount $fs_path || true" fi ;;
>> 
>> diff --git a/src/mkcephfs.in b/src/mkcephfs.in index
>> c507709..492d4b6 100644 --- a/src/mkcephfs.in +++
>> b/src/mkcephfs.in @@ -60,7 +60,7 @@ else fi
>> 
>> usage_exit() { -    echo "usage: $0 -a -c ceph.conf [-k
>> adminkeyring] [--mkbtrfs]" +    echo "usage: $0 -a -c ceph.conf
>> [-k adminkeyring] [--mkfs]" echo "   to generate a new ceph
>> cluster on all nodes; for advanced usage see man page" echo "
>> ** be careful, this WILL clobber old data; check your ceph.conf
>> carefully **" exit @@ -70,7 +70,7 @@ usage_exit() {
>> 
>> 
>> allhosts=0 -mkbtrfs=0 +mkfs=0 preparemonmap=0 prepareosdfs="" 
>> initdaemon="" @@ -130,8 +130,8 @@ case $1 in preparemon=1 
>> manual_action=1 ;; -    --mkbtrfs) -	    mkbtrfs=1 +    --mkfs) +
>> mkfs=1 ;; --no-copy-conf) nocopyconf=1 @@ -306,21 +306,26 @@ if [
>> -n "$prepareosdfs" ]; then
>> 
>> get_conf osd_data "/var/lib/ceph/osd/ceph-$id" "osd data" 
>> get_conf osd_journal "$osd_data/journal" "osd journal" -
>> get_conf btrfs_path "$osd_data" "btrfs path"  # mount point
>> defaults so osd data -    get_conf btrfs_devs "" "btrfs devs" +
>> get_conf fs_path "$osd_data" "fs path"  # mount point defaults so
>> osd data +    get_conf fs_devs "" "devs" +    get_conf fs_type ""
>> "fstype"
>> 
>> -    if [ -z "$btrfs_devs" ]; then -	echo "no btrfs devs defined
>> for $name" +    if [ -z "$fs_devs" ]; then +	echo "no devs
>> defined for $name" +	exit 0 +    fi +    if [ -z "$fs_type" ];
>> then +	echo "no filesystem type defined for $name" exit 0 fi
>> 
>> -    first_dev=`echo $btrfs_devs | cut '-d ' -f 1` -    get_conf
>> btrfs_opt "noatime" "btrfs options" -    [ -n "$btrfs_opt" ] &&
>> btrfs_opt="-o $btrfs_opt" +    first_dev=`echo $fs_devs | cut '-d
>> ' -f 1` +    get_conf fs_opt "noatime" "fs options" +    [ -n
>> "$fs_opt" ] && fs_opt="-o $fs_opt" get_conf osd_user "root"
>> "user"
>> 
>> -    if [ -n "$osd_journal" ] && echo "$btrfs_devs" | grep -q -w
>> "$osd_journal" ; then -	echo "ERROR: osd journal device
>> ($osd_journal) also used by btrfs devs ($btrfs_devs)" +    if [
>> -n "$osd_journal" ] && echo "fs_devs" | grep -q -w "$osd_journal"
>> ; then +	echo "ERROR: osd journal device ($osd_journal) also used
>> by devs ($fs_devs)" exit 1 fi
>> 
>> @@ -330,18 +335,27 @@ if [ -n "$prepareosdfs" ]; then test -d
>> $osd_journal || mkdir -p `dirname $osd_journal` fi
>> 
>> -    umount $btrfs_path || true -    for f in $btrfs_devs ; do +
>> umount $fs_path || true +    for f in $fs_devs ; do umount $f ||
>> true done
>> 
>> -    modprobe btrfs || true -    mkfs.btrfs $btrfs_devs -
>> btrfs device scan || btrfsctl -a -    sync   # seems to fix
>> problems for some people... -    mount -t btrfs $btrfs_opt
>> $first_dev $btrfs_path -    chown $osd_user $btrfs_path -
>> chmod +w $btrfs_path +    if [ "$fs_type" == "btrfs" ]; then +
>> modprobe btrfs || true +        mkfs.btrfs $fs_devs +
>> btrfs device scan || btrfsctl -a +	sync # seems to fix problems
>> for some people... +    elif [ "$fs_type" == "xfs" ]; then +
>> modprobe xfs || true +	mkfs.xfs -f $fs_devs +    else +	modprobe
>> $fs_type || true +	mkfs.$fs_type $fs_devs +    fi + +    mount -t
>> $fs_type $fs_opt $first_dev $fs_path +    chown $osd_user
>> $fs_path +    chmod +w $fs_path
>> 
>> exit 0 fi @@ -459,7 +473,7 @@ if [ $allhosts -eq 1 ]; then fi fi 
>> 

>> -	if [ $mkbtrfs -eq 1 ] && [ "$type" = "osd" ]; then +	if [ $mkfs
>> -eq 1 ] && [ "$type" = "osd" ]; then do_root_cmd "$0 -d $rdir
>> --prepare-osdfs $name" fi
>> 
>> diff --git a/src/sample.ceph.conf b/src/sample.ceph.conf index
>> 88f7f02..147777d 100644 --- a/src/sample.ceph.conf +++
>> b/src/sample.ceph.conf @@ -131,27 +131,30 @@ ;debug filestore =
>> 20 ;debug journal = 20
>> 
>> +	; The filesystem used on the volumes +	fstype = btrfs + 
>> [osd.0] host = delta
>> 
>> -	; if 'btrfs devs' is not specified, you're responsible for +	;
>> if 'devs' is not specified, you're responsible for ; setting up
>> the 'osd data' dir.  if it is not btrfs, things ; will behave up
>> until you try to recover from a crash (which ; usually fine for
>> basic testing). -	btrfs devs = /dev/sdx +	devs = /dev/sdx
>> 
>> ; If you want to specify some other mount options, you can do
>> so. ; The default values are rw,noatime -        ;btrfs options =
>> rw,noatime +        ; options = rw,noatime
>> 
>> [osd.1] host = epsilon -	btrfs devs = /dev/sdy +	devs = /dev/sdy
>> 
>> [osd.2] host = zeta -	btrfs devs = /dev/sdx +	devs = /dev/sdx
>> 
>> [osd.3] host = eta -	btrfs devs = /dev/sdy +	devs = /dev/sdy diff
>> --git a/src/test/cli/osdmaptool/ceph.conf.withracks
>> b/src/test/cli/osdmaptool/ceph.conf.withracks index
>> 1e14411..87b0716 100644 ---
>> a/src/test/cli/osdmaptool/ceph.conf.withracks +++
>> b/src/test/cli/osdmaptool/ceph.conf.withracks @@ -42,7 +42,8 @@ 
>> keyring = /mnt/osd.$id/keyring osd data = /mnt/osd.$id osd
>> journal = /dev/disk/by-label/osd.$id.journal -  btrfs devs =
>> /dev/disk/by-label/osd.$id.data +  devs =
>> /dev/disk/by-label/osd.$id.data +  fstype = btrfs ; temp sage 
>> debug osd = 20 debug ms = 1 -- 1.7.10.4
>> 
>> -- To unsubscribe from this list: send the line "unsubscribe
>> ceph-devel" in the body of a message to
>> majordomo@vger.kernel.org More majordomo info at
>> http://vger.kernel.org/majordomo-info.html
>> 
>> 
> 

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/doc/man/8/mkcephfs.rst b/doc/man/8/mkcephfs.rst
index ddc378a..dd3fbd5 100644
--- a/doc/man/8/mkcephfs.rst
+++ b/doc/man/8/mkcephfs.rst
@@ -70,20 +70,15 @@  Options
    default is ``/etc/ceph/keyring`` (or whatever is specified in the
    config file).
 
-.. option:: --mkbtrfs
+.. option:: --mkfs
 
-   Create and mount the any btrfs file systems specified in the
-   ceph.conf for OSD data storage using mkfs.btrfs. The "btrfs devs"
-   and (if it differs from "osd data") "btrfs path" options must be
-   defined.
+   Create and mount any file system specified in the ceph.conf for 
+   OSD data storage using mkfs. The "devs" and (if it differs from 
+   "osd data") "fs path" options must be defined.
 
    **NOTE** Btrfs is still considered experimental.  This option
-   can ease some configuration pain, but is the use of btrfs is not
-   required when ``osd data`` directories are mounted manually by the
-   adminstrator.
-
-   **NOTE** This option is deprecated and will be removed in a future
-   release.
+   can ease some configuration pain, but is not required when
+   ``osd data`` directories are mounted manually by the administrator.
 
 .. option:: --no-copy-conf
 
diff --git a/man/mkcephfs.8 b/man/mkcephfs.8
index 8544a01..22a5335 100644
--- a/man/mkcephfs.8
+++ b/man/mkcephfs.8
@@ -32,7 +32,7 @@  level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
 .
 .SH SYNOPSIS
 .nf
-\fBmkcephfs\fP [ \-c \fIceph.conf\fP ] [ \-\-mkbtrfs ] [ \-a, \-\-all\-hosts [ \-k
+\fBmkcephfs\fP [ \-c \fIceph.conf\fP ] [ \-\-mkfs ] [ \-a, \-\-all\-hosts [ \-k
 \fI/path/to/admin.keyring\fP ] ]
 .fi
 .sp
@@ -111,19 +111,16 @@  config file).
 .UNINDENT
 .INDENT 0.0
 .TP
-.B \-\-mkbtrfs
-Create and mount the any btrfs file systems specified in the
-ceph.conf for OSD data storage using mkfs.btrfs. The "btrfs devs"
-and (if it differs from "osd data") "btrfs path" options must be
+.B \-\-mkfs
+Create and mount any file systems specified in the
+ceph.conf for OSD data storage using mkfs.*. The "devs"
+and (if it differs from "osd data") "fs path" options must be
 defined.
 .sp
 \fBNOTE\fP Btrfs is still considered experimental.  This option
-can ease some configuration pain, but is the use of btrfs is not
+can ease some configuration pain, but the use of this option is not
 required when \fBosd data\fP directories are mounted manually by the
 adminstrator.
-.sp
-\fBNOTE\fP This option is deprecated and will be removed in a future
-release.
 .UNINDENT
 .INDENT 0.0
 .TP
diff --git a/src/ceph.conf.twoosds b/src/ceph.conf.twoosds
index c0cfc68..05ca754 100644
--- a/src/ceph.conf.twoosds
+++ b/src/ceph.conf.twoosds
@@ -67,7 +67,8 @@ 
 	debug journal = 20
 	log dir = /data/cosd$id
 	osd data = /mnt/osd$id
-	btrfs options = "flushoncommit,usertrans"
+	fs options = "flushoncommit,usertrans"
+	fstype = btrfs
 ;	user = root
 
 ;	osd journal = /mnt/osd$id/journal
@@ -75,8 +76,8 @@ 
 	osd journal = "/dev/disk/by-path/pci-0000:05:02.0-scsi-6:0:0:0"
 ;	filestore max sync interval = 1
 
-	btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0"
-;	btrfs devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0 \
+	devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0"
+;	devs = "/dev/disk/by-path/pci-0000:05:01.0-scsi-2:0:0:0 \
 ;	      	   /dev/disk/by-path/pci-0000:05:01.0-scsi-3:0:0:0 \
 ;		   /dev/disk/by-path/pci-0000:05:01.0-scsi-4:0:0:0 \
 ;		   /dev/disk/by-path/pci-0000:05:01.0-scsi-5:0:0:0 \
diff --git a/src/init-ceph.in b/src/init-ceph.in
index a8c5a29..32bcc9a 100644
--- a/src/init-ceph.in
+++ b/src/init-ceph.in
@@ -100,8 +100,8 @@  docrun=
 allhosts=0
 debug=0
 monaddr=
-dobtrfs=1
-dobtrfsumount=0
+dofsmount=1
+dofsumount=0
 verbose=0
 
 while echo $1 | grep -q '^-'; do     # FIXME: why not '^-'?
@@ -130,14 +130,14 @@  case $1 in
 	    shift
 	    MON_ADDR=$1
 	    ;;
-    --btrfs)
-	    dobtrfs=1
+    --fsmount)
+	    dofsmount=1
 	    ;;
-    --nobtrfs)
-	    dobtrfs=0
+    --nofsmount)
+	    dofsmount=0
 	    ;;
-    --btrfsumount)
-	    dobtrfsumount=1
+    --fsumount)
+	    dofsumount=1
 	    ;;
     --conf | -c)
 	    [ -z "$2" ] && usage_exit
@@ -222,9 +222,9 @@  for name in $what; do
 
     if echo $name | grep -q ^osd; then
 	get_conf osd_data "" "osd data"
-	get_conf btrfs_path "$osd_data" "btrfs path"  # mount point defaults so osd data
-	get_conf btrfs_devs "" "btrfs devs"
-	first_dev=`echo $btrfs_devs | cut '-d ' -f 1`
+	get_conf fs_path "$osd_data" "fs path"  # mount point defaults to osd data
+	get_conf fs_devs "" "devs"
+	first_dev=`echo $fs_devs | cut '-d ' -f 1`
     fi
 
     # do lockfile, if RH
@@ -262,13 +262,25 @@  for name in $what; do
 
 	    cmd="$wrap $cmd $runmode"
 	    
-	    if [ $dobtrfs -eq 1 ] && [ -n "$btrfs_devs" ]; then
+	    if [ $dofsmount -eq 1 ] && [ -n "$fs_devs" ]; then
 		get_conf pre_mount "true" "pre mount command"
-		get_conf btrfs_opt "noatime" "btrfs options"
-		[ -n "$btrfs_opt" ] && btrfs_opt="-o $btrfs_opt"
+		get_conf fs_opt "noatime" "fs options"
+		get_conf fs_type "" "fstype"
+
+		if [ -z "$fs_type" ]; then  # 'fstype' is required once 'devs' is set and mounting is enabled
+		    echo No filesystem type defined!
+		    exit 1  # configuration error: nothing was mounted or started, so do not report success
+		fi
+
+		[ -n "$fs_opt" ] && fs_opt="-o $fs_opt"
 		[ -n "$pre_mount" ] && do_cmd "$pre_mount"
-		echo Mounting Btrfs on $host:$btrfs_path
-		do_root_cmd "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $btrfs_path' /proc/mounts || mount -t btrfs $btrfs_opt $first_dev $btrfs_path"
+
+		if [ "$fs_type" = "btrfs" ]; then  # POSIX '=' (not '=='), so the test also works when /bin/sh is dash
+		    echo Mounting Btrfs on $host:$fs_path
+		    do_root_cmd "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t btrfs $fs_opt $first_dev $fs_path"
+		else  # any other fstype: load its module and mount the first device unless already mounted
+		    do_root_cmd "modprobe $fs_type ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t $fs_type $fs_opt $first_dev $fs_path"
+		fi
 	    fi
 	    echo Starting Ceph $name on $host...
 	    mkdir -p $RUN_DIR
@@ -289,9 +301,9 @@  for name in $what; do
 	    stop_daemon $name ceph-$type $pid_file
 	    [ -n "$post_stop" ] && do_cmd "$post_stop"
 	    [ -n "$lockfile" ] && [ "$?" -eq 0 ] && rm -f $lockfile
-	    if [ $dobtrfsumount -eq 1 ] && [ -n "$btrfs_devs" ]; then
-		echo Unmounting Btrfs on $host:$btrfs_path
-		do_root_cmd "umount $btrfs_path || true"
+	    if [ $dofsumount -eq 1 ] && [ -n "$fs_devs" ]; then
+		echo Unmounting OSD volume on $host:$fs_path
+		do_root_cmd "umount $fs_path || true"
 	    fi
 	    ;;
 
diff --git a/src/mkcephfs.in b/src/mkcephfs.in
index c507709..492d4b6 100644
--- a/src/mkcephfs.in
+++ b/src/mkcephfs.in
@@ -60,7 +60,7 @@  else
 fi
 
 usage_exit() {
-    echo "usage: $0 -a -c ceph.conf [-k adminkeyring] [--mkbtrfs]"
+    echo "usage: $0 -a -c ceph.conf [-k adminkeyring] [--mkfs]"
     echo "   to generate a new ceph cluster on all nodes; for advanced usage see man page"
     echo "   ** be careful, this WILL clobber old data; check your ceph.conf carefully **"
     exit
@@ -70,7 +70,7 @@  usage_exit() {
 
 
 allhosts=0
-mkbtrfs=0
+mkfs=0
 preparemonmap=0
 prepareosdfs=""
 initdaemon=""
@@ -130,8 +130,8 @@  case $1 in
 	    preparemon=1
             manual_action=1
 	    ;;
-    --mkbtrfs)
-	    mkbtrfs=1
+    --mkfs)
+	    mkfs=1
 	    ;;
     --no-copy-conf)
 	    nocopyconf=1
@@ -306,21 +306,26 @@  if [ -n "$prepareosdfs" ]; then
 
     get_conf osd_data "/var/lib/ceph/osd/ceph-$id" "osd data"
     get_conf osd_journal "$osd_data/journal" "osd journal"
-    get_conf btrfs_path "$osd_data" "btrfs path"  # mount point defaults so osd data
-    get_conf btrfs_devs "" "btrfs devs"
+    get_conf fs_path "$osd_data" "fs path"  # mount point defaults to osd data
+    get_conf fs_devs "" "devs"
+    get_conf fs_type "" "fstype"
 
-    if [ -z "$btrfs_devs" ]; then
-	echo "no btrfs devs defined for $name"
+    if [ -z "$fs_devs" ]; then
+	echo "no devs defined for $name"
+	exit 0
+    fi
+    if [ -z "$fs_type" ]; then
+	echo "no filesystem type defined for $name"
 	exit 0
     fi
 
-    first_dev=`echo $btrfs_devs | cut '-d ' -f 1`
-    get_conf btrfs_opt "noatime" "btrfs options"
-    [ -n "$btrfs_opt" ] && btrfs_opt="-o $btrfs_opt"
+    first_dev=`echo $fs_devs | cut '-d ' -f 1`
+    get_conf fs_opt "noatime" "fs options"
+    [ -n "$fs_opt" ] && fs_opt="-o $fs_opt"
     get_conf osd_user "root" "user"
     
-    if [ -n "$osd_journal" ] && echo "$btrfs_devs" | grep -q -w "$osd_journal" ; then
-	echo "ERROR: osd journal device ($osd_journal) also used by btrfs devs ($btrfs_devs)"
+    if [ -n "$osd_journal" ] && echo "$fs_devs" | grep -q -w "$osd_journal" ; then  # journal must not be one of the data devs
+	echo "ERROR: osd journal device ($osd_journal) also used by devs ($fs_devs)"
 	exit 1
     fi
     
@@ -330,18 +335,27 @@  if [ -n "$prepareosdfs" ]; then
 	test -d $osd_journal || mkdir -p `dirname $osd_journal`
     fi
 
-    umount $btrfs_path || true
-    for f in $btrfs_devs ; do
+    umount $fs_path || true
+    for f in $fs_devs ; do
 	umount $f || true
     done
 
-    modprobe btrfs || true
-    mkfs.btrfs $btrfs_devs
-    btrfs device scan || btrfsctl -a
-    sync   # seems to fix problems for some people...
-    mount -t btrfs $btrfs_opt $first_dev $btrfs_path
-    chown $osd_user $btrfs_path
-    chmod +w $btrfs_path
+    if [ "$fs_type" = "btrfs" ]; then  # POSIX '=' (not '=='), so the test also works when /bin/sh is dash
+	modprobe btrfs || true
+	mkfs.btrfs $fs_devs
+	btrfs device scan || btrfsctl -a
+	sync # seems to fix problems for some people...
+    elif [ "$fs_type" = "xfs" ]; then
+	modprobe xfs || true
+	mkfs.xfs -f $fs_devs  # -f: overwrite an existing filesystem signature on the device
+    else  # generic fallback: relies on a mkfs.<fstype> helper being installed
+	modprobe $fs_type || true
+	mkfs.$fs_type $fs_devs
+    fi
+
+    mount -t $fs_type $fs_opt $first_dev $fs_path
+    chown $osd_user $fs_path
+    chmod +w $fs_path
     
     exit 0
 fi
@@ -459,7 +473,7 @@  if [ $allhosts -eq 1 ]; then
 	    fi
 	fi
 	
-	if [ $mkbtrfs -eq 1 ] && [ "$type" = "osd" ]; then
+	if [ $mkfs -eq 1 ] && [ "$type" = "osd" ]; then
 	    do_root_cmd "$0 -d $rdir --prepare-osdfs $name"
 	fi
 
diff --git a/src/sample.ceph.conf b/src/sample.ceph.conf
index 88f7f02..147777d 100644
--- a/src/sample.ceph.conf
+++ b/src/sample.ceph.conf
@@ -131,27 +131,30 @@ 
 	;debug filestore = 20
 	;debug journal = 20
 
+	; The filesystem used on the volumes
+	fstype = btrfs
+
 [osd.0]
 	host = delta
 
-	; if 'btrfs devs' is not specified, you're responsible for
+	; if 'devs' is not specified, you're responsible for
 	; setting up the 'osd data' dir.  if it is not btrfs, things
 	; will behave up until you try to recover from a crash (which
 	; usually fine for basic testing).
-	btrfs devs = /dev/sdx
+	devs = /dev/sdx
 
         ; If you want to specify some other mount options, you can do so.
         ; The default values are rw,noatime
-        ;btrfs options = rw,noatime
+        ;fs options = rw,noatime
 
 [osd.1]
 	host = epsilon
-	btrfs devs = /dev/sdy
+	devs = /dev/sdy
 
 [osd.2]
 	host = zeta
-	btrfs devs = /dev/sdx
+	devs = /dev/sdx
 
 [osd.3]
 	host = eta
-	btrfs devs = /dev/sdy
+	devs = /dev/sdy
diff --git a/src/test/cli/osdmaptool/ceph.conf.withracks b/src/test/cli/osdmaptool/ceph.conf.withracks
index 1e14411..87b0716 100644
--- a/src/test/cli/osdmaptool/ceph.conf.withracks
+++ b/src/test/cli/osdmaptool/ceph.conf.withracks
@@ -42,7 +42,8 @@ 
   keyring = /mnt/osd.$id/keyring
   osd data = /mnt/osd.$id
   osd journal = /dev/disk/by-label/osd.$id.journal
-  btrfs devs = /dev/disk/by-label/osd.$id.data
+  devs = /dev/disk/by-label/osd.$id.data
+  fstype = btrfs
 ; temp sage
   debug osd = 20
   debug ms = 1