4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/obdclass/obd_mount.c
38 * Client mount routines
40 * Author: Nathan Rutman <nathan@clusterfs.com>
44 #define DEBUG_SUBSYSTEM S_CLASS
45 #define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
46 #define PRINT_CMD CDEBUG
48 #include "../include/obd.h"
49 #include "../include/lvfs.h"
50 #include "../include/obd_class.h"
51 #include "../include/lustre/lustre_user.h"
52 #include "../include/lustre_log.h"
53 #include "../include/lustre_disk.h"
54 #include "../include/lustre_param.h"
/*
 * Hook set by lustre_register_client_fill_super(); lustre_fill_super() calls
 * it with the superblock and the vfsmount from lustre_mount_data2 when the
 * parsed mount data describes a client.
 */
56 static int (*client_fill_super)(struct super_block *sb,
57 struct vfsmount *mnt);
/*
 * Hook set by lustre_register_kill_super_cb(); lustre_kill_super() calls it
 * for superblocks that have an lsi and are not servers.
 */
59 static void (*kill_super_cb)(struct super_block *sb);
61 /**************** config llog ********************/
63 /** Get a config log from the MGS and process it.
64 * This func is called for both clients and servers.
65 * Continue to process new statements appended to the logs
66 * (whenever the config lock is revoked) until lustre_end_log
68 * @param sb The superblock is used by the MGC to write to the local copy of
70 * @param logname The name of the llog to replicate from the MGS
71 * @param cfg Since the same mgc may be used to follow multiple config logs
72 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
73 * this log, and is added to the mgc's list of logs to follow.
/*
 * Ask the MGC attached to this superblock to start processing a config log.
 *
 * The buffers are initialised with the MGC's obd_name, then filled with the
 * log name (buffer 1), the caller's config_llog_instance (buffer 2) and the
 * address of the superblock pointer (buffer 3).  An LCFG_LOG_START lustre_cfg
 * built from them is handed to obd_process_config() on the MGC and freed
 * afterwards.  Two different console errors report a failure.
 *
 * sb:      superblock whose lsi->lsi_mgc is the MGC to use
 * logname: name of the config log
 * cfg:     per-log state passed to the MGC
 *
 * NOTE(review): the allocation of bufs, the declaration of rc, the conditions
 * selecting between the two error messages and the return statement are on
 * lines not shown in this listing; presumably rc is returned.
 * NOTE(review): the result of lustre_cfg_new() is used on the next line
 * without a visible failure check.
 */
75 int lustre_process_log(struct super_block *sb, char *logname,
76 struct config_llog_instance *cfg)
78 struct lustre_cfg *lcfg;
79 struct lustre_cfg_bufs *bufs;
80 struct lustre_sb_info *lsi = s2lsi(sb);
81 struct obd_device *mgc = lsi->lsi_mgc;
91 /* mgc_process_config */
92 lustre_cfg_bufs_reset(bufs, mgc->obd_name);
93 lustre_cfg_bufs_set_string(bufs, 1, logname);
94 lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
95 lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
96 lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
97 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
98 lustre_cfg_free(lcfg);
/* Trailing space added: the adjacent literals used to join as "'%s'failed". */
103 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' "
104 "failed from the MGS (%d). Make sure this "
105 "client and the MGS are running compatible "
106 "versions of Lustre.\n",
107 mgc->obd_name, logname, rc);
110 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
111 "failed (%d). This may be the result of "
112 "communication errors between this node and "
113 "the MGS, a bad configuration, or other "
114 "errors. See the syslog for more "
115 "information.\n", mgc->obd_name, logname,
118 /* class_obd_list(); */
121 EXPORT_SYMBOL(lustre_process_log);
123 /* Stop watching this config log for updates */
/*
 * Tell the MGC attached to this superblock to stop following a config log.
 *
 * The buffers are initialised with the MGC's obd_name, then filled with the
 * log name (buffer 1) and the caller's config_llog_instance (buffer 2).  An
 * LCFG_LOG_END lustre_cfg built from them is handed to obd_process_config()
 * on the MGC and freed afterwards.
 *
 * NOTE(review): the declaration of rc, any NULL checks on mgc or cfg and the
 * return statement are on lines not shown here; presumably rc is returned.
 * NOTE(review): the result of lustre_cfg_new() is used on the next line
 * without a visible failure check.
 */
124 int lustre_end_log(struct super_block *sb, char *logname,
125 struct config_llog_instance *cfg)
127 struct lustre_cfg *lcfg;
128 struct lustre_cfg_bufs bufs;
129 struct lustre_sb_info *lsi = s2lsi(sb);
130 struct obd_device *mgc = lsi->lsi_mgc;
136 /* mgc_process_config */
137 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
138 lustre_cfg_bufs_set_string(&bufs, 1, logname);
140 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
141 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
142 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
143 lustre_cfg_free(lcfg);
146 EXPORT_SYMBOL(lustre_end_log);
148 /**************** obd start *******************/
150 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
151 * lctl (and do for echo cli/srv).
/*
 * Build a lustre_cfg from up to four strings and run it through
 * class_process_config().
 *
 * cfgname: name the buffers are initialised with
 * nid:     stored in lcfg->lcfg_nid before processing
 * cmd:     command passed to lustre_cfg_new()
 * s1..s4:  strings placed in buffers 1..4; callers in this file pass NULL
 *          for unused slots
 *
 * The lustre_cfg is freed after processing.
 *
 * NOTE(review): the declaration of rc, any NULL guards around the four
 * string setters and the return statement are on lines not shown here.
 * NOTE(review): lcfg is dereferenced on the line right after
 * lustre_cfg_new() without a visible failure check.
 */
153 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
154 char *s1, char *s2, char *s3, char *s4)
156 struct lustre_cfg_bufs bufs;
157 struct lustre_cfg * lcfg = NULL;
160 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
161 cmd, s1, s2, s3, s4);
163 lustre_cfg_bufs_reset(&bufs, cfgname);
165 lustre_cfg_bufs_set_string(&bufs, 1, s1);
167 lustre_cfg_bufs_set_string(&bufs, 2, s2);
169 lustre_cfg_bufs_set_string(&bufs, 3, s3);
171 lustre_cfg_bufs_set_string(&bufs, 4, s4);
173 lcfg = lustre_cfg_new(cmd, &bufs);
174 lcfg->lcfg_nid = nid;
175 rc = class_process_config(lcfg);
176 lustre_cfg_free(lcfg);
179 EXPORT_SYMBOL(do_lcfg);
181 /** Call class_attach and class_setup. These methods in turn call
182 * obd type-specific methods.
/*
 * Bring up an obd in two steps, both issued through do_lcfg() with nid 0:
 * LCFG_ATTACH (carrying type and uuid) followed by LCFG_SETUP (carrying
 * s1..s4).  Failures are reported with CERROR; after the setup error message
 * an LCFG_DETACH is issued for the same obd name.
 *
 * NOTE(review): the declaration of rc, the rc checks guarding the error
 * paths and the return statements are on lines not shown here.
 */
184 int lustre_start_simple(char *obdname, char *type, char *uuid,
185 char *s1, char *s2, char *s3, char *s4)
188 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
190 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, NULL, NULL);
192 CERROR("%s attach error %d\n", obdname, rc);
/* Attach done; now run the type-specific setup. */
195 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
197 CERROR("%s setup error %d\n", obdname, rc);
/* Undo the attach; the result of the detach itself is ignored. */
198 do_lcfg(obdname, 0, LCFG_DETACH, NULL, NULL, NULL, NULL);
/* Taken by lustre_start_mgc() and lustre_stop_mgc() around MGC setup and teardown. */
203 DEFINE_MUTEX(mgc_start_lock);
205 /** Set up a mgc obd to process startup logs
207 * \param sb [in] super block of the mgc obd
209 * \retval 0 success, otherwise error code
/*
 * Find or create the MGC obd for this mount and connect it to the MGS.
 *
 * Steps visible in this listing:
 *  - pick a NID to name the MGC after: parsed from lmd_mgs (server), taken
 *    from the local LNet ids with each one tested against LOLND (MGS), or
 *    parsed from the device string lmd_dev (client);
 *  - under mgc_start_lock, build the obd name as LUSTRE_MGC_OBDNAME followed
 *    by the NID string;
 *  - if an obd with that name exists and is not stopping: push the mgssec
 *    setting to it, take another cl_mgc_refcount reference, reconcile a
 *    client's NOIR flag with the MGC's connect data, and set
 *    KEY_INIT_RECOV_BACKUP;
 *  - otherwise: register the primary and failover MGS NIDs as UUIDs with
 *    LCFG_ADD_UUID, start the obd through lustre_start_simple() with a random
 *    uuid, set the mgssec, set cl_mgc_refcount to 1 and obd_connect() it.
 *
 * NOTE(review): many lines of this function (several declarations, branch
 * bodies, rc checks, labels and the return) are not shown in this listing;
 * confirm the exact control flow between the steps above.
 */
211 int lustre_start_mgc(struct super_block *sb)
213 struct obd_connect_data *data = NULL;
214 struct lustre_sb_info *lsi = s2lsi(sb);
215 struct obd_device *obd;
216 struct obd_export *exp;
217 struct obd_uuid *uuid;
220 char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
222 int rc = 0, i = 0, j, len;
224 LASSERT(lsi->lsi_lmd);
226 /* Find the first non-lo MGS nid for our MGC name */
227 if (IS_SERVER(lsi)) {
228 /* mount -o mgsnode=nid */
229 ptr = lsi->lsi_lmd->lmd_mgs;
230 if (lsi->lsi_lmd->lmd_mgs &&
231 (class_parse_nid(lsi->lsi_lmd->lmd_mgs, &nid, &ptr) == 0)) {
233 } else if (IS_MGS(lsi)) {
234 lnet_process_id_t id;
235 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
236 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
243 } else { /* client */
244 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
245 ptr = lsi->lsi_lmd->lmd_dev;
246 if (class_parse_nid(ptr, &nid, &ptr) == 0)
250 CERROR("No valid MGS nids found.\n");
254 mutex_lock(&mgc_start_lock);
/*
 * len covers the MGC name plus its NUL.  niduuid gets len + 2 bytes and is
 * later formatted as "<mgcname>_<hex i>".
 * NOTE(review): that leaves room for a single hex digit only - confirm that
 * i stays below 0x10, otherwise the sprintf() calls on niduuid can overrun.
 */
256 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
257 OBD_ALLOC(mgcname, len);
258 OBD_ALLOC(niduuid, len + 2);
259 if (!mgcname || !niduuid)
260 GOTO(out_free, rc = -ENOMEM);
261 sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
/* An unset mgssec is passed on as the empty string. */
263 mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
267 GOTO(out_free, rc = -ENOMEM);
/* Re-use path: an obd with this name that is not being stopped. */
269 obd = class_name2obd(mgcname);
270 if (obd && !obd->obd_stopping) {
273 rc = obd_set_info_async(NULL, obd->obd_self_export,
274 strlen(KEY_MGSSEC), KEY_MGSSEC,
275 strlen(mgssec), mgssec, NULL);
279 /* Re-using an existing MGC */
280 atomic_inc(&obd->u.cli.cl_mgc_refcount);
282 /* IR compatibility check, only for clients */
283 if (lmd_is_client(lsi->lsi_lmd)) {
285 int vallen = sizeof(*data);
286 __u32 *flags = &lsi->lsi_lmd->lmd_flags;
288 rc = obd_get_info(NULL, obd->obd_self_export,
289 strlen(KEY_CONN_DATA), KEY_CONN_DATA,
290 &vallen, data, NULL);
292 has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
/*
 * True when the MGC's IMP_RECOV state disagrees with this mount's NOIR flag;
 * the flag is then rewritten to match the existing MGC.
 */
293 if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
294 /* LMD_FLG_NOIR is for test purpose only */
296 "Trying to mount a client with IR setting "
297 "not compatible with current mgc. "
298 "Force to use current mgc setting that is "
300 has_ir ? "enabled" : "disabled");
302 *flags &= ~LMD_FLG_NOIR;
304 *flags |= LMD_FLG_NOIR;
309 /* If we are restarting the MGS, don't try to keep the MGC's
310 old connection, or registration will fail. */
312 CDEBUG(D_MOUNT, "New MGS with live MGC\n");
316 /* Try all connections, but only once (again).
317 We don't want to block another target from starting
318 (using its local copy of the log), but we do want to connect
319 if at all possible. */
321 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,recov_bk);
322 rc = obd_set_info_async(NULL, obd->obd_self_export,
323 sizeof(KEY_INIT_RECOV_BACKUP),
324 KEY_INIT_RECOV_BACKUP,
325 sizeof(recov_bk), &recov_bk, NULL);
/* From here on a new MGC is created. */
329 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
331 /* Add the primary nids for the MGS */
333 sprintf(niduuid, "%s_%x", mgcname, i);
334 if (IS_SERVER(lsi)) {
335 ptr = lsi->lsi_lmd->lmd_mgs;
337 /* Use local nids (including LO) */
338 lnet_process_id_t id;
339 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
340 rc = do_lcfg(mgcname, id.nid,
341 LCFG_ADD_UUID, niduuid,
345 /* Use mgsnode= nids */
346 /* mount -o mgsnode=nid */
347 if (lsi->lsi_lmd->lmd_mgs) {
348 ptr = lsi->lsi_lmd->lmd_mgs;
349 } else if (class_find_param(ptr, PARAM_MGSNODE,
351 CERROR("No MGS nids given.\n");
352 GOTO(out_free, rc = -EINVAL);
354 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
355 rc = do_lcfg(mgcname, nid,
356 LCFG_ADD_UUID, niduuid,
361 } else { /* client */
362 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
363 ptr = lsi->lsi_lmd->lmd_dev;
364 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
365 rc = do_lcfg(mgcname, nid,
366 LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
368 /* Stop at the first failover nid */
374 CERROR("No valid MGS nids found.\n");
375 GOTO(out_free, rc = -EINVAL);
377 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
379 /* Random uuid for MGC allows easier reconnects */
381 ll_generate_random_uuid(uuidc);
382 class_uuid_unparse(uuidc, uuid);
385 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
386 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
387 niduuid, NULL, NULL);
392 /* Add any failover MGS nids */
394 while (ptr && ((*ptr == ':' ||
395 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
396 /* New failover node */
397 sprintf(niduuid, "%s_%x", mgcname, i);
399 while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
401 rc = do_lcfg(mgcname, nid,
402 LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
407 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
408 niduuid, NULL, NULL, NULL);
/* Remembered so lustre_stop_mgc() knows how many nid uuids to delete. */
415 lsi->lsi_lmd->lmd_mgs_failnodes = i;
417 obd = class_name2obd(mgcname);
419 CERROR("Can't find mgcobd %s\n", mgcname);
420 GOTO(out_free, rc = -ENOTCONN);
423 rc = obd_set_info_async(NULL, obd->obd_self_export,
424 strlen(KEY_MGSSEC), KEY_MGSSEC,
425 strlen(mgssec), mgssec, NULL);
429 /* Keep a refcount of servers/clients who started with "mount",
430 so we know when we can get rid of the mgc. */
431 atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
433 /* We connect to the MGS at setup, and don't disconnect until cleanup */
434 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
435 OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
436 OBD_CONNECT_LVB_TYPE;
438 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
439 data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
441 #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
/* A client mounted with NOIR does not request OBD_CONNECT_IMP_RECOV. */
444 if (lmd_is_client(lsi->lsi_lmd) &&
445 lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
446 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
447 data->ocd_version = LUSTRE_VERSION_CODE;
448 rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
450 CERROR("connect failed %d\n", rc);
/* The export from obd_connect() is kept for lustre_stop_mgc() to disconnect. */
454 obd->u.cli.cl_mgc_mgsexp = exp;
457 /* Keep the mgc info in the sb. Note that many lsi's can point
461 mutex_unlock(&mgc_start_lock);
466 OBD_FREE(mgcname, len);
468 OBD_FREE(niduuid, len + 2);
/*
 * Drop this mount's reference on the MGC and tear the MGC down when it was
 * the last one.
 *
 * Under mgc_start_lock the cl_mgc_refcount is decremented; if references
 * remain the function leaves with -EBUSY.  Otherwise the MGC is marked
 * obd_no_recov, its MGS export (if any) is disconnected, the obd is cleaned
 * up with class_manual_cleanup(), and one LCFG_DEL_UUID is issued for each of
 * the lmd_mgs_failnodes uuids named "<obd_name>_<hex index>".
 *
 * The niduuid buffer is strlen(obd_name) + 6 bytes, i.e. room for the name,
 * "_", up to four hex digits and the NUL.
 *
 * NOTE(review): the assignment of obd, several rc checks, the "out" label and
 * the return statement are on lines not shown in this listing.
 */
472 static int lustre_stop_mgc(struct super_block *sb)
474 struct lustre_sb_info *lsi = s2lsi(sb);
475 struct obd_device *obd;
476 char *niduuid = NULL, *ptr = NULL;
477 int i, rc = 0, len = 0;
486 mutex_lock(&mgc_start_lock);
487 LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
488 if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
489 /* This is not fatal, every client that stops
490 will call in here. */
491 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
492 atomic_read(&obd->u.cli.cl_mgc_refcount));
493 GOTO(out, rc = -EBUSY);
496 /* The MGC has no recoverable data in any case.
497 * force shutdown set in umount_begin */
498 obd->obd_no_recov = 1;
500 if (obd->u.cli.cl_mgc_mgsexp) {
501 /* An error is not fatal, if we are unable to send the
502 disconnect mgs ping evictor cleans up the export */
503 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
505 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
508 /* Save the obdname for cleaning the nid uuids, which are
510 len = strlen(obd->obd_name) + 6;
511 OBD_ALLOC(niduuid, len);
513 strcpy(niduuid, obd->obd_name);
514 ptr = niduuid + strlen(niduuid);
517 rc = class_manual_cleanup(obd);
521 /* Clean the nid uuids */
523 GOTO(out, rc = -ENOMEM);
525 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
526 sprintf(ptr, "_%x", i);
527 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
528 niduuid, NULL, NULL, NULL);
/* These are the MGC's nid uuids; the message previously said "MDC". */
530 CERROR("del MGC UUID %s failed: rc = %d\n",
535 OBD_FREE(niduuid, len);
537 /* class_import_put will get rid of the additional connections */
538 mutex_unlock(&mgc_start_lock);
542 /***************** lustre superblock **************/
/*
 * Set up the lustre_sb_info for a superblock.
 *
 * Allocates the lustre_mount_data (lsi_lmd) with an empty exclude list and
 * zero soft/hard recovery times, stores the lsi in the superblock through
 * s2lsi_nocast(sb), sets lsi_mounts to 1 (the reference held for setup) and
 * defaults lsi_flags to LSI_UMOUNT_FAILOVER.
 *
 * NOTE(review): the allocation of lsi itself, the allocation failure checks
 * and the return statement are on lines not shown here.
 */
544 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
546 struct lustre_sb_info *lsi;
551 OBD_ALLOC_PTR(lsi->lsi_lmd);
557 lsi->lsi_lmd->lmd_exclude_count = 0;
558 lsi->lsi_lmd->lmd_recovery_time_soft = 0;
559 lsi->lsi_lmd->lmd_recovery_time_hard = 0;
560 s2lsi_nocast(sb) = lsi;
561 /* we take 1 extra ref for our setup */
562 atomic_set(&lsi->lsi_mounts, 1);
564 /* Default umount style */
565 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
/*
 * Release the lustre_sb_info attached to sb and everything it owns.
 *
 * Asserts that the lsi exists, that no mount references remain
 * (lsi_mounts == 0) and that lsi_llsbi is already NULL.  Each string in the
 * mount data is freed with a size of strlen() + 1, the exclude array by
 * element count, and lmd_params with a fixed size of 4096.  The lsi is then
 * freed and the superblock's lsi pointer cleared.
 *
 * NOTE(review): the return statement is on a line not shown here.
 */
570 static int lustre_free_lsi(struct super_block *sb)
572 struct lustre_sb_info *lsi = s2lsi(sb);
574 LASSERT(lsi != NULL);
575 CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
577 /* someone didn't call server_put_mount. */
578 LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
580 if (lsi->lsi_lmd != NULL) {
581 if (lsi->lsi_lmd->lmd_dev != NULL)
582 OBD_FREE(lsi->lsi_lmd->lmd_dev,
583 strlen(lsi->lsi_lmd->lmd_dev) + 1);
584 if (lsi->lsi_lmd->lmd_profile != NULL)
585 OBD_FREE(lsi->lsi_lmd->lmd_profile,
586 strlen(lsi->lsi_lmd->lmd_profile) + 1);
587 if (lsi->lsi_lmd->lmd_mgssec != NULL)
588 OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
589 strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
590 if (lsi->lsi_lmd->lmd_opts != NULL)
591 OBD_FREE(lsi->lsi_lmd->lmd_opts,
592 strlen(lsi->lsi_lmd->lmd_opts) + 1);
/* The exclude array is keyed on its count rather than on the pointer. */
593 if (lsi->lsi_lmd->lmd_exclude_count)
594 OBD_FREE(lsi->lsi_lmd->lmd_exclude,
595 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
596 lsi->lsi_lmd->lmd_exclude_count);
597 if (lsi->lsi_lmd->lmd_mgs != NULL)
598 OBD_FREE(lsi->lsi_lmd->lmd_mgs,
599 strlen(lsi->lsi_lmd->lmd_mgs) + 1);
600 if (lsi->lsi_lmd->lmd_osd_type != NULL)
601 OBD_FREE(lsi->lsi_lmd->lmd_osd_type,
602 strlen(lsi->lsi_lmd->lmd_osd_type) + 1);
/* Fixed-size buffer: must match the 4096 bytes allocated in lmd_parse(). */
603 if (lsi->lsi_lmd->lmd_params != NULL)
604 OBD_FREE(lsi->lsi_lmd->lmd_params, 4096);
606 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
609 LASSERT(lsi->lsi_llsbi == NULL);
610 OBD_FREE(lsi, sizeof(*lsi));
611 s2lsi_nocast(sb) = NULL;
616 /* The lsi has one reference for every server that is using the disk -
617 e.g. MDT, MGS, and potentially MGC */
/*
 * Drop one reference on the superblock's lustre_sb_info.
 *
 * When lsi_mounts reaches zero and this is a server with an OSD export, the
 * dt device reference is put, the export's obd_lvfs_ctxt.dt and lsi_dt_dev
 * are cleared, the export is disconnected and obd_zombie_barrier() waits for
 * the OSD to go away.
 *
 * NOTE(review): what follows the barrier (freeing the lsi, return values) is
 * on lines not shown here.
 */
618 int lustre_put_lsi(struct super_block *sb)
620 struct lustre_sb_info *lsi = s2lsi(sb);
622 LASSERT(lsi != NULL);
624 CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
625 if (atomic_dec_and_test(&lsi->lsi_mounts)) {
626 if (IS_SERVER(lsi) && lsi->lsi_osd_exp) {
627 lu_device_put(&lsi->lsi_dt_dev->dd_lu_dev);
628 lsi->lsi_osd_exp->exp_obd->obd_lvfs_ctxt.dt = NULL;
629 lsi->lsi_dt_dev = NULL;
630 obd_disconnect(lsi->lsi_osd_exp);
631 /* wait till OSD is gone */
632 obd_zombie_barrier();
641 * <FSNAME><SEPARATOR><TYPE><INDEX>
642 * FSNAME is between 1 and 8 characters (inclusive).
643 * Excluded characters are '/' and ':'
644 * SEPARATOR is either ':' or '-'
645 * TYPE: "OST", "MDT", etc.
646 * INDEX: Hex representation of the index
649 /** Get the fsname ("lustre") from the server name ("lustre-OST003F").
650 * @param [in] svname server name including type and index
651 * @param [out] fsname Buffer to copy filesystem name prefix into.
652 * Must have at least 'strlen(fsname) + 1' chars.
653 * @param [out] endptr if endptr isn't NULL it is set to end of fsname
/*
 * Locate the separator that ends the fsname part of a server name.
 *
 * The scan starts at most 8 characters into svname and walks backwards until
 * it finds '-' or ':' or reaches the start of the string.  When fsname is
 * non-NULL the characters before that position are copied into it and
 * NUL-terminated, so the buffer needs room for them plus the terminator.
 *
 * NOTE(review): the declaration of dash, the loop body, the handling of
 * "no separator found", the store to *endptr and the return statement are on
 * lines not shown here.
 */
656 int server_name2fsname(const char *svname, char *fsname, const char **endptr)
660 dash = svname + strnlen(svname, 8); /* max fsname length is 8 */
661 for (; dash > svname && *dash != '-' && *dash != ':'; dash--)
666 if (fsname != NULL) {
/* strncpy() does not terminate here; the next line does. */
667 strncpy(fsname, svname, dash - svname);
668 fsname[dash - svname] = '\0';
676 EXPORT_SYMBOL(server_name2fsname);
679 * Get service name (svname) from string
681 * if endptr isn't NULL it is set to end of fsname *
/*
 * Copy the part of label that follows the fsname separator into svname.
 *
 * server_name2fsname() is called with a NULL fsname, purely to find the
 * separator.  The text after it is copied with strlcpy(); a result of svsize
 * or more means it did not fit.
 *
 * NOTE(review): the final parameter (presumably svsize), the local
 * declarations, the error codes and the endptr handling are on lines not
 * shown here.
 */
683 int server_name2svname(const char *label, char *svname, const char **endptr,
689 /* We use server_name2fsname() just for parsing */
690 rc = server_name2fsname(label, NULL, &dash);
697 if (strlcpy(svname, dash + 1, svsize) >= svsize)
702 EXPORT_SYMBOL(server_name2svname);
705 /* Get the index from the obd name.
708 if endptr isn't NULL it is set to end of name */
/*
 * Parse the target type and hexadecimal index out of a server name.
 *
 * After server_name2fsname() locates the separator, the type is recognised
 * as "MDT" (LDD_F_SV_TYPE_MDT) or "OST" (LDD_F_SV_TYPE_OST).  An index of
 * "all" returns the type ORed with LDD_F_SV_ALL; otherwise the index is read
 * with simple_strtoul() in base 16, which also stores the end position
 * through endptr.  If endptr is supplied and the text one character past the
 * index matches LUSTRE_MDC_NAME, *endptr is advanced by
 * sizeof(LUSTRE_MDC_NAME).
 *
 * NOTE(review): the local declarations, the pointer advances between these
 * steps, the store to *idx and the final return are on lines not shown here.
 * NOTE(review): the LUSTRE_MDC_NAME comparison reads from *endptr + 1;
 * confirm this cannot look past the terminating NUL when the index ends the
 * string.
 */
709 int server_name2index(const char *svname, __u32 *idx, const char **endptr)
715 /* We use server_name2fsname() just for parsing */
716 rc = server_name2fsname(svname, NULL, &dash);
722 if (strncmp(dash, "MDT", 3) == 0)
723 rc = LDD_F_SV_TYPE_MDT;
724 else if (strncmp(dash, "OST", 3) == 0)
725 rc = LDD_F_SV_TYPE_OST;
731 if (strncmp(dash, "all", 3) == 0) {
734 return rc | LDD_F_SV_ALL;
737 index = simple_strtoul(dash, (char **)endptr, 16);
741 /* Account for -mdc after index that is possible when specifying mdt */
742 if (endptr != NULL && strncmp(LUSTRE_MDC_NAME, *endptr + 1,
743 sizeof(LUSTRE_MDC_NAME)-1) == 0)
744 *endptr += sizeof(LUSTRE_MDC_NAME);
748 EXPORT_SYMBOL(server_name2index);
750 /*************** mount common between server and client ***************/
/*
 * Teardown shared by the mount paths: drop this superblock's reference on
 * the MGC through lustre_stop_mgc().  A result of -ENOENT is not treated as
 * an error; other failures are either logged with CERROR or, per the existing
 * comment, are the "still in use" case that another obd will clean up.
 *
 * NOTE(review): the test separating those two cases, the step that drops the
 * reference to the mounted disk and the return statement are on lines not
 * shown here.
 */
753 int lustre_common_put_super(struct super_block *sb)
757 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
759 /* Drop a ref to the MGC */
760 rc = lustre_stop_mgc(sb);
761 if (rc && (rc != -ENOENT)) {
763 CERROR("Can't stop MGC: %d\n", rc);
766 /* BUSY just means that there's some other obd that
767 needs the mgc. Let him clean it up. */
768 CDEBUG(D_MOUNT, "MGC still in use\n");
770 /* Drop a ref to the mounted disk */
775 EXPORT_SYMBOL(lustre_common_put_super);
/*
 * Dump the parsed mount data at D_MOUNT debug level: the profile (clients
 * only), device, flags, options, any non-zero soft/hard recovery times and
 * every excluded OST index.
 *
 * NOTE(review): the declaration of i and any guard in front of the options
 * line are on lines not shown here.
 */
777 static void lmd_print(struct lustre_mount_data *lmd)
781 PRINT_CMD(D_MOUNT, " mount data:\n");
782 if (lmd_is_client(lmd))
783 PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
784 PRINT_CMD(D_MOUNT, "device: %s\n", lmd->lmd_dev);
785 PRINT_CMD(D_MOUNT, "flags: %x\n", lmd->lmd_flags);
788 PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
790 if (lmd->lmd_recovery_time_soft)
791 PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
792 lmd->lmd_recovery_time_soft);
794 if (lmd->lmd_recovery_time_hard)
795 PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
796 lmd->lmd_recovery_time_hard);
798 for (i = 0; i < lmd->lmd_exclude_count; i++) {
799 PRINT_CMD(D_MOUNT, "exclude %d: OST%04x\n", i,
800 lmd->lmd_exclude[i]);
804 /* Is this server on the exclusion list */
/*
 * Check whether svname appears on this mount's exclude list.
 *
 * The name is decoded with server_name2index(); anything that is not an OST
 * is never excluded.  The OST index is then compared with each entry of
 * lmd_exclude, and a match is reported with CWARN.
 *
 * NOTE(review): the local declarations and the return statements (including
 * the value that signals "excluded") are on lines not shown here.
 */
805 int lustre_check_exclusion(struct super_block *sb, char *svname)
807 struct lustre_sb_info *lsi = s2lsi(sb);
808 struct lustre_mount_data *lmd = lsi->lsi_lmd;
812 rc = server_name2index(svname, &index, NULL);
813 if (rc != LDD_F_SV_TYPE_OST)
814 /* Only exclude OSTs */
817 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
818 index, lmd->lmd_exclude_count, lmd->lmd_dev);
820 for(i = 0; i < lmd->lmd_exclude_count; i++) {
821 if (index == lmd->lmd_exclude[i]) {
822 CWARN("Excluding %s (on exclusion list)\n", svname);
829 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
/*
 * Build lmd->lmd_exclude from the value of an "exclude=" mount option.
 *
 * ptr is expected to point at the '=' (lmd_parse() passes s1 + 7 for
 * "exclude=").  A temporary array sized for the worst case of one name per 8
 * characters collects the indices: each name is decoded with
 * server_name2index(), OST indices are recorded and other types are skipped
 * with a debug message.  Parsing stops at end of string, a space or a comma,
 * or once devmax entries have been collected.  The collected indices are then
 * copied into a permanent array of exactly lmd_exclude_count entries and the
 * temporary array is freed.
 *
 * NOTE(review): the declarations of rc and devmax, the allocation failure
 * checks, the advance of s1 between names, the error exit for an unparsable
 * name and the return statement are on lines not shown here.
 */
830 static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
832 const char *s1 = ptr, *s2;
833 __u32 index, *exclude_list;
836 /* The shortest an ost name can be is 8 chars: -OST0000.
837 We don't actually know the fsname at this time, so in fact
838 a user could specify any fsname. */
839 devmax = strlen(ptr) / 8 + 1;
841 /* temp storage until we figure out how many we have */
842 OBD_ALLOC(exclude_list, sizeof(index) * devmax);
846 /* we enter this fn pointing at the '=' */
847 while (*s1 && *s1 != ' ' && *s1 != ',') {
849 rc = server_name2index(s1, &index, &s2);
851 CERROR("Can't parse server name '%s': rc = %d\n",
855 if (rc == LDD_F_SV_TYPE_OST)
856 exclude_list[lmd->lmd_exclude_count++] = index;
858 CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
859 (uint)(s2-s1), s1, rc);
861 /* now we are pointing at ':' (next exclude)
862 or ',' (end of excludes) */
/* Never write past the temporary array. */
863 if (lmd->lmd_exclude_count >= devmax)
866 if (rc >= 0) /* non-err */
869 if (lmd->lmd_exclude_count) {
870 /* permanent, freed in lustre_free_lsi */
871 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
872 lmd->lmd_exclude_count);
873 if (lmd->lmd_exclude) {
874 memcpy(lmd->lmd_exclude, exclude_list,
875 sizeof(index) * lmd->lmd_exclude_count);
/* Resetting the count keeps lustre_free_lsi() from freeing a missing array. */
878 lmd->lmd_exclude_count = 0;
881 OBD_FREE(exclude_list, sizeof(index) * devmax);
/*
 * Store the value of an "mgssec=" option in lmd->lmd_mgssec.
 *
 * Any previous value is freed first.  The value is measured either up to the
 * next ',' or to the end of ptr, then copied into a fresh buffer of
 * length + 1 bytes and NUL-terminated.
 *
 * NOTE(review): the local declarations, the branch that uses tail to compute
 * length, the allocation-failure return value and the final return are on
 * lines not shown here.
 */
885 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
890 if (lmd->lmd_mgssec != NULL) {
891 OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1);
892 lmd->lmd_mgssec = NULL;
895 tail = strchr(ptr, ',');
897 length = strlen(ptr);
901 OBD_ALLOC(lmd->lmd_mgssec, length + 1);
902 if (lmd->lmd_mgssec == NULL)
905 memcpy(lmd->lmd_mgssec, ptr, length);
906 lmd->lmd_mgssec[length] = '\0';
/*
 * Replace the string held in *handle with the option value at ptr.
 *
 * Both arguments are checked for NULL.  An existing string is freed with a
 * size of strlen() + 1.  The value is measured either up to the next ',' or
 * to the end of ptr, then copied into a fresh buffer of length + 1 bytes and
 * NUL-terminated.
 *
 * NOTE(review): the local declarations, the return values, the branch that
 * uses tail to compute length and the allocation failure check are on lines
 * not shown here.
 */
910 static int lmd_parse_string(char **handle, char *ptr)
915 if ((handle == NULL) || (ptr == NULL))
918 if (*handle != NULL) {
919 OBD_FREE(*handle, strlen(*handle) + 1);
923 tail = strchr(ptr, ',');
925 length = strlen(ptr);
929 OBD_ALLOC(*handle, length + 1);
933 memcpy(*handle, ptr, length);
934 (*handle)[length] = '\0';
939 /* Collect multiple values for mgsnid specifiers */
/*
 * Append the NID list found at *ptr to lmd->lmd_mgs.
 *
 * The end of the list is the first position class_parse_nid_quiet() cannot
 * parse.  If lmd_mgs already holds a value, the new buffer starts with the
 * old string, its terminating NUL is overwritten with ':' and the old buffer
 * is freed; the new NID text follows and the result is NUL-terminated.
 *
 * NOTE(review): the local declarations (including the initial values of
 * oldlen and tail), the empty-list check in front of the console error, the
 * allocation failure check, any update of *ptr and the return statement are
 * on lines not shown here.
 */
940 static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
948 /* Find end of nidlist */
949 while (class_parse_nid_quiet(tail, &nid, &tail) == 0) {}
950 length = tail - *ptr;
952 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
/* oldlen counts the old string including its NUL. */
956 if (lmd->lmd_mgs != NULL)
957 oldlen = strlen(lmd->lmd_mgs) + 1;
959 OBD_ALLOC(mgsnid, oldlen + length + 1);
963 if (lmd->lmd_mgs != NULL) {
964 /* Multiple mgsnid= are taken to mean failover locations */
965 memcpy(mgsnid, lmd->lmd_mgs, oldlen);
966 mgsnid[oldlen - 1] = ':';
967 OBD_FREE(lmd->lmd_mgs, oldlen);
969 memcpy(mgsnid + oldlen, *ptr, length);
970 mgsnid[oldlen + length] = '\0';
971 lmd->lmd_mgs = mgsnid;
977 /** Parse mount line options
978 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
979 * dev is passed as device=uml1:/lustre by mount.lustre
/*
 * Parse the mount option string into lmd.
 *
 * A missing option string and the old binary mount-data format (recognised
 * by its LMD_MAGIC prefix) are reported on the console.  A 4096-byte
 * lmd_params buffer is allocated and emptied, then the options are walked,
 * skipping spaces and extra commas.  Options are matched by prefix:
 *   flags:  abort_recov, noir, nosvc, nomgs, noscrub, writeconf, update,
 *           virgin, noprimnode, mgs
 *   values: recovery_time_soft=, recovery_time_hard= (each raised to at
 *           least OBD_RECOVERY_TIME_MIN), PARAM_MGSNODE, mgssec=, exclude=,
 *           svname= (stored in lmd_profile), param= (appended to lmd_params),
 *           osd=, device=
 * A device name containing ":/" marks a client mount; its profile becomes
 * "<fsname>-client".  The device name is copied to lmd_dev and whatever is
 * left in the option string, minus trailing commas and spaces, to lmd_opts.
 *
 * NOTE(review): the loop header, the per-option bookkeeping, all rc checks,
 * the failure label in front of the "Bad mount options" message and the
 * return statements are on lines not shown in this listing.
 */
981 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
983 char *s1, *s2, *devname = NULL;
984 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
989 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
990 "/sbin/mount.lustre is installed.\n");
994 /* Options should be a string - try to detect old lmd data */
995 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
996 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
997 "/sbin/mount.lustre. Please install "
998 "version %s\n", LUSTRE_VERSION_STRING);
1001 lmd->lmd_magic = LMD_MAGIC;
/* Fixed size; lustre_free_lsi() frees it with the same 4096. */
1003 OBD_ALLOC(lmd->lmd_params, 4096);
1004 if (lmd->lmd_params == NULL)
1006 lmd->lmd_params[0] = '\0';
1008 /* Set default flags here */
1013 int time_min = OBD_RECOVERY_TIME_MIN;
1015 /* Skip whitespace and extra commas */
1016 while (*s1 == ' ' || *s1 == ',')
1019 /* Client options are parsed in ll_options: eg. flock,
1022 /* Parse non-ldiskfs options here. Rather than modifying
1023 ldiskfs, we just zero these out here */
1024 if (strncmp(s1, "abort_recov", 11) == 0) {
1025 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
1027 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
1028 lmd->lmd_recovery_time_soft = max_t(int,
1029 simple_strtoul(s1 + 19, NULL, 10), time_min);
1031 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
1032 lmd->lmd_recovery_time_hard = max_t(int,
1033 simple_strtoul(s1 + 19, NULL, 10), time_min);
1035 } else if (strncmp(s1, "noir", 4) == 0) {
1036 lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
1038 } else if (strncmp(s1, "nosvc", 5) == 0) {
1039 lmd->lmd_flags |= LMD_FLG_NOSVC;
1041 } else if (strncmp(s1, "nomgs", 5) == 0) {
1042 lmd->lmd_flags |= LMD_FLG_NOMGS;
1044 } else if (strncmp(s1, "noscrub", 7) == 0) {
1045 lmd->lmd_flags |= LMD_FLG_NOSCRUB;
1047 } else if (strncmp(s1, PARAM_MGSNODE,
1048 sizeof(PARAM_MGSNODE) - 1) == 0) {
1049 s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
1050 /* Assume the next mount opt is the first
1051 invalid nid we get to. */
1052 rc = lmd_parse_mgs(lmd, &s2);
1056 } else if (strncmp(s1, "writeconf", 9) == 0) {
1057 lmd->lmd_flags |= LMD_FLG_WRITECONF;
1059 } else if (strncmp(s1, "update", 6) == 0) {
1060 lmd->lmd_flags |= LMD_FLG_UPDATE;
1062 } else if (strncmp(s1, "virgin", 6) == 0) {
1063 lmd->lmd_flags |= LMD_FLG_VIRGIN;
1065 } else if (strncmp(s1, "noprimnode", 10) == 0) {
1066 lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
1068 } else if (strncmp(s1, "mgssec=", 7) == 0) {
1069 rc = lmd_parse_mgssec(lmd, s1 + 7);
1073 /* ost exclusion list */
1074 } else if (strncmp(s1, "exclude=", 8) == 0) {
/* s1 + 7 is the '=' itself, which is where lmd_make_exclusion() expects to start. */
1075 rc = lmd_make_exclusion(lmd, s1 + 7);
/*
 * Prefix test: it only sees options starting with "mgs" that no earlier
 * branch (such as "mgssec=") has already claimed, so its position in the
 * chain matters.
 */
1079 } else if (strncmp(s1, "mgs", 3) == 0) {
1081 lmd->lmd_flags |= LMD_FLG_MGS;
1083 } else if (strncmp(s1, "svname=", 7) == 0) {
1084 rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
1088 } else if (strncmp(s1, "param=", 6) == 0) {
1090 char *tail = strchr(s1 + 6, ',');
1092 length = strlen(s1);
/*
 * NOTE(review): no check against the 4096-byte size of lmd_params is visible
 * before these appends - confirm that the accumulated "param=" values cannot
 * exceed the buffer.
 */
1096 strncat(lmd->lmd_params, s1 + 6, length);
1097 strcat(lmd->lmd_params, " ");
1099 } else if (strncmp(s1, "osd=", 4) == 0) {
1100 rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
1105 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1106 end of the options. */
1107 else if (strncmp(s1, "device=", 7) == 0) {
1109 /* terminate options right before device. device
1110 must be the last one. */
1116 s2 = strchr(s1, ',');
/* The regions overlap, hence memmove(); the + 1 carries the NUL along. */
1124 memmove(s1, s2, strlen(s2) + 1);
1130 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
1131 "(need mount option 'device=...')\n");
/* ":/" in the device name is what identifies a client mount. */
1135 s1 = strstr(devname, ":/");
1138 lmd->lmd_flags |= LMD_FLG_CLIENT;
1139 /* Remove leading /s from fsname */
1140 while (*++s1 == '/') ;
1141 /* Freed in lustre_free_lsi */
/* + 8 covers "-client" (7 characters) and the NUL. */
1142 OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
1143 if (!lmd->lmd_profile)
1145 sprintf(lmd->lmd_profile, "%s-client", s1);
1148 /* Freed in lustre_free_lsi */
1149 OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
1152 strcpy(lmd->lmd_dev, devname);
1154 /* Save mount options */
1155 s1 = options + strlen(options) - 1;
1156 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1158 if (*options != 0) {
1159 /* Freed in lustre_free_lsi */
1160 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
1163 strcpy(lmd->lmd_opts, options);
1167 lmd->lmd_magic = LMD_MAGIC;
1172 CERROR("Bad mount options %s\n", options);
/*
 * Bundle handed from lustre_mount() to lustre_fill_super() through
 * mount_nodev().  lustre_fill_super() reads the option data from lmd2_data
 * and passes lmd2_mnt on to the client fill_super hook.
 * NOTE(review): the lmd2_data member and the closing brace are on lines not
 * shown in this listing.
 */
1176 struct lustre_mount_data2 {
1178 struct vfsmount *lmd2_mnt;
1181 /** This is the entry point for the mount call into Lustre.
1182 * This is called when a server or client is mounted,
1183 * and this is where we start setting things up.
1184 * @param data Mount options (e.g. -o flock,abort_recov)
/*
 * fill_super callback that lustre_mount() passes to mount_nodev().
 *
 * data points at a struct lustre_mount_data2.  Visible steps: create the lsi
 * with lustre_init_lsi(), wait in obd_zombie_barrier() for cleanup left over
 * from a previous mount, and parse the option string with lmd_parse()
 * (failure gives -EINVAL).  For a client mount the "lustre" module is
 * requested if no client fill_super hook is registered yet, the MGC is
 * started with lustre_start_mgc() and the registered hook is called with the
 * superblock and lmd2_mnt.  Any other mount is rejected with the
 * "client-side-only module" error.  The outcome is logged at the end.
 *
 * NOTE(review): the declaration of rc, the assignment of lmd, the lockdep
 * handling mentioned in the comment fragments, the rc checks, the "out" label
 * and the return statement are on lines not shown in this listing.
 */
1186 int lustre_fill_super(struct super_block *sb, void *data, int silent)
1188 struct lustre_mount_data *lmd;
1189 struct lustre_mount_data2 *lmd2 = data;
1190 struct lustre_sb_info *lsi;
1193 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1195 lsi = lustre_init_lsi(sb);
1201 * Disable lockdep during mount, because mount locking patterns are
1207 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
1209 obd_zombie_barrier();
1211 /* Figure out the lmd from the mount options */
1212 if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
1214 GOTO(out, rc = -EINVAL);
1217 if (lmd_is_client(lmd)) {
1218 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
/* The hook is tested again after request_module() in case loading registered it. */
1219 if (client_fill_super == NULL)
1220 request_module("lustre");
1221 if (client_fill_super == NULL) {
1222 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
1223 "client mount! Is the 'lustre' "
1224 "module loaded?\n");
1228 rc = lustre_start_mgc(sb);
1233 /* Connect and start */
1234 /* (should always be ll_fill_super) */
1235 rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
1236 /* c_f_s will call lustre_common_put_super on failure */
1239 CERROR("This is client-side-only module, "
1240 "cannot handle server mount.\n");
1244 /* If error happens in fill_super() call, @lsi will be killed there.
1245 * This is why we do not put it here. */
/* lmd_dev is only read while the superblock still has an lsi. */
1249 CERROR("Unable to mount %s (%d)\n",
1250 s2lsi(sb) ? lmd->lmd_dev : "", rc);
1252 CDEBUG(D_SUPER, "Mount %s complete\n",
1260 /* We can't call ll_fill_super by name because it lives in a module that
1261 must be loaded after this one. */
/*
 * Install the client fill_super hook that lustre_fill_super() calls for
 * client mounts.  The pointer is stored as given, with no locking visible
 * here.
 */
1262 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
1263 struct vfsmount *mnt))
1265 client_fill_super = cfs;
1267 EXPORT_SYMBOL(lustre_register_client_fill_super);
/*
 * Install the hook that lustre_kill_super() calls for non-server
 * superblocks.  The pointer is stored as given.
 */
1269 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
1271 kill_super_cb = cfs;
1273 EXPORT_SYMBOL(lustre_register_kill_super_cb);
1275 /***************** FS registration ******************/
/*
 * .mount method of lustre_fs_type.  Wraps the raw mount data in a
 * lustre_mount_data2 (second member NULL) and lets mount_nodev() call
 * lustre_fill_super() with it.  devname is not used in the visible lines.
 */
1276 struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
1277 const char *devname, void *data)
1279 struct lustre_mount_data2 lmd2 = { data, NULL };
1281 return mount_nodev(fs_type, flags, &lmd2, lustre_fill_super);
/*
 * .kill_sb method of lustre_fs_type.  If a kill hook is registered and the
 * superblock has an lsi that is not a server, the hook runs first; then
 * kill_anon_super() is called.
 */
1284 void lustre_kill_super(struct super_block *sb)
1286 struct lustre_sb_info *lsi = s2lsi(sb);
1288 if (kill_super_cb && lsi && !IS_SERVER(lsi))
1289 (*kill_super_cb)(sb);
1291 kill_anon_super(sb);
1294 /** Register the "lustre" fs type
/*
 * Filesystem type passed to register_filesystem() by lustre_register_fs();
 * mounts go through lustre_mount() and teardown through lustre_kill_super().
 * NOTE(review): the .name initialiser and the closing brace are on lines not
 * shown in this listing.
 */
1296 struct file_system_type lustre_fs_type = {
1297 .owner = THIS_MODULE,
1299 .mount = lustre_mount,
1300 .kill_sb = lustre_kill_super,
1301 .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
1302 FS_HAS_FIEMAP | FS_RENAME_DOES_D_MOVE,
1304 MODULE_ALIAS_FS("lustre");
/* Register lustre_fs_type with the VFS; returns register_filesystem()'s result. */
1306 int lustre_register_fs(void)
1308 return register_filesystem(&lustre_fs_type);
/* Unregister lustre_fs_type; returns unregister_filesystem()'s result. */
1311 int lustre_unregister_fs(void)
1313 return unregister_filesystem(&lustre_fs_type);