blob: 8fc2d3f2dfd67e9702d8f89bd8506df2ba1eeca0 [file] [log] [blame]
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/include/lustre_mdc.h
 *
 * MDS data structures.
 * See also lustre_idl.h for wire formats of requests.
 */
38
39#ifndef _LUSTRE_MDC_H
40#define _LUSTRE_MDC_H
41
42/** \defgroup mdc mdc
43 *
44 * @{
45 */
46
Peng Tao04eb2b72013-12-03 22:11:38 +080047#include <linux/fs.h>
48#include <linux/dcache.h>
John L. Hammond00d65ec2014-08-28 18:35:13 -050049#include "lustre_intent.h"
Greg Kroah-Hartman1accaad2014-07-11 21:34:24 -070050#include "lustre_handles.h"
Greg Kroah-Hartman9fdaf8c2014-07-11 20:51:16 -070051#include "../../include/linux/libcfs/libcfs.h"
Greg Kroah-Hartman1accaad2014-07-11 21:34:24 -070052#include "obd_class.h"
53#include "lustre/lustre_idl.h"
54#include "lustre_lib.h"
55#include "lustre_dlm.h"
56#include "lustre_export.h"
Peng Taod7e09d02013-05-02 16:46:55 +080057
58struct ptlrpc_client;
59struct obd_export;
60struct ptlrpc_request;
61struct obd_device;
62
Ned Bass6a1938d2016-04-04 21:36:57 -040063/**
64 * Serializes in-flight MDT-modifying RPC requests to preserve idempotency.
65 *
66 * This mutex is used to implement execute-once semantics on the MDT.
67 * The MDT stores the last transaction ID and result for every client in
68 * its last_rcvd file. If the client doesn't get a reply, it can safely
69 * resend the request and the MDT will reconstruct the reply being aware
70 * that the request has already been executed. Without this lock,
71 * execution status of concurrent in-flight requests would be
72 * overwritten.
73 *
74 * This design limits the extent to which we can keep a full pipeline of
75 * in-flight requests from a single client. This limitation could be
76 * overcome by allowing multiple slots per client in the last_rcvd file.
77 */
Peng Taod7e09d02013-05-02 16:46:55 +080078struct mdc_rpc_lock {
Ned Bass6a1938d2016-04-04 21:36:57 -040079 /** Lock protecting in-flight RPC concurrency. */
Peng Taod7e09d02013-05-02 16:46:55 +080080 struct mutex rpcl_mutex;
Ned Bass6a1938d2016-04-04 21:36:57 -040081 /** Intent associated with currently executing request. */
Peng Taod7e09d02013-05-02 16:46:55 +080082 struct lookup_intent *rpcl_it;
Ned Bass6a1938d2016-04-04 21:36:57 -040083 /** Used for MDS/RPC load testing purposes. */
Peng Taod7e09d02013-05-02 16:46:55 +080084 int rpcl_fakes;
85};
86
/*
 * Sentinel stored in mdc_rpc_lock::rpcl_it while fake requests
 * (OBD_FAIL_MDC_RPCS_SEM) are outstanding. It is an arbitrary non-NULL
 * pointer value that the helpers in this header only compare against and
 * never dereference.
 */
#define MDC_FAKE_RPCL_IT ((void *)0x2c0012bfUL)
88
89static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck)
90{
91 mutex_init(&lck->rpcl_mutex);
92 lck->rpcl_it = NULL;
93}
94
95static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
96 struct lookup_intent *it)
97{
Oleg Drokind2a13982016-02-16 00:46:52 -050098 if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
wang dia0d73eb2016-08-16 16:18:19 -040099 it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
Peng Taod7e09d02013-05-02 16:46:55 +0800100 return;
101
102 /* This would normally block until the existing request finishes.
103 * If fail_loc is set it will block until the regular request is
104 * done, then set rpcl_it to MDC_FAKE_RPCL_IT. Once that is set
105 * it will only be cleared when all fake requests are finished.
106 * Only when all fake requests are finished can normal requests
Oleg Drokinc56e2562016-02-24 22:00:25 -0500107 * be sent, to ensure they are recoverable again.
108 */
Peng Taod7e09d02013-05-02 16:46:55 +0800109 again:
110 mutex_lock(&lck->rpcl_mutex);
111
112 if (CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM)) {
113 lck->rpcl_it = MDC_FAKE_RPCL_IT;
114 lck->rpcl_fakes++;
115 mutex_unlock(&lck->rpcl_mutex);
116 return;
117 }
118
119 /* This will only happen when the CFS_FAIL_CHECK() was
120 * just turned off but there are still requests in progress.
121 * Wait until they finish. It doesn't need to be efficient
122 * in this extremely rare case, just have low overhead in
Oleg Drokinc56e2562016-02-24 22:00:25 -0500123 * the common case when it isn't true.
124 */
Peng Taod7e09d02013-05-02 16:46:55 +0800125 while (unlikely(lck->rpcl_it == MDC_FAKE_RPCL_IT)) {
126 mutex_unlock(&lck->rpcl_mutex);
127 schedule_timeout(cfs_time_seconds(1) / 4);
128 goto again;
129 }
130
Oleg Drokind2a13982016-02-16 00:46:52 -0500131 LASSERT(!lck->rpcl_it);
Peng Taod7e09d02013-05-02 16:46:55 +0800132 lck->rpcl_it = it;
133}
134
135static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
136 struct lookup_intent *it)
137{
Oleg Drokind2a13982016-02-16 00:46:52 -0500138 if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
wang dia0d73eb2016-08-16 16:18:19 -0400139 it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
Greg Kroah-Hartman23f14e72013-08-03 06:01:58 +0800140 return;
Peng Taod7e09d02013-05-02 16:46:55 +0800141
142 if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM */
143 mutex_lock(&lck->rpcl_mutex);
144
145 LASSERTF(lck->rpcl_fakes > 0, "%d\n", lck->rpcl_fakes);
146 lck->rpcl_fakes--;
147
148 if (lck->rpcl_fakes == 0)
149 lck->rpcl_it = NULL;
150
151 } else {
152 LASSERTF(it == lck->rpcl_it, "%p != %p\n", it, lck->rpcl_it);
153 lck->rpcl_it = NULL;
154 }
155
156 mutex_unlock(&lck->rpcl_mutex);
Peng Taod7e09d02013-05-02 16:46:55 +0800157}
158
Ned Bass8ed62e92016-09-18 16:38:56 -0400159/**
160 * Update the maximum possible easize and cookiesize.
161 *
162 * The values are learned from ptlrpc replies sent by the MDT. The
163 * default easize and cookiesize is initialized to the minimum value but
164 * allowed to grow up to a single page in size if required to handle the
165 * common case.
166 *
167 * \see client_obd::cl_default_mds_easize and
168 * client_obd::cl_default_mds_cookiesize
169 *
170 * \param[in] exp export for MDC device
171 * \param[in] body body of ptlrpc reply from MDT
172 *
Brian Behlendorf44779342014-04-27 13:06:47 -0400173 */
Peng Taod7e09d02013-05-02 16:46:55 +0800174static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
175 struct mdt_body *body)
176{
John L. Hammond2e1b5b82016-08-16 16:19:08 -0400177 if (body->mbo_valid & OBD_MD_FLMODEASIZE) {
Brian Behlendorf44779342014-04-27 13:06:47 -0400178 struct client_obd *cli = &exp->exp_obd->u.cli;
Ned Bass8ed62e92016-09-18 16:38:56 -0400179 u32 def_cookiesize, def_easize;
Brian Behlendorf44779342014-04-27 13:06:47 -0400180
Ned Bass8ed62e92016-09-18 16:38:56 -0400181 if (cli->cl_max_mds_easize < body->mbo_max_mdsize)
John L. Hammond2e1b5b82016-08-16 16:19:08 -0400182 cli->cl_max_mds_easize = body->mbo_max_mdsize;
Ned Bass8ed62e92016-09-18 16:38:56 -0400183
184 def_easize = min_t(__u32, body->mbo_max_mdsize,
185 OBD_MAX_DEFAULT_EA_SIZE);
186 cli->cl_default_mds_easize = def_easize;
187
188 if (cli->cl_max_mds_cookiesize < body->mbo_max_cookiesize)
John L. Hammond2e1b5b82016-08-16 16:19:08 -0400189 cli->cl_max_mds_cookiesize = body->mbo_max_cookiesize;
Ned Bass8ed62e92016-09-18 16:38:56 -0400190
191 def_cookiesize = min_t(__u32, body->mbo_max_cookiesize,
192 OBD_MAX_DEFAULT_COOKIE_SIZE);
193 cli->cl_default_mds_cookiesize = def_cookiesize;
Peng Taod7e09d02013-05-02 16:46:55 +0800194 }
195}
196
Peng Taod7e09d02013-05-02 16:46:55 +0800197/* mdc/mdc_locks.c */
Peng Taod7e09d02013-05-02 16:46:55 +0800198int it_open_error(int phase, struct lookup_intent *it);
199
Andreas Dilger38585cc2014-02-11 02:52:05 -0700200static inline bool cl_is_lov_delay_create(unsigned int flags)
201{
202 return (flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE;
203}
204
205static inline void cl_lov_delay_create_clear(unsigned int *flags)
206{
207 if ((*flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE)
208 *flags &= ~O_LOV_DELAY_CREATE;
209}
210
Peng Taod7e09d02013-05-02 16:46:55 +0800211/** @} mdc */
212
213#endif