blob: 749781f022e2ee0f1db41f81c8010ea34c9863d3 [file] [log] [blame]
Peng Taod7e09d02013-05-02 16:46:55 +08001/*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
Oleg Drokin6a5b99a2016-06-14 23:33:40 -040018 * http://www.gnu.org/licenses/gpl-2.0.html
Peng Taod7e09d02013-05-02 16:46:55 +080019 *
Peng Taod7e09d02013-05-02 16:46:55 +080020 * GPL HEADER END
21 */
22/*
23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
25 *
Andreas Dilger1dc563a2015-11-08 18:09:37 -050026 * Copyright (c) 2011, 2015, Intel Corporation.
Peng Taod7e09d02013-05-02 16:46:55 +080027 */
28/*
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
31 */
32
33#define DEBUG_SUBSYSTEM S_OSC
34
Greg Kroah-Hartman9fdaf8c2014-07-11 20:51:16 -070035#include "../../include/linux/libcfs/libcfs.h"
Peng Taod7e09d02013-05-02 16:46:55 +080036
Greg Kroah-Hartman3ee30012014-07-11 22:16:18 -070037#include "../include/lustre_dlm.h"
38#include "../include/lustre_net.h"
39#include "../include/lustre/lustre_user.h"
40#include "../include/obd_cksum.h"
Peng Taod7e09d02013-05-02 16:46:55 +080041
Greg Kroah-Hartman3ee30012014-07-11 22:16:18 -070042#include "../include/lustre_ha.h"
43#include "../include/lprocfs_status.h"
John L. Hammond8877d3b2016-08-16 16:18:51 -040044#include "../include/lustre/lustre_ioctl.h"
Greg Kroah-Hartman3ee30012014-07-11 22:16:18 -070045#include "../include/lustre_debug.h"
46#include "../include/lustre_param.h"
47#include "../include/lustre_fid.h"
John L. Hammonddd45f472014-09-05 15:08:09 -050048#include "../include/obd_class.h"
Li Xiaefd9d72015-09-14 18:41:32 -040049#include "../include/obd.h"
Peng Taod7e09d02013-05-02 16:46:55 +080050#include "osc_internal.h"
51#include "osc_cl_internal.h"
52
Li Xiaefd9d72015-09-14 18:41:32 -040053atomic_t osc_pool_req_count;
54unsigned int osc_reqpool_maxreqcount;
55struct ptlrpc_request_pool *osc_rq_pool;
56
57/* max memory used for request pool, unit is MB */
58static unsigned int osc_reqpool_mem_max = 5;
59module_param(osc_reqpool_mem_max, uint, 0444);
60
John L. Hammondf024bad2014-09-05 15:08:10 -050061struct osc_brw_async_args {
62 struct obdo *aa_oa;
63 int aa_requested_nob;
64 int aa_nio_count;
65 u32 aa_page_count;
66 int aa_resends;
67 struct brw_page **aa_ppga;
68 struct client_obd *aa_cli;
69 struct list_head aa_oaps;
70 struct list_head aa_exts;
John L. Hammondf024bad2014-09-05 15:08:10 -050071 struct cl_req *aa_clerq;
72};
73
74struct osc_async_args {
75 struct obd_info *aa_oi;
76};
77
78struct osc_setattr_args {
79 struct obdo *sa_oa;
80 obd_enqueue_update_f sa_upcall;
81 void *sa_cookie;
82};
83
84struct osc_fsync_args {
85 struct obd_info *fa_oi;
86 obd_enqueue_update_f fa_upcall;
87 void *fa_cookie;
88};
89
90struct osc_enqueue_args {
91 struct obd_export *oa_exp;
Jinshan Xiong06563b52016-03-30 19:48:40 -040092 enum ldlm_type oa_type;
93 enum ldlm_mode oa_mode;
John L. Hammondf024bad2014-09-05 15:08:10 -050094 __u64 *oa_flags;
Jinshan Xiong06563b52016-03-30 19:48:40 -040095 osc_enqueue_upcall_f oa_upcall;
John L. Hammondf024bad2014-09-05 15:08:10 -050096 void *oa_cookie;
97 struct ost_lvb *oa_lvb;
Jinshan Xiong06563b52016-03-30 19:48:40 -040098 struct lustre_handle oa_lockh;
John L. Hammondf024bad2014-09-05 15:08:10 -050099 unsigned int oa_agl:1;
100};
101
Oleg Drokin21aef7d2014-08-15 12:55:56 -0400102static void osc_release_ppga(struct brw_page **ppga, u32 count);
Peng Taod7e09d02013-05-02 16:46:55 +0800103static int brw_interpret(const struct lu_env *env,
104 struct ptlrpc_request *req, void *data, int rc);
Peng Taod7e09d02013-05-02 16:46:55 +0800105
Peng Taod7e09d02013-05-02 16:46:55 +0800106/* Unpack OSC object metadata from disk storage (LE byte order). */
107static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
108 struct lov_mds_md *lmm, int lmm_bytes)
109{
110 int lsm_size;
111 struct obd_import *imp = class_exp2cliimp(exp);
Peng Taod7e09d02013-05-02 16:46:55 +0800112
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500113 if (lmm) {
Peng Taod7e09d02013-05-02 16:46:55 +0800114 if (lmm_bytes < sizeof(*lmm)) {
115 CERROR("%s: lov_mds_md too small: %d, need %d\n",
116 exp->exp_obd->obd_name, lmm_bytes,
117 (int)sizeof(*lmm));
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800118 return -EINVAL;
Peng Taod7e09d02013-05-02 16:46:55 +0800119 }
120 /* XXX LOV_MAGIC etc check? */
121
122 if (unlikely(ostid_id(&lmm->lmm_oi) == 0)) {
123 CERROR("%s: zero lmm_object_id: rc = %d\n",
124 exp->exp_obd->obd_name, -EINVAL);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800125 return -EINVAL;
Peng Taod7e09d02013-05-02 16:46:55 +0800126 }
127 }
128
129 lsm_size = lov_stripe_md_size(1);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500130 if (!lsmp)
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800131 return lsm_size;
Peng Taod7e09d02013-05-02 16:46:55 +0800132
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500133 if (*lsmp && !lmm) {
Julia Lawall77951782015-05-01 17:51:13 +0200134 kfree((*lsmp)->lsm_oinfo[0]);
135 kfree(*lsmp);
Peng Taod7e09d02013-05-02 16:46:55 +0800136 *lsmp = NULL;
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800137 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +0800138 }
139
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500140 if (!*lsmp) {
Julia Lawall77951782015-05-01 17:51:13 +0200141 *lsmp = kzalloc(lsm_size, GFP_NOFS);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500142 if (unlikely(!*lsmp))
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800143 return -ENOMEM;
Julia Lawall77951782015-05-01 17:51:13 +0200144 (*lsmp)->lsm_oinfo[0] = kzalloc(sizeof(struct lov_oinfo),
145 GFP_NOFS);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500146 if (unlikely(!(*lsmp)->lsm_oinfo[0])) {
Julia Lawall77951782015-05-01 17:51:13 +0200147 kfree(*lsmp);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800148 return -ENOMEM;
Peng Taod7e09d02013-05-02 16:46:55 +0800149 }
150 loi_init((*lsmp)->lsm_oinfo[0]);
151 } else if (unlikely(ostid_id(&(*lsmp)->lsm_oi) == 0)) {
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800152 return -EBADF;
Peng Taod7e09d02013-05-02 16:46:55 +0800153 }
154
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500155 if (lmm)
Peng Taod7e09d02013-05-02 16:46:55 +0800156 /* XXX zero *lsmp? */
157 ostid_le_to_cpu(&lmm->lmm_oi, &(*lsmp)->lsm_oi);
158
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500159 if (imp &&
Peng Taod7e09d02013-05-02 16:46:55 +0800160 (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES))
161 (*lsmp)->lsm_maxbytes = imp->imp_connect_data.ocd_maxbytes;
162 else
John L. Hammondb9d4b142016-09-18 16:37:39 -0400163 (*lsmp)->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
Peng Taod7e09d02013-05-02 16:46:55 +0800164
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800165 return lsm_size;
Peng Taod7e09d02013-05-02 16:46:55 +0800166}
167
Peng Taod7e09d02013-05-02 16:46:55 +0800168static inline void osc_pack_req_body(struct ptlrpc_request *req,
169 struct obd_info *oinfo)
170{
171 struct ost_body *body;
172
173 body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
174 LASSERT(body);
175
wang di3b2f75f2013-06-03 21:40:50 +0800176 lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
177 oinfo->oi_oa);
Peng Taod7e09d02013-05-02 16:46:55 +0800178}
179
180static int osc_getattr_interpret(const struct lu_env *env,
181 struct ptlrpc_request *req,
182 struct osc_async_args *aa, int rc)
183{
184 struct ost_body *body;
Peng Taod7e09d02013-05-02 16:46:55 +0800185
186 if (rc != 0)
Tina Johnson26c4ea42014-09-21 00:08:05 +0530187 goto out;
Peng Taod7e09d02013-05-02 16:46:55 +0800188
189 body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
190 if (body) {
191 CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
wang di3b2f75f2013-06-03 21:40:50 +0800192 lustre_get_wire_obdo(&req->rq_import->imp_connect_data,
193 aa->aa_oi->oi_oa, &body->oa);
Peng Taod7e09d02013-05-02 16:46:55 +0800194
195 /* This should really be sent by the OST */
196 aa->aa_oi->oi_oa->o_blksize = DT_MAX_BRW_SIZE;
197 aa->aa_oi->oi_oa->o_valid |= OBD_MD_FLBLKSZ;
198 } else {
199 CDEBUG(D_INFO, "can't unpack ost_body\n");
200 rc = -EPROTO;
201 aa->aa_oi->oi_oa->o_valid = 0;
202 }
203out:
204 rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800205 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800206}
207
208static int osc_getattr_async(struct obd_export *exp, struct obd_info *oinfo,
209 struct ptlrpc_request_set *set)
210{
211 struct ptlrpc_request *req;
212 struct osc_async_args *aa;
Chris Hanna29ac6842015-06-03 10:23:42 -0400213 int rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800214
215 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500216 if (!req)
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800217 return -ENOMEM;
Peng Taod7e09d02013-05-02 16:46:55 +0800218
Peng Taod7e09d02013-05-02 16:46:55 +0800219 rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
220 if (rc) {
221 ptlrpc_request_free(req);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800222 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800223 }
224
225 osc_pack_req_body(req, oinfo);
226
227 ptlrpc_request_set_replen(req);
228 req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_getattr_interpret;
229
230 CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
231 aa = ptlrpc_req_async_args(req);
232 aa->aa_oi = oinfo;
233
234 ptlrpc_set_add_req(set, req);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800235 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +0800236}
237
238static int osc_getattr(const struct lu_env *env, struct obd_export *exp,
239 struct obd_info *oinfo)
240{
241 struct ptlrpc_request *req;
Chris Hanna29ac6842015-06-03 10:23:42 -0400242 struct ost_body *body;
243 int rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800244
245 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500246 if (!req)
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800247 return -ENOMEM;
Peng Taod7e09d02013-05-02 16:46:55 +0800248
Peng Taod7e09d02013-05-02 16:46:55 +0800249 rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
250 if (rc) {
251 ptlrpc_request_free(req);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800252 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800253 }
254
255 osc_pack_req_body(req, oinfo);
256
257 ptlrpc_request_set_replen(req);
258
259 rc = ptlrpc_queue_wait(req);
260 if (rc)
Tina Johnson26c4ea42014-09-21 00:08:05 +0530261 goto out;
Peng Taod7e09d02013-05-02 16:46:55 +0800262
263 body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500264 if (!body) {
Tina Johnson26c4ea42014-09-21 00:08:05 +0530265 rc = -EPROTO;
266 goto out;
267 }
Peng Taod7e09d02013-05-02 16:46:55 +0800268
269 CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
wang di3b2f75f2013-06-03 21:40:50 +0800270 lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa,
271 &body->oa);
Peng Taod7e09d02013-05-02 16:46:55 +0800272
273 oinfo->oi_oa->o_blksize = cli_brw_size(exp->exp_obd);
274 oinfo->oi_oa->o_valid |= OBD_MD_FLBLKSZ;
275
Peng Taod7e09d02013-05-02 16:46:55 +0800276 out:
277 ptlrpc_req_finished(req);
278 return rc;
279}
280
281static int osc_setattr(const struct lu_env *env, struct obd_export *exp,
282 struct obd_info *oinfo, struct obd_trans_info *oti)
283{
284 struct ptlrpc_request *req;
Chris Hanna29ac6842015-06-03 10:23:42 -0400285 struct ost_body *body;
286 int rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800287
288 LASSERT(oinfo->oi_oa->o_valid & OBD_MD_FLGROUP);
289
290 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500291 if (!req)
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800292 return -ENOMEM;
Peng Taod7e09d02013-05-02 16:46:55 +0800293
Peng Taod7e09d02013-05-02 16:46:55 +0800294 rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
295 if (rc) {
296 ptlrpc_request_free(req);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800297 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800298 }
299
300 osc_pack_req_body(req, oinfo);
301
302 ptlrpc_request_set_replen(req);
303
304 rc = ptlrpc_queue_wait(req);
305 if (rc)
Tina Johnson26c4ea42014-09-21 00:08:05 +0530306 goto out;
Peng Taod7e09d02013-05-02 16:46:55 +0800307
308 body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500309 if (!body) {
Tina Johnson26c4ea42014-09-21 00:08:05 +0530310 rc = -EPROTO;
311 goto out;
312 }
Peng Taod7e09d02013-05-02 16:46:55 +0800313
wang di3b2f75f2013-06-03 21:40:50 +0800314 lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa,
315 &body->oa);
Peng Taod7e09d02013-05-02 16:46:55 +0800316
Peng Taod7e09d02013-05-02 16:46:55 +0800317out:
318 ptlrpc_req_finished(req);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800319 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800320}
321
322static int osc_setattr_interpret(const struct lu_env *env,
323 struct ptlrpc_request *req,
324 struct osc_setattr_args *sa, int rc)
325{
326 struct ost_body *body;
Peng Taod7e09d02013-05-02 16:46:55 +0800327
328 if (rc != 0)
Tina Johnson26c4ea42014-09-21 00:08:05 +0530329 goto out;
Peng Taod7e09d02013-05-02 16:46:55 +0800330
331 body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500332 if (!body) {
Tina Johnson26c4ea42014-09-21 00:08:05 +0530333 rc = -EPROTO;
334 goto out;
335 }
Peng Taod7e09d02013-05-02 16:46:55 +0800336
wang di3b2f75f2013-06-03 21:40:50 +0800337 lustre_get_wire_obdo(&req->rq_import->imp_connect_data, sa->sa_oa,
338 &body->oa);
Peng Taod7e09d02013-05-02 16:46:55 +0800339out:
340 rc = sa->sa_upcall(sa->sa_cookie, rc);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800341 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800342}
343
344int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
345 struct obd_trans_info *oti,
346 obd_enqueue_update_f upcall, void *cookie,
347 struct ptlrpc_request_set *rqset)
348{
Chris Hanna29ac6842015-06-03 10:23:42 -0400349 struct ptlrpc_request *req;
Peng Taod7e09d02013-05-02 16:46:55 +0800350 struct osc_setattr_args *sa;
Chris Hanna29ac6842015-06-03 10:23:42 -0400351 int rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800352
353 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500354 if (!req)
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800355 return -ENOMEM;
Peng Taod7e09d02013-05-02 16:46:55 +0800356
Peng Taod7e09d02013-05-02 16:46:55 +0800357 rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
358 if (rc) {
359 ptlrpc_request_free(req);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800360 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800361 }
362
363 if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
364 oinfo->oi_oa->o_lcookie = *oti->oti_logcookies;
365
366 osc_pack_req_body(req, oinfo);
367
368 ptlrpc_request_set_replen(req);
369
370 /* do mds to ost setattr asynchronously */
371 if (!rqset) {
372 /* Do not wait for response. */
Olaf Weberc5c4c6f2015-09-14 18:41:35 -0400373 ptlrpcd_add_req(req);
Peng Taod7e09d02013-05-02 16:46:55 +0800374 } else {
375 req->rq_interpret_reply =
376 (ptlrpc_interpterer_t)osc_setattr_interpret;
377
Shraddha Barkee72f36e2015-09-04 12:08:49 +0530378 CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
Peng Taod7e09d02013-05-02 16:46:55 +0800379 sa = ptlrpc_req_async_args(req);
380 sa->sa_oa = oinfo->oi_oa;
381 sa->sa_upcall = upcall;
382 sa->sa_cookie = cookie;
383
384 if (rqset == PTLRPCD_SET)
Olaf Weberc5c4c6f2015-09-14 18:41:35 -0400385 ptlrpcd_add_req(req);
Peng Taod7e09d02013-05-02 16:46:55 +0800386 else
387 ptlrpc_set_add_req(rqset, req);
388 }
389
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800390 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +0800391}
392
393static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
394 struct obd_trans_info *oti,
395 struct ptlrpc_request_set *rqset)
396{
397 return osc_setattr_async_base(exp, oinfo, oti,
398 oinfo->oi_cb_up, oinfo, rqset);
399}
400
John L. Hammond972e54a2016-09-18 16:38:11 -0400401static int osc_create(const struct lu_env *env, struct obd_export *exp,
402 struct obdo *oa, struct obd_trans_info *oti)
Peng Taod7e09d02013-05-02 16:46:55 +0800403{
404 struct ptlrpc_request *req;
Chris Hanna29ac6842015-06-03 10:23:42 -0400405 struct ost_body *body;
Chris Hanna29ac6842015-06-03 10:23:42 -0400406 int rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800407
408 LASSERT(oa);
John L. Hammond972e54a2016-09-18 16:38:11 -0400409 LASSERT(oa->o_valid & OBD_MD_FLGROUP);
410 LASSERT(fid_seq_is_echo(ostid_seq(&oa->o_oi)));
Peng Taod7e09d02013-05-02 16:46:55 +0800411
412 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_CREATE);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500413 if (!req) {
Tina Johnson26c4ea42014-09-21 00:08:05 +0530414 rc = -ENOMEM;
415 goto out;
416 }
Peng Taod7e09d02013-05-02 16:46:55 +0800417
418 rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_CREATE);
419 if (rc) {
420 ptlrpc_request_free(req);
Tina Johnson26c4ea42014-09-21 00:08:05 +0530421 goto out;
Peng Taod7e09d02013-05-02 16:46:55 +0800422 }
423
424 body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
425 LASSERT(body);
wang di3b2f75f2013-06-03 21:40:50 +0800426
427 lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
Peng Taod7e09d02013-05-02 16:46:55 +0800428
429 ptlrpc_request_set_replen(req);
430
431 if ((oa->o_valid & OBD_MD_FLFLAGS) &&
432 oa->o_flags == OBD_FL_DELORPHAN) {
433 DEBUG_REQ(D_HA, req,
434 "delorphan from OST integration");
435 /* Don't resend the delorphan req */
Nathaniel Clark04a62842016-06-09 22:35:11 -0400436 req->rq_no_resend = 1;
437 req->rq_no_delay = 1;
Peng Taod7e09d02013-05-02 16:46:55 +0800438 }
439
440 rc = ptlrpc_queue_wait(req);
441 if (rc)
Tina Johnson26c4ea42014-09-21 00:08:05 +0530442 goto out_req;
Peng Taod7e09d02013-05-02 16:46:55 +0800443
444 body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500445 if (!body) {
Tina Johnson26c4ea42014-09-21 00:08:05 +0530446 rc = -EPROTO;
447 goto out_req;
448 }
Peng Taod7e09d02013-05-02 16:46:55 +0800449
wang di3b2f75f2013-06-03 21:40:50 +0800450 CDEBUG(D_INFO, "oa flags %x\n", oa->o_flags);
451 lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa, &body->oa);
Peng Taod7e09d02013-05-02 16:46:55 +0800452
453 oa->o_blksize = cli_brw_size(exp->exp_obd);
454 oa->o_valid |= OBD_MD_FLBLKSZ;
455
John L. Hammond58c78cd2016-08-19 14:07:29 -0400456 if (oti && oa->o_valid & OBD_MD_FLCOOKIE) {
457 if (!oti->oti_logcookies)
458 oti->oti_logcookies = &oti->oti_onecookie;
459 *oti->oti_logcookies = oa->o_lcookie;
Peng Taod7e09d02013-05-02 16:46:55 +0800460 }
461
Greg Kroah-Hartmanf537dd22014-07-12 18:41:09 -0700462 CDEBUG(D_HA, "transno: %lld\n",
Peng Taod7e09d02013-05-02 16:46:55 +0800463 lustre_msg_get_transno(req->rq_repmsg));
464out_req:
465 ptlrpc_req_finished(req);
466out:
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800467 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800468}
469
470int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
471 obd_enqueue_update_f upcall, void *cookie,
472 struct ptlrpc_request_set *rqset)
473{
Chris Hanna29ac6842015-06-03 10:23:42 -0400474 struct ptlrpc_request *req;
Peng Taod7e09d02013-05-02 16:46:55 +0800475 struct osc_setattr_args *sa;
Chris Hanna29ac6842015-06-03 10:23:42 -0400476 struct ost_body *body;
477 int rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800478
479 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_PUNCH);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500480 if (!req)
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800481 return -ENOMEM;
Peng Taod7e09d02013-05-02 16:46:55 +0800482
Peng Taod7e09d02013-05-02 16:46:55 +0800483 rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_PUNCH);
484 if (rc) {
485 ptlrpc_request_free(req);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800486 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800487 }
488 req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
489 ptlrpc_at_set_req_timeout(req);
490
491 body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
492 LASSERT(body);
wang di3b2f75f2013-06-03 21:40:50 +0800493 lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
494 oinfo->oi_oa);
Peng Taod7e09d02013-05-02 16:46:55 +0800495
496 ptlrpc_request_set_replen(req);
497
498 req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret;
Shraddha Barkee72f36e2015-09-04 12:08:49 +0530499 CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
Peng Taod7e09d02013-05-02 16:46:55 +0800500 sa = ptlrpc_req_async_args(req);
Chris Hanna29ac6842015-06-03 10:23:42 -0400501 sa->sa_oa = oinfo->oi_oa;
Peng Taod7e09d02013-05-02 16:46:55 +0800502 sa->sa_upcall = upcall;
503 sa->sa_cookie = cookie;
504 if (rqset == PTLRPCD_SET)
Olaf Weberc5c4c6f2015-09-14 18:41:35 -0400505 ptlrpcd_add_req(req);
Peng Taod7e09d02013-05-02 16:46:55 +0800506 else
507 ptlrpc_set_add_req(rqset, req);
508
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800509 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +0800510}
511
Peng Taod7e09d02013-05-02 16:46:55 +0800512static int osc_sync_interpret(const struct lu_env *env,
513 struct ptlrpc_request *req,
514 void *arg, int rc)
515{
516 struct osc_fsync_args *fa = arg;
517 struct ost_body *body;
Peng Taod7e09d02013-05-02 16:46:55 +0800518
519 if (rc)
Tina Johnson26c4ea42014-09-21 00:08:05 +0530520 goto out;
Peng Taod7e09d02013-05-02 16:46:55 +0800521
522 body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500523 if (!body) {
Shraddha Barkee72f36e2015-09-04 12:08:49 +0530524 CERROR("can't unpack ost_body\n");
Tina Johnson26c4ea42014-09-21 00:08:05 +0530525 rc = -EPROTO;
526 goto out;
Peng Taod7e09d02013-05-02 16:46:55 +0800527 }
528
529 *fa->fa_oi->oi_oa = body->oa;
530out:
531 rc = fa->fa_upcall(fa->fa_cookie, rc);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800532 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800533}
534
535int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
536 obd_enqueue_update_f upcall, void *cookie,
537 struct ptlrpc_request_set *rqset)
538{
539 struct ptlrpc_request *req;
Chris Hanna29ac6842015-06-03 10:23:42 -0400540 struct ost_body *body;
Peng Taod7e09d02013-05-02 16:46:55 +0800541 struct osc_fsync_args *fa;
Chris Hanna29ac6842015-06-03 10:23:42 -0400542 int rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800543
544 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SYNC);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500545 if (!req)
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800546 return -ENOMEM;
Peng Taod7e09d02013-05-02 16:46:55 +0800547
Peng Taod7e09d02013-05-02 16:46:55 +0800548 rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SYNC);
549 if (rc) {
550 ptlrpc_request_free(req);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800551 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800552 }
553
554 /* overload the size and blocks fields in the oa with start/end */
555 body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
556 LASSERT(body);
wang di3b2f75f2013-06-03 21:40:50 +0800557 lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
558 oinfo->oi_oa);
Peng Taod7e09d02013-05-02 16:46:55 +0800559
560 ptlrpc_request_set_replen(req);
561 req->rq_interpret_reply = osc_sync_interpret;
562
563 CLASSERT(sizeof(*fa) <= sizeof(req->rq_async_args));
564 fa = ptlrpc_req_async_args(req);
565 fa->fa_oi = oinfo;
566 fa->fa_upcall = upcall;
567 fa->fa_cookie = cookie;
568
569 if (rqset == PTLRPCD_SET)
Olaf Weberc5c4c6f2015-09-14 18:41:35 -0400570 ptlrpcd_add_req(req);
Peng Taod7e09d02013-05-02 16:46:55 +0800571 else
572 ptlrpc_set_add_req(rqset, req);
573
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800574 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +0800575}
576
Peng Taod7e09d02013-05-02 16:46:55 +0800577/* Find and cancel locally locks matched by @mode in the resource found by
578 * @objid. Found locks are added into @cancel list. Returns the amount of
Oleg Drokin30aa9c52016-02-24 22:00:37 -0500579 * locks added to @cancels list.
580 */
Peng Taod7e09d02013-05-02 16:46:55 +0800581static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa,
582 struct list_head *cancels,
Oleg Drokin52ee0d22016-02-24 21:59:54 -0500583 enum ldlm_mode mode, __u64 lock_flags)
Peng Taod7e09d02013-05-02 16:46:55 +0800584{
585 struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
586 struct ldlm_res_id res_id;
587 struct ldlm_resource *res;
588 int count;
Peng Taod7e09d02013-05-02 16:46:55 +0800589
590 /* Return, i.e. cancel nothing, only if ELC is supported (flag in
591 * export) but disabled through procfs (flag in NS).
592 *
593 * This distinguishes from a case when ELC is not supported originally,
594 * when we still want to cancel locks in advance and just cancel them
Oleg Drokin30aa9c52016-02-24 22:00:37 -0500595 * locally, without sending any RPC.
596 */
Peng Taod7e09d02013-05-02 16:46:55 +0800597 if (exp_connect_cancelset(exp) && !ns_connect_cancelset(ns))
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800598 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +0800599
600 ostid_build_res_name(&oa->o_oi, &res_id);
601 res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
Emoly Liu099d5ad2016-08-16 16:19:13 -0400602 if (IS_ERR(res))
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800603 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +0800604
605 LDLM_RESOURCE_ADDREF(res);
606 count = ldlm_cancel_resource_local(res, cancels, NULL, mode,
607 lock_flags, 0, NULL);
608 LDLM_RESOURCE_DELREF(res);
609 ldlm_resource_putref(res);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800610 return count;
Peng Taod7e09d02013-05-02 16:46:55 +0800611}
612
613static int osc_destroy_interpret(const struct lu_env *env,
614 struct ptlrpc_request *req, void *data,
615 int rc)
616{
617 struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
618
619 atomic_dec(&cli->cl_destroy_in_flight);
620 wake_up(&cli->cl_destroy_waitq);
621 return 0;
622}
623
624static int osc_can_send_destroy(struct client_obd *cli)
625{
626 if (atomic_inc_return(&cli->cl_destroy_in_flight) <=
627 cli->cl_max_rpcs_in_flight) {
628 /* The destroy request can be sent */
629 return 1;
630 }
631 if (atomic_dec_return(&cli->cl_destroy_in_flight) <
632 cli->cl_max_rpcs_in_flight) {
633 /*
634 * The counter has been modified between the two atomic
635 * operations.
636 */
637 wake_up(&cli->cl_destroy_waitq);
638 }
639 return 0;
640}
641
Peng Taod7e09d02013-05-02 16:46:55 +0800642/* Destroy requests can be async always on the client, and we don't even really
643 * care about the return code since the client cannot do anything at all about
644 * a destroy failure.
645 * When the MDS is unlinking a filename, it saves the file objects into a
646 * recovery llog, and these object records are cancelled when the OST reports
647 * they were destroyed and sync'd to disk (i.e. transaction committed).
648 * If the client dies, or the OST is down when the object should be destroyed,
649 * the records are not cancelled, and when the OST reconnects to the MDS next,
650 * it will retrieve the llog unlink logs and then sends the log cancellation
Oleg Drokin30aa9c52016-02-24 22:00:37 -0500651 * cookies to the MDS after committing destroy transactions.
652 */
Peng Taod7e09d02013-05-02 16:46:55 +0800653static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
John L. Hammond3f062072016-09-18 16:38:12 -0400654 struct obdo *oa, struct obd_trans_info *oti)
Peng Taod7e09d02013-05-02 16:46:55 +0800655{
Chris Hanna29ac6842015-06-03 10:23:42 -0400656 struct client_obd *cli = &exp->exp_obd->u.cli;
Peng Taod7e09d02013-05-02 16:46:55 +0800657 struct ptlrpc_request *req;
Chris Hanna29ac6842015-06-03 10:23:42 -0400658 struct ost_body *body;
Peng Taod7e09d02013-05-02 16:46:55 +0800659 LIST_HEAD(cancels);
660 int rc, count;
Peng Taod7e09d02013-05-02 16:46:55 +0800661
662 if (!oa) {
663 CDEBUG(D_INFO, "oa NULL\n");
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800664 return -EINVAL;
Peng Taod7e09d02013-05-02 16:46:55 +0800665 }
666
667 count = osc_resource_get_unused(exp, oa, &cancels, LCK_PW,
668 LDLM_FL_DISCARD_DATA);
669
670 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_DESTROY);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500671 if (!req) {
Peng Taod7e09d02013-05-02 16:46:55 +0800672 ldlm_lock_list_put(&cancels, l_bl_ast, count);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800673 return -ENOMEM;
Peng Taod7e09d02013-05-02 16:46:55 +0800674 }
675
Peng Taod7e09d02013-05-02 16:46:55 +0800676 rc = ldlm_prep_elc_req(exp, req, LUSTRE_OST_VERSION, OST_DESTROY,
677 0, &cancels, count);
678 if (rc) {
679 ptlrpc_request_free(req);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800680 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +0800681 }
682
683 req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
684 ptlrpc_at_set_req_timeout(req);
685
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -0500686 if (oti && oa->o_valid & OBD_MD_FLCOOKIE)
Peng Taod7e09d02013-05-02 16:46:55 +0800687 oa->o_lcookie = *oti->oti_logcookies;
688 body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
689 LASSERT(body);
wang di3b2f75f2013-06-03 21:40:50 +0800690 lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
Peng Taod7e09d02013-05-02 16:46:55 +0800691
Peng Taod7e09d02013-05-02 16:46:55 +0800692 ptlrpc_request_set_replen(req);
693
Masanari Iida11d66e82013-12-14 02:24:04 +0900694 /* If osc_destroy is for destroying the unlink orphan,
Peng Taod7e09d02013-05-02 16:46:55 +0800695 * sent from MDT to OST, which should not be blocked here,
696 * because the process might be triggered by ptlrpcd, and
Oleg Drokin30aa9c52016-02-24 22:00:37 -0500697 * it is not good to block ptlrpcd thread (b=16006
698 **/
Peng Taod7e09d02013-05-02 16:46:55 +0800699 if (!(oa->o_flags & OBD_FL_DELORPHAN)) {
700 req->rq_interpret_reply = osc_destroy_interpret;
701 if (!osc_can_send_destroy(cli)) {
702 struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP,
703 NULL);
704
705 /*
706 * Wait until the number of on-going destroy RPCs drops
707 * under max_rpc_in_flight
708 */
709 l_wait_event_exclusive(cli->cl_destroy_waitq,
710 osc_can_send_destroy(cli), &lwi);
711 }
712 }
713
714 /* Do not wait for response */
Olaf Weberc5c4c6f2015-09-14 18:41:35 -0400715 ptlrpcd_add_req(req);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +0800716 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +0800717}
718
719static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
720 long writing_bytes)
721{
Oleg Drokincd94f232016-08-21 18:04:34 -0400722 u32 bits = OBD_MD_FLBLOCKS | OBD_MD_FLGRANT;
Peng Taod7e09d02013-05-02 16:46:55 +0800723
724 LASSERT(!(oa->o_valid & bits));
725
726 oa->o_valid |= bits;
John L. Hammond7d53d8f2016-03-30 19:48:36 -0400727 spin_lock(&cli->cl_loi_list_lock);
Hongchao Zhang3147b262016-08-16 16:19:22 -0400728 oa->o_dirty = cli->cl_dirty_pages << PAGE_SHIFT;
729 if (unlikely(cli->cl_dirty_pages - cli->cl_dirty_transit >
730 cli->cl_dirty_max_pages)) {
Peng Taod7e09d02013-05-02 16:46:55 +0800731 CERROR("dirty %lu - %lu > dirty_max %lu\n",
Hongchao Zhang3147b262016-08-16 16:19:22 -0400732 cli->cl_dirty_pages, cli->cl_dirty_transit,
733 cli->cl_dirty_max_pages);
Peng Taod7e09d02013-05-02 16:46:55 +0800734 oa->o_undirty = 0;
Stephen Champion29c877a2016-09-18 16:37:43 -0400735 } else if (unlikely(atomic_long_read(&obd_dirty_pages) -
736 atomic_long_read(&obd_dirty_transit_pages) >
737 (obd_max_dirty_pages + 1))) {
Peng Taod7e09d02013-05-02 16:46:55 +0800738 /* The atomic_read() allowing the atomic_inc() are
739 * not covered by a lock thus they may safely race and trip
Oleg Drokin30aa9c52016-02-24 22:00:37 -0500740 * this CERROR() unless we add in a small fudge factor (+1).
741 */
Stephen Champion29c877a2016-09-18 16:37:43 -0400742 CERROR("%s: dirty %ld + %ld > system dirty_max %lu\n",
Prakash Suryaac5b1482016-04-27 18:21:04 -0400743 cli->cl_import->imp_obd->obd_name,
Stephen Champion29c877a2016-09-18 16:37:43 -0400744 atomic_long_read(&obd_dirty_pages),
745 atomic_long_read(&obd_dirty_transit_pages),
Peng Taod7e09d02013-05-02 16:46:55 +0800746 obd_max_dirty_pages);
747 oa->o_undirty = 0;
Hongchao Zhang3147b262016-08-16 16:19:22 -0400748 } else if (unlikely(cli->cl_dirty_max_pages - cli->cl_dirty_pages >
749 0x7fffffff)) {
Peng Taod7e09d02013-05-02 16:46:55 +0800750 CERROR("dirty %lu - dirty_max %lu too big???\n",
Hongchao Zhang3147b262016-08-16 16:19:22 -0400751 cli->cl_dirty_pages, cli->cl_dirty_max_pages);
Peng Taod7e09d02013-05-02 16:46:55 +0800752 oa->o_undirty = 0;
753 } else {
Dmitry Eremin97cba132016-09-18 16:37:15 -0400754 unsigned long max_in_flight;
755
756 max_in_flight = (cli->cl_max_pages_per_rpc << PAGE_SHIFT) *
757 (cli->cl_max_rpcs_in_flight + 1);
Hongchao Zhang3147b262016-08-16 16:19:22 -0400758 oa->o_undirty = max(cli->cl_dirty_max_pages << PAGE_SHIFT,
759 max_in_flight);
Peng Taod7e09d02013-05-02 16:46:55 +0800760 }
761 oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant;
762 oa->o_dropped = cli->cl_lost_grant;
763 cli->cl_lost_grant = 0;
John L. Hammond7d53d8f2016-03-30 19:48:36 -0400764 spin_unlock(&cli->cl_loi_list_lock);
Greg Donald1d8cb702014-08-25 20:07:19 -0500765 CDEBUG(D_CACHE, "dirty: %llu undirty: %u dropped %u grant: %llu\n",
Peng Taod7e09d02013-05-02 16:46:55 +0800766 oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant);
Peng Taod7e09d02013-05-02 16:46:55 +0800767}
768
769void osc_update_next_shrink(struct client_obd *cli)
770{
771 cli->cl_next_shrink_grant =
772 cfs_time_shift(cli->cl_grant_shrink_interval);
Oleg Drokin72a87fc2016-02-24 22:00:40 -0500773 CDEBUG(D_CACHE, "next time %ld to shrink grant\n",
Peng Taod7e09d02013-05-02 16:46:55 +0800774 cli->cl_next_shrink_grant);
775}
776
/* Add @grant bytes back to the client's available grant, under the
 * cl_loi_list_lock that protects all grant accounting.
 */
static void __osc_update_grant(struct client_obd *cli, u64 grant)
{
	spin_lock(&cli->cl_loi_list_lock);
	cli->cl_avail_grant += grant;
	spin_unlock(&cli->cl_loi_list_lock);
}
783
784static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
785{
786 if (body->oa.o_valid & OBD_MD_FLGRANT) {
Greg Kroah-Hartmanb0f5aad2014-07-12 20:06:04 -0700787 CDEBUG(D_CACHE, "got %llu extra grant\n", body->oa.o_grant);
Peng Taod7e09d02013-05-02 16:46:55 +0800788 __osc_update_grant(cli, body->oa.o_grant);
789 }
790}
791
792static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
Oleg Drokin21aef7d2014-08-15 12:55:56 -0400793 u32 keylen, void *key, u32 vallen,
Peng Taod7e09d02013-05-02 16:46:55 +0800794 void *val, struct ptlrpc_request_set *set);
795
/*
 * Reply interpret callback for a grant-shrink RPC.  On failure the grant we
 * tried to give back is restored to cl_avail_grant; on success the server's
 * reply body may carry replacement grant which is folded in.  The obdo that
 * carried the request is freed in both cases.
 */
static int osc_shrink_grant_interpret(const struct lu_env *env,
				      struct ptlrpc_request *req,
				      void *aa, int rc)
{
	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
	struct obdo *oa = ((struct osc_brw_async_args *)aa)->aa_oa;
	struct ost_body *body;

	if (rc != 0) {
		/* Shrink failed: take the grant back rather than leak it. */
		__osc_update_grant(cli, oa->o_grant);
		goto out;
	}

	body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
	LASSERT(body);
	osc_update_grant(cli, body);
out:
	kmem_cache_free(obdo_cachep, oa);
	return rc;
}
816
/*
 * Piggyback a grant shrink on an outgoing BRW request: give back a quarter
 * of the currently available grant via @oa and mark the obdo with
 * OBD_FL_SHRINK_GRANT so the server knows.  Also re-arms the next shrink
 * deadline.
 */
static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa)
{
	spin_lock(&cli->cl_loi_list_lock);
	oa->o_grant = cli->cl_avail_grant / 4;
	cli->cl_avail_grant -= oa->o_grant;
	spin_unlock(&cli->cl_loi_list_lock);
	if (!(oa->o_valid & OBD_MD_FLFLAGS)) {
		/* o_flags only meaningful once OBD_MD_FLFLAGS is set. */
		oa->o_valid |= OBD_MD_FLFLAGS;
		oa->o_flags = 0;
	}
	oa->o_flags |= OBD_FL_SHRINK_GRANT;
	osc_update_next_shrink(cli);
}
830
/* Shrink the current grant, either from some large amount to enough for a
 * full set of in-flight RPCs, or if we have already shrunk to that limit
 * then to enough for a single RPC. This avoids keeping more grant than
 * needed, and avoids shrinking the grant piecemeal.
 */
static int osc_shrink_grant(struct client_obd *cli)
{
	/* Enough grant for (max_rpcs_in_flight + 1) full-size RPCs. */
	__u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) *
			     (cli->cl_max_pages_per_rpc << PAGE_SHIFT);

	spin_lock(&cli->cl_loi_list_lock);
	/* Already at (or below) the pipeline target: drop to one RPC's
	 * worth instead.
	 */
	if (cli->cl_avail_grant <= target_bytes)
		target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
	spin_unlock(&cli->cl_loi_list_lock);

	return osc_shrink_grant_to_target(cli, target_bytes);
}
848
/*
 * Give grant back to the server until only @target_bytes remain locally.
 * The surplus (cl_avail_grant - target_bytes) is reported via an
 * OBD_FL_SHRINK_GRANT set_info RPC; if sending fails the surplus is
 * restored.  Returns 0 on success or if no shrink was needed, negative
 * errno otherwise.
 */
int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
{
	int rc = 0;
	struct ost_body *body;

	spin_lock(&cli->cl_loi_list_lock);
	/* Don't shrink if we are already above or below the desired limit
	 * We don't want to shrink below a single RPC, as that will negatively
	 * impact block allocation and long-term performance.
	 */
	if (target_bytes < cli->cl_max_pages_per_rpc << PAGE_SHIFT)
		target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;

	if (target_bytes >= cli->cl_avail_grant) {
		spin_unlock(&cli->cl_loi_list_lock);
		return 0;
	}
	spin_unlock(&cli->cl_loi_list_lock);

	body = kzalloc(sizeof(*body), GFP_NOFS);
	if (!body)
		return -ENOMEM;

	osc_announce_cached(cli, &body->oa, 0);

	/* Re-take the lock: avail_grant may have moved since the check
	 * above, so compute the surplus from the current value.
	 */
	spin_lock(&cli->cl_loi_list_lock);
	body->oa.o_grant = cli->cl_avail_grant - target_bytes;
	cli->cl_avail_grant = target_bytes;
	spin_unlock(&cli->cl_loi_list_lock);
	if (!(body->oa.o_valid & OBD_MD_FLFLAGS)) {
		body->oa.o_valid |= OBD_MD_FLFLAGS;
		body->oa.o_flags = 0;
	}
	body->oa.o_flags |= OBD_FL_SHRINK_GRANT;
	osc_update_next_shrink(cli);

	rc = osc_set_info_async(NULL, cli->cl_import->imp_obd->obd_self_export,
				sizeof(KEY_GRANT_SHRINK), KEY_GRANT_SHRINK,
				sizeof(*body), body, NULL);
	if (rc != 0)
		/* RPC not sent: put the surplus back. */
		__osc_update_grant(cli, body->oa.o_grant);
	kfree(body);
	return rc;
}
893
894static int osc_should_shrink_grant(struct client_obd *client)
895{
Greg Kroah-Hartmana649ad12014-07-12 00:27:46 -0700896 unsigned long time = cfs_time_current();
897 unsigned long next_shrink = client->cl_next_shrink_grant;
Peng Taod7e09d02013-05-02 16:46:55 +0800898
899 if ((client->cl_import->imp_connect_data.ocd_connect_flags &
900 OBD_CONNECT_GRANT_SHRINK) == 0)
901 return 0;
902
903 if (cfs_time_aftereq(time, next_shrink - 5 * CFS_TICK)) {
904 /* Get the current RPC size directly, instead of going via:
905 * cli_brw_size(obd->u.cli.cl_import->imp_obd->obd_self_export)
Oleg Drokin30aa9c52016-02-24 22:00:37 -0500906 * Keep comment here so that it can be found by searching.
907 */
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +0300908 int brw_size = client->cl_max_pages_per_rpc << PAGE_SHIFT;
Peng Taod7e09d02013-05-02 16:46:55 +0800909
910 if (client->cl_import->imp_state == LUSTRE_IMP_FULL &&
911 client->cl_avail_grant > brw_size)
912 return 1;
Antonio Murdaca71e8dd92015-06-08 21:48:41 +0200913
914 osc_update_next_shrink(client);
Peng Taod7e09d02013-05-02 16:46:55 +0800915 }
916 return 0;
917}
918
/*
 * Periodic timeout callback: walk every client on the timeout item's list
 * and shrink the grant of those that currently qualify.  Always returns 0.
 */
static int osc_grant_shrink_grant_cb(struct timeout_item *item, void *data)
{
	struct client_obd *client;

	list_for_each_entry(client, &item->ti_obd_list, cl_grant_shrink_list) {
		if (osc_should_shrink_grant(client))
			osc_shrink_grant(client);
	}
	return 0;
}
929
/*
 * Register this client with the periodic grant-shrink timeout machinery
 * and arm its first shrink deadline.  Returns 0 on success or the error
 * from ptlrpc_add_timeout_client().
 */
static int osc_add_shrink_grant(struct client_obd *client)
{
	int rc;

	rc = ptlrpc_add_timeout_client(client->cl_grant_shrink_interval,
				       TIMEOUT_GRANT,
				       osc_grant_shrink_grant_cb, NULL,
				       &client->cl_grant_shrink_list);
	if (rc) {
		CERROR("add grant client %s error %d\n",
		       client->cl_import->imp_obd->obd_name, rc);
		return rc;
	}
	CDEBUG(D_CACHE, "add grant client %s\n",
	       client->cl_import->imp_obd->obd_name);
	osc_update_next_shrink(client);
	return 0;
}
948
/* Unregister this client from the periodic grant-shrink timeout list. */
static int osc_del_shrink_grant(struct client_obd *client)
{
	return ptlrpc_del_timeout_client(&client->cl_grant_shrink_list,
					 TIMEOUT_GRANT);
}
954
/*
 * Initialize this client's grant accounting from the server's connect data:
 * set cl_avail_grant from ocd_grant (adjusting for outstanding dirty pages
 * unless we were just evicted), pick the osc_extent chunk size from the
 * server block size, and enroll in periodic grant shrinking if negotiated.
 */
static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
{
	/*
	 * ocd_grant is the total grant amount we're expect to hold: if we've
	 * been evicted, it's the new avail_grant amount, cl_dirty_pages will
	 * drop to 0 as inflight RPCs fail out; otherwise, it's avail_grant +
	 * dirty.
	 *
	 * race is tolerable here: if we're evicted, but imp_state already
	 * left EVICTED state, then cl_dirty_pages must be 0 already.
	 */
	spin_lock(&cli->cl_loi_list_lock);
	if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
		cli->cl_avail_grant = ocd->ocd_grant;
	else
		cli->cl_avail_grant = ocd->ocd_grant -
				      (cli->cl_dirty_pages << PAGE_SHIFT);

	if (cli->cl_avail_grant < 0) {
		CWARN("%s: available grant < 0: avail/ocd/dirty %ld/%u/%ld\n",
		      cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant,
		      ocd->ocd_grant, cli->cl_dirty_pages << PAGE_SHIFT);
		/* workaround for servers which do not have the patch from
		 * LU-2679
		 */
		cli->cl_avail_grant = ocd->ocd_grant;
	}

	/* determine the appropriate chunk size used by osc_extent. */
	cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize);
	spin_unlock(&cli->cl_loi_list_lock);

	CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n",
	       cli->cl_import->imp_obd->obd_name,
	       cli->cl_avail_grant, cli->cl_lost_grant, cli->cl_chunkbits);

	if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT_SHRINK &&
	    list_empty(&cli->cl_grant_shrink_list))
		osc_add_shrink_grant(cli);
}
995
996/* We assume that the reason this OSC got a short read is because it read
997 * beyond the end of a stripe file; i.e. lustre is reading a sparse file
998 * via the LOV, and it _knows_ it's reading inside the file, it's just that
Oleg Drokin30aa9c52016-02-24 22:00:37 -0500999 * this stripe never got written at or beyond this stripe offset yet.
1000 */
Oleg Drokin21aef7d2014-08-15 12:55:56 -04001001static void handle_short_read(int nob_read, u32 page_count,
Peng Taod7e09d02013-05-02 16:46:55 +08001002 struct brw_page **pga)
1003{
1004 char *ptr;
1005 int i = 0;
1006
1007 /* skip bytes read OK */
1008 while (nob_read > 0) {
Shraddha Barkee72f36e2015-09-04 12:08:49 +05301009 LASSERT(page_count > 0);
Peng Taod7e09d02013-05-02 16:46:55 +08001010
1011 if (pga[i]->count > nob_read) {
1012 /* EOF inside this page */
1013 ptr = kmap(pga[i]->pg) +
Oleg Drokin616387e2016-03-30 19:48:23 -04001014 (pga[i]->off & ~PAGE_MASK);
Peng Taod7e09d02013-05-02 16:46:55 +08001015 memset(ptr + nob_read, 0, pga[i]->count - nob_read);
1016 kunmap(pga[i]->pg);
1017 page_count--;
1018 i++;
1019 break;
1020 }
1021
1022 nob_read -= pga[i]->count;
1023 page_count--;
1024 i++;
1025 }
1026
1027 /* zero remaining pages */
1028 while (page_count-- > 0) {
Oleg Drokin616387e2016-03-30 19:48:23 -04001029 ptr = kmap(pga[i]->pg) + (pga[i]->off & ~PAGE_MASK);
Peng Taod7e09d02013-05-02 16:46:55 +08001030 memset(ptr, 0, pga[i]->count);
1031 kunmap(pga[i]->pg);
1032 i++;
1033 }
1034}
1035
1036static int check_write_rcs(struct ptlrpc_request *req,
1037 int requested_nob, int niocount,
Oleg Drokin21aef7d2014-08-15 12:55:56 -04001038 u32 page_count, struct brw_page **pga)
Peng Taod7e09d02013-05-02 16:46:55 +08001039{
Chris Hanna29ac6842015-06-03 10:23:42 -04001040 int i;
1041 __u32 *remote_rcs;
Peng Taod7e09d02013-05-02 16:46:55 +08001042
1043 remote_rcs = req_capsule_server_sized_get(&req->rq_pill, &RMF_RCS,
1044 sizeof(*remote_rcs) *
1045 niocount);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -05001046 if (!remote_rcs) {
Peng Taod7e09d02013-05-02 16:46:55 +08001047 CDEBUG(D_INFO, "Missing/short RC vector on BRW_WRITE reply\n");
Julia Lawallfbe7c6c2014-08-26 22:00:33 +02001048 return -EPROTO;
Peng Taod7e09d02013-05-02 16:46:55 +08001049 }
1050
1051 /* return error if any niobuf was in error */
1052 for (i = 0; i < niocount; i++) {
1053 if ((int)remote_rcs[i] < 0)
Greg Donalde8291972014-08-31 17:40:17 -05001054 return remote_rcs[i];
Peng Taod7e09d02013-05-02 16:46:55 +08001055
1056 if (remote_rcs[i] != 0) {
1057 CDEBUG(D_INFO, "rc[%d] invalid (%d) req %p\n",
Oleg Drokin79910d72016-02-26 01:50:03 -05001058 i, remote_rcs[i], req);
Julia Lawallfbe7c6c2014-08-26 22:00:33 +02001059 return -EPROTO;
Peng Taod7e09d02013-05-02 16:46:55 +08001060 }
1061 }
1062
1063 if (req->rq_bulk->bd_nob_transferred != requested_nob) {
1064 CERROR("Unexpected # bytes transferred: %d (requested %d)\n",
1065 req->rq_bulk->bd_nob_transferred, requested_nob);
Julia Lawallfbe7c6c2014-08-26 22:00:33 +02001066 return -EPROTO;
Peng Taod7e09d02013-05-02 16:46:55 +08001067 }
1068
Julia Lawallfbe7c6c2014-08-26 22:00:33 +02001069 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +08001070}
1071
1072static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2)
1073{
1074 if (p1->flag != p2->flag) {
Hatice ERTÃœRK7cf10542015-02-27 14:45:41 +02001075 unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE |
Prakash Suryaad479282016-04-27 18:21:06 -04001076 OBD_BRW_SYNC | OBD_BRW_ASYNC |
1077 OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC);
Peng Taod7e09d02013-05-02 16:46:55 +08001078
1079 /* warn if we try to combine flags that we don't know to be
Oleg Drokin30aa9c52016-02-24 22:00:37 -05001080 * safe to combine
1081 */
Peng Taod7e09d02013-05-02 16:46:55 +08001082 if (unlikely((p1->flag & mask) != (p2->flag & mask))) {
Joe Perches2d00bd12014-11-23 11:28:50 -08001083 CWARN("Saw flags 0x%x and 0x%x in the same brw, please report this at http://bugs.whamcloud.com/\n",
Peng Taod7e09d02013-05-02 16:46:55 +08001084 p1->flag, p2->flag);
1085 }
1086 return 0;
1087 }
1088
1089 return (p1->off + p1->count == p2->off);
1090}
1091
/*
 * Compute the bulk-IO checksum over the first @nob bytes spanned by @pga
 * using the hash algorithm selected by @cksum_type.  Contains two
 * fault-injection points: one corrupts received read data, the other
 * deliberately returns a wrong checksum for writes.  Returns the checksum,
 * or a PTR_ERR value if the hash could not be initialized.
 */
static u32 osc_checksum_bulk(int nob, u32 pg_count,
			     struct brw_page **pga, int opc,
			     enum cksum_type cksum_type)
{
	__u32 cksum;
	int i = 0;
	struct cfs_crypto_hash_desc *hdesc;
	unsigned int bufsize;
	int err;	/* NOTE(review): err is never checked after hash_final */
	unsigned char cfs_alg = cksum_obd2cfs(cksum_type);

	LASSERT(pg_count > 0);

	hdesc = cfs_crypto_hash_init(cfs_alg, NULL, 0);
	if (IS_ERR(hdesc)) {
		CERROR("Unable to initialize checksum hash %s\n",
		       cfs_crypto_hash_name(cfs_alg));
		return PTR_ERR(hdesc);
	}

	while (nob > 0 && pg_count > 0) {
		/* Only the part of the last page that falls within @nob
		 * contributes to the digest.
		 */
		unsigned int count = pga[i]->count > nob ? nob : pga[i]->count;

		/* corrupt the data before we compute the checksum, to
		 * simulate an OST->client data error
		 */
		if (i == 0 && opc == OST_READ &&
		    OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) {
			unsigned char *ptr = kmap(pga[i]->pg);
			int off = pga[i]->off & ~PAGE_MASK;

			memcpy(ptr + off, "bad1", min_t(typeof(nob), 4, nob));
			kunmap(pga[i]->pg);
		}
		cfs_crypto_hash_update_page(hdesc, pga[i]->pg,
					    pga[i]->off & ~PAGE_MASK,
					    count);
		CDEBUG(D_PAGE,
		       "page %p map %p index %lu flags %lx count %u priv %0lx: off %d\n",
		       pga[i]->pg, pga[i]->pg->mapping, pga[i]->pg->index,
		       (long)pga[i]->pg->flags, page_count(pga[i]->pg),
		       page_private(pga[i]->pg),
		       (int)(pga[i]->off & ~PAGE_MASK));

		nob -= pga[i]->count;
		pg_count--;
		i++;
	}

	bufsize = sizeof(cksum);
	err = cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize);

	/* For sending we only compute the wrong checksum instead
	 * of corrupting the data so it is still correct on a redo
	 */
	if (opc == OST_WRITE && OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND))
		cksum++;

	return cksum;
}
1152
/*
 * Build (but do not send) a bulk read/write RPC for @page_count pages.
 *
 * Allocates the request (from osc_rq_pool for writes), prepares the bulk
 * descriptor, packs the obdo/ioobj/niobuf buffers — merging contiguous
 * same-flag pages into single remote niobufs — announces cached dirty/grant
 * state, optionally piggybacks a grant shrink, and computes the bulk
 * checksum for writes.  On success *reqp holds the prepared request and the
 * async args embed @oa/@pga for the completion path; the caller keeps
 * ownership of @oa and @pga.  Returns 0 or a negative errno (the request is
 * freed on failure).
 */
static int osc_brw_prep_request(int cmd, struct client_obd *cli,
				struct obdo *oa,
				struct lov_stripe_md *lsm, u32 page_count,
				struct brw_page **pga,
				struct ptlrpc_request **reqp,
				int reserve,
				int resend)
{
	struct ptlrpc_request *req;
	struct ptlrpc_bulk_desc *desc;
	struct ost_body *body;
	struct obd_ioobj *ioobj;
	struct niobuf_remote *niobuf;
	int niocount, i, requested_nob, opc, rc;
	struct osc_brw_async_args *aa;
	struct req_capsule *pill;
	struct brw_page *pg_prev;

	/* Fault-injection points for testing the caller's error paths. */
	if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ))
		return -ENOMEM; /* Recoverable */
	if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ2))
		return -EINVAL; /* Fatal */

	if ((cmd & OBD_BRW_WRITE) != 0) {
		opc = OST_WRITE;
		req = ptlrpc_request_alloc_pool(cli->cl_import,
						osc_rq_pool,
						&RQF_OST_BRW_WRITE);
	} else {
		opc = OST_READ;
		req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_BRW_READ);
	}
	if (!req)
		return -ENOMEM;

	/* Count remote niobufs: adjacent mergeable pages share one. */
	for (niocount = i = 1; i < page_count; i++) {
		if (!can_merge_pages(pga[i - 1], pga[i]))
			niocount++;
	}

	pill = &req->rq_pill;
	req_capsule_set_size(pill, &RMF_OBD_IOOBJ, RCL_CLIENT,
			     sizeof(*ioobj));
	req_capsule_set_size(pill, &RMF_NIOBUF_REMOTE, RCL_CLIENT,
			     niocount * sizeof(*niobuf));

	rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, opc);
	if (rc) {
		ptlrpc_request_free(req);
		return rc;
	}
	req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
	ptlrpc_at_set_req_timeout(req);
	/* ask ptlrpc not to resend on EINPROGRESS since BRWs have their own
	 * retry logic
	 */
	req->rq_no_retry_einprogress = 1;

	desc = ptlrpc_prep_bulk_imp(req, page_count,
		cli->cl_import->imp_connect_data.ocd_brw_size >> LNET_MTU_BITS,
		opc == OST_WRITE ? BULK_GET_SOURCE : BULK_PUT_SINK,
		OST_BULK_PORTAL);

	if (!desc) {
		rc = -ENOMEM;
		goto out;
	}
	/* NB request now owns desc and will free it when it gets freed */

	body = req_capsule_client_get(pill, &RMF_OST_BODY);
	ioobj = req_capsule_client_get(pill, &RMF_OBD_IOOBJ);
	niobuf = req_capsule_client_get(pill, &RMF_NIOBUF_REMOTE);
	LASSERT(body && ioobj && niobuf);

	lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);

	obdo_to_ioobj(oa, ioobj);
	ioobj->ioo_bufcnt = niocount;
	/* The high bits of ioo_max_brw tells server _maximum_ number of bulks
	 * that might be send for this request. The actual number is decided
	 * when the RPC is finally sent in ptlrpc_register_bulk(). It sends
	 * "max - 1" for old client compatibility sending "0", and also so the
	 * the actual maximum is a power-of-two number, not one less. LU-1431
	 */
	ioobj_max_brw_set(ioobj, desc->bd_md_max_brw);
	LASSERT(page_count > 0);
	pg_prev = pga[0];
	for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
		struct brw_page *pg = pga[i];
		int poff = pg->off & ~PAGE_MASK;

		LASSERT(pg->count > 0);
		/* make sure there is no gap in the middle of page array */
		LASSERTF(page_count == 1 ||
			 (ergo(i == 0, poff + pg->count == PAGE_SIZE) &&
			  ergo(i > 0 && i < page_count - 1,
			       poff == 0 && pg->count == PAGE_SIZE) &&
			  ergo(i == page_count - 1, poff == 0)),
			 "i: %d/%d pg: %p off: %llu, count: %u\n",
			 i, page_count, pg, pg->off, pg->count);
		LASSERTF(i == 0 || pg->off > pg_prev->off,
			 "i %d p_c %u pg %p [pri %lu ind %lu] off %llu prev_pg %p [pri %lu ind %lu] off %llu\n",
			 i, page_count,
			 pg->pg, page_private(pg->pg), pg->pg->index, pg->off,
			 pg_prev->pg, page_private(pg_prev->pg),
			 pg_prev->pg->index, pg_prev->off);
		LASSERT((pga[0]->flag & OBD_BRW_SRVLOCK) ==
			(pg->flag & OBD_BRW_SRVLOCK));

		ptlrpc_prep_bulk_page_pin(desc, pg->pg, poff, pg->count);
		requested_nob += pg->count;

		if (i > 0 && can_merge_pages(pg_prev, pg)) {
			/* Extend the previous niobuf instead of starting a
			 * new one.
			 */
			niobuf--;
			niobuf->rnb_len += pg->count;
		} else {
			niobuf->rnb_offset = pg->off;
			niobuf->rnb_len = pg->count;
			niobuf->rnb_flags = pg->flag;
		}
		pg_prev = pg;
	}

	LASSERTF((void *)(niobuf - niocount) ==
		 req_capsule_client_get(&req->rq_pill, &RMF_NIOBUF_REMOTE),
		 "want %p - real %p\n", req_capsule_client_get(&req->rq_pill,
		 &RMF_NIOBUF_REMOTE), (void *)(niobuf - niocount));

	osc_announce_cached(cli, &body->oa, opc == OST_WRITE ? requested_nob:0);
	if (resend) {
		/* Mark resent requests so the server can detect replays. */
		if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0) {
			body->oa.o_valid |= OBD_MD_FLFLAGS;
			body->oa.o_flags = 0;
		}
		body->oa.o_flags |= OBD_FL_RECOV_RESEND;
	}

	if (osc_should_shrink_grant(cli))
		osc_shrink_grant_local(cli, &body->oa);

	/* size[REQ_REC_OFF] still sizeof (*body) */
	if (opc == OST_WRITE) {
		if (cli->cl_checksum &&
		    !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
			/* store cl_cksum_type in a local variable since
			 * it can be changed via lprocfs
			 */
			enum cksum_type cksum_type = cli->cl_cksum_type;

			if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0) {
				oa->o_flags &= OBD_FL_LOCAL_MASK;
				body->oa.o_flags = 0;
			}
			body->oa.o_flags |= cksum_type_pack(cksum_type);
			body->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
			body->oa.o_cksum = osc_checksum_bulk(requested_nob,
							     page_count, pga,
							     OST_WRITE,
							     cksum_type);
			CDEBUG(D_PAGE, "checksum at write origin: %x\n",
			       body->oa.o_cksum);
			/* save this in 'oa', too, for later checking */
			oa->o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
			oa->o_flags |= cksum_type_pack(cksum_type);
		} else {
			/* clear out the checksum flag, in case this is a
			 * resend but cl_checksum is no longer set. b=11238
			 */
			oa->o_valid &= ~OBD_MD_FLCKSUM;
		}
		oa->o_cksum = body->oa.o_cksum;
		/* 1 RC per niobuf */
		req_capsule_set_size(pill, &RMF_RCS, RCL_SERVER,
				     sizeof(__u32) * niocount);
	} else {
		if (cli->cl_checksum &&
		    !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
			if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0)
				body->oa.o_flags = 0;
			body->oa.o_flags |= cksum_type_pack(cli->cl_cksum_type);
			body->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
		}
	}
	ptlrpc_request_set_replen(req);

	CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
	aa = ptlrpc_req_async_args(req);
	aa->aa_oa = oa;
	aa->aa_requested_nob = requested_nob;
	aa->aa_nio_count = niocount;
	aa->aa_page_count = page_count;
	aa->aa_resends = 0;
	aa->aa_ppga = pga;
	aa->aa_cli = cli;
	INIT_LIST_HEAD(&aa->aa_oaps);

	*reqp = req;
	niobuf = req_capsule_client_get(pill, &RMF_NIOBUF_REMOTE);
	CDEBUG(D_RPCTRACE, "brw rpc %p - object " DOSTID " offset %lld<>%lld\n",
	       req, POSTID(&oa->o_oi), niobuf[0].rnb_offset,
	       niobuf[niocount - 1].rnb_offset + niobuf[niocount - 1].rnb_len);

	return 0;

 out:
	ptlrpc_req_finished(req);
	return rc;
}
1361
/*
 * Diagnose a write-checksum mismatch between client and server.
 * Recomputes the checksum over the (still-mapped) pages and, by comparing
 * the three values, guesses where the corruption happened, then logs a
 * console error with the best explanation.  Returns 0 if the checksums
 * actually matched, 1 if a mismatch was confirmed and logged.
 */
static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
				__u32 client_cksum, __u32 server_cksum, int nob,
				u32 page_count, struct brw_page **pga,
				enum cksum_type client_cksum_type)
{
	__u32 new_cksum;
	char *msg;
	enum cksum_type cksum_type;

	if (server_cksum == client_cksum) {
		CDEBUG(D_PAGE, "checksum %x confirmed\n", client_cksum);
		return 0;
	}

	/* Recompute with the algorithm the server actually used. */
	cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ?
				       oa->o_flags : 0);
	new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE,
				      cksum_type);

	if (cksum_type != client_cksum_type)
		msg = "the server did not use the checksum type specified in the original request - likely a protocol problem";
	else if (new_cksum == server_cksum)
		msg = "changed on the client after we checksummed it - likely false positive due to mmap IO (bug 11742)";
	else if (new_cksum == client_cksum)
		msg = "changed in transit before arrival at OST";
	else
		msg = "changed in transit AND doesn't match the original - likely false positive due to mmap IO (bug 11742)";

	LCONSOLE_ERROR_MSG(0x132, "BAD WRITE CHECKSUM: %s: from %s inode "DFID
			   " object "DOSTID" extent [%llu-%llu]\n",
			   msg, libcfs_nid2str(peer->nid),
			   oa->o_valid & OBD_MD_FLFID ? oa->o_parent_seq : (__u64)0,
			   oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0,
			   oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0,
			   POSTID(&oa->o_oi), pga[0]->off,
			   pga[page_count - 1]->off +
			   pga[page_count - 1]->count - 1);
	CERROR("original client csum %x (type %x), server csum %x (type %x), client csum now %x\n",
	       client_cksum, client_cksum_type,
	       server_cksum, cksum_type, new_cksum);
	return 1;
}
1407
/* Note rc enters this function as number of bytes transferred.
 *
 * Finish a bulk read/write RPC: unpack the reply body, update quota and
 * grant state, unwrap/verify the bulk data, and (for reads) validate the
 * returned checksum against a locally recomputed one.
 *
 * Returns 0 on success, -EAGAIN when the caller should resend (checksum
 * mismatch or bulk unwrap failure), or a negative error code.
 */
static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
{
	struct osc_brw_async_args *aa = (void *)&req->rq_async_args;
	const lnet_process_id_t *peer =
			&req->rq_import->imp_connection->c_peer;
	struct client_obd *cli = aa->aa_cli;
	struct ost_body *body;
	__u32 client_cksum = 0;

	/* -EDQUOT is let through so the reply's quota flags and grant can
	 * still be processed below before the error is returned.
	 */
	if (rc < 0 && rc != -EDQUOT) {
		DEBUG_REQ(D_INFO, req, "Failed request with rc = %d\n", rc);
		return rc;
	}

	LASSERTF(req->rq_repmsg, "rc = %d\n", rc);
	body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
	if (!body) {
		DEBUG_REQ(D_INFO, req, "Can't unpack body\n");
		return -EPROTO;
	}

	/* set/clear over quota flag for a uid/gid */
	if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE &&
	    body->oa.o_valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) {
		unsigned int qid[MAXQUOTAS] = { body->oa.o_uid, body->oa.o_gid };

		CDEBUG(D_QUOTA, "setdq for [%u %u] with valid %#llx, flags %x\n",
		       body->oa.o_uid, body->oa.o_gid, body->oa.o_valid,
		       body->oa.o_flags);
		osc_quota_setdq(cli, qid, body->oa.o_valid, body->oa.o_flags);
	}

	osc_update_grant(cli, body);

	/* deferred return for the -EDQUOT case allowed through above */
	if (rc < 0)
		return rc;

	if (aa->aa_oa->o_valid & OBD_MD_FLCKSUM)
		client_cksum = aa->aa_oa->o_cksum; /* save for later */

	if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) {
		/* a write reply carries no data, so rc must be <= 0 here */
		if (rc > 0) {
			CERROR("Unexpected +ve rc %d\n", rc);
			return -EPROTO;
		}
		LASSERT(req->rq_bulk->bd_nob == aa->aa_requested_nob);

		if (sptlrpc_cli_unwrap_bulk_write(req, req->rq_bulk))
			return -EAGAIN;

		/* server said our checksum was wrong: diagnose and resend */
		if ((aa->aa_oa->o_valid & OBD_MD_FLCKSUM) && client_cksum &&
		    check_write_checksum(&body->oa, peer, client_cksum,
					 body->oa.o_cksum, aa->aa_requested_nob,
					 aa->aa_page_count, aa->aa_ppga,
					 cksum_type_unpack(aa->aa_oa->o_flags)))
			return -EAGAIN;

		rc = check_write_rcs(req, aa->aa_requested_nob,
				     aa->aa_nio_count,
				     aa->aa_page_count, aa->aa_ppga);
		goto out;
	}

	/* The rest of this function executes only for OST_READs */

	/* if unwrap_bulk failed, return -EAGAIN to retry */
	rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, rc);
	if (rc < 0) {
		rc = -EAGAIN;
		goto out;
	}

	if (rc > aa->aa_requested_nob) {
		CERROR("Unexpected rc %d (%d requested)\n", rc,
		       aa->aa_requested_nob);
		return -EPROTO;
	}

	if (rc != req->rq_bulk->bd_nob_transferred) {
		CERROR("Unexpected rc %d (%d transferred)\n",
		       rc, req->rq_bulk->bd_nob_transferred);
		return -EPROTO;
	}

	/* short read: zero-fill the tail pages beyond the returned bytes */
	if (rc < aa->aa_requested_nob)
		handle_short_read(rc, aa->aa_page_count, aa->aa_ppga);

	if (body->oa.o_valid & OBD_MD_FLCKSUM) {
		static int cksum_counter;
		__u32 server_cksum = body->oa.o_cksum;
		char *via = "";
		char *router = "";
		enum cksum_type cksum_type;

		cksum_type = cksum_type_unpack(body->oa.o_valid &
					       OBD_MD_FLFLAGS ?
					       body->oa.o_flags : 0);
		client_cksum = osc_checksum_bulk(rc, aa->aa_page_count,
						 aa->aa_ppga, OST_READ,
						 cksum_type);

		/* note if the data was routed through an LNet router */
		if (peer->nid != req->rq_bulk->bd_sender) {
			via = " via ";
			router = libcfs_nid2str(req->rq_bulk->bd_sender);
		}

		if (server_cksum != client_cksum) {
			LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from %s%s%s inode " DFID " object " DOSTID " extent [%llu-%llu]\n",
					   req->rq_import->imp_obd->obd_name,
					   libcfs_nid2str(peer->nid),
					   via, router,
					   body->oa.o_valid & OBD_MD_FLFID ?
					   body->oa.o_parent_seq : (__u64)0,
					   body->oa.o_valid & OBD_MD_FLFID ?
					   body->oa.o_parent_oid : 0,
					   body->oa.o_valid & OBD_MD_FLFID ?
					   body->oa.o_parent_ver : 0,
					   POSTID(&body->oa.o_oi),
					   aa->aa_ppga[0]->off,
					   aa->aa_ppga[aa->aa_page_count-1]->off +
					   aa->aa_ppga[aa->aa_page_count-1]->count -
					   1);
			CERROR("client %x, server %x, cksum_type %x\n",
			       client_cksum, server_cksum, cksum_type);
			cksum_counter = 0;
			aa->aa_oa->o_cksum = client_cksum;
			rc = -EAGAIN;
		} else {
			cksum_counter++;
			CDEBUG(D_PAGE, "checksum %x confirmed\n", client_cksum);
			rc = 0;
		}
	} else if (unlikely(client_cksum)) {
		static int cksum_missed;

		/* we asked for a checksum but the server sent none; log at
		 * power-of-two intervals to avoid flooding the console
		 */
		cksum_missed++;
		if ((cksum_missed & (-cksum_missed)) == cksum_missed)
			CERROR("Checksum %u requested from %s but not sent\n",
			       cksum_missed, libcfs_nid2str(peer->nid));
	} else {
		rc = 0;
	}
out:
	/* propagate the server's obdo attributes back to the caller's oa */
	if (rc >= 0)
		lustre_get_wire_obdo(&req->rq_import->imp_connect_data,
				     aa->aa_oa, &body->oa);

	return rc;
}
1558
/*
 * Rebuild and resubmit a BRW RPC after a recoverable failure.
 *
 * A brand-new request is prepared from the async args of the failed one;
 * the page array, oap list, and extent list are moved (not copied) to the
 * new request, which then takes over the per-oap request references.
 *
 * Returns 0 when the new request has been queued on ptlrpcd, -EINTR if an
 * oap was interrupted while we were setting up, or the error from
 * osc_brw_prep_request().
 */
static int osc_brw_redo_request(struct ptlrpc_request *request,
				struct osc_brw_async_args *aa, int rc)
{
	struct ptlrpc_request *new_req;
	struct osc_brw_async_args *new_aa;
	struct osc_async_page *oap;

	DEBUG_REQ(rc == -EINPROGRESS ? D_RPCTRACE : D_ERROR, request,
		  "redo for recoverable error %d", rc);

	rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
				  OST_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
				  aa->aa_cli, aa->aa_oa,
				  NULL /* lsm unused by osc currently */,
				  aa->aa_page_count, aa->aa_ppga,
				  &new_req, 0, 1);
	if (rc)
		return rc;

	/* bail out (dropping the new request) if any page was interrupted */
	list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
		if (oap->oap_request) {
			LASSERTF(request == oap->oap_request,
				 "request %p != oap_request %p\n",
				 request, oap->oap_request);
			if (oap->oap_interrupted) {
				ptlrpc_req_finished(new_req);
				return -EINTR;
			}
		}
	}
	/* New request takes over pga and oaps from old request.
	 * Note that copying a list_head doesn't work, need to move it...
	 */
	aa->aa_resends++;
	new_req->rq_interpret_reply = request->rq_interpret_reply;
	new_req->rq_async_args = request->rq_async_args;
	new_req->rq_commit_cb = request->rq_commit_cb;
	/* cap resend delay to the current request timeout, this is similar to
	 * what ptlrpc does (see after_reply())
	 */
	if (aa->aa_resends > new_req->rq_timeout)
		new_req->rq_sent = ktime_get_real_seconds() + new_req->rq_timeout;
	else
		new_req->rq_sent = ktime_get_real_seconds() + aa->aa_resends;
	new_req->rq_generation_set = 1;
	new_req->rq_import_generation = request->rq_import_generation;

	new_aa = ptlrpc_req_async_args(new_req);

	/* splice the oap and extent lists over to the new async args */
	INIT_LIST_HEAD(&new_aa->aa_oaps);
	list_splice_init(&aa->aa_oaps, &new_aa->aa_oaps);
	INIT_LIST_HEAD(&new_aa->aa_exts);
	list_splice_init(&aa->aa_exts, &new_aa->aa_exts);
	new_aa->aa_resends = aa->aa_resends;

	/* swap each oap's request reference from the old RPC to the new */
	list_for_each_entry(oap, &new_aa->aa_oaps, oap_rpc_item) {
		if (oap->oap_request) {
			ptlrpc_req_finished(oap->oap_request);
			oap->oap_request = ptlrpc_request_addref(new_req);
		}
	}

	/* XXX: This code will run into problem if we're going to support
	 * to add a series of BRW RPCs into a self-defined ptlrpc_request_set
	 * and wait for all of them to be finished. We should inherit request
	 * set from old request.
	 */
	ptlrpcd_add_req(new_req);

	DEBUG_REQ(D_INFO, new_req, "new request");
	return 0;
}
1631
1632/*
1633 * ugh, we want disk allocation on the target to happen in offset order. we'll
1634 * follow sedgewicks advice and stick to the dead simple shellsort -- it'll do
1635 * fine for our small page arrays and doesn't require allocation. its an
1636 * insertion sort that swaps elements that are strides apart, shrinking the
1637 * stride down until its '1' and the array is sorted.
1638 */
1639static void sort_brw_pages(struct brw_page **array, int num)
1640{
1641 int stride, i, j;
1642 struct brw_page *tmp;
1643
1644 if (num == 1)
1645 return;
1646 for (stride = 1; stride < num ; stride = (stride * 3) + 1)
1647 ;
1648
1649 do {
1650 stride /= 3;
1651 for (i = stride ; i < num ; i++) {
1652 tmp = array[i];
1653 j = i;
1654 while (j >= stride && array[j - stride]->off > tmp->off) {
1655 array[j] = array[j - stride];
1656 j -= stride;
1657 }
1658 array[j] = tmp;
1659 }
1660 } while (stride > 1);
1661}
1662
/* Free the page-pointer array built for a BRW RPC.  The array is a single
 * allocation, so @count is not needed for the free itself.
 */
static void osc_release_ppga(struct brw_page **ppga, u32 count)
{
	LASSERT(ppga);
	kfree(ppga);
}
1668
/*
 * Reply-interpret callback for BRW RPCs (installed as rq_interpret_reply
 * by osc_build_rpc()).
 *
 * Finishes the request via osc_brw_fini_request(), resends on recoverable
 * errors, updates cl_object attributes (blocks/times, and size/KMS for
 * writes) from the returned obdo, finishes all extents, releases the page
 * array, and wakes cache waiters.
 */
static int brw_interpret(const struct lu_env *env,
			 struct ptlrpc_request *req, void *data, int rc)
{
	struct osc_brw_async_args *aa = data;
	struct osc_extent *ext;
	struct osc_extent *tmp;
	struct client_obd *cli = aa->aa_cli;

	rc = osc_brw_fini_request(req, rc);
	CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc);
	/* When server return -EINPROGRESS, client should always retry
	 * regardless of the number of times the bulk was resent already.
	 */
	if (osc_recoverable_error(rc)) {
		if (req->rq_import_generation !=
		    req->rq_import->imp_generation) {
			CDEBUG(D_HA, "%s: resend cross eviction for object: " DOSTID ", rc = %d.\n",
			       req->rq_import->imp_obd->obd_name,
			       POSTID(&aa->aa_oa->o_oi), rc);
		} else if (rc == -EINPROGRESS ||
			   client_should_resend(aa->aa_resends, aa->aa_cli)) {
			rc = osc_brw_redo_request(req, aa, rc);
		} else {
			CERROR("%s: too many resent retries for object: %llu:%llu, rc = %d.\n",
			       req->rq_import->imp_obd->obd_name,
			       POSTID(&aa->aa_oa->o_oi), rc);
		}

		/* redo succeeded: the new RPC now owns the pages/extents */
		if (rc == 0)
			return 0;
		else if (rc == -EAGAIN || rc == -EINPROGRESS)
			rc = -EIO;
	}

	if (rc == 0) {
		struct obdo *oa = aa->aa_oa;
		struct cl_attr *attr = &osc_env_info(env)->oti_attr;
		unsigned long valid = 0;
		struct cl_object *obj;
		struct osc_async_page *last;

		last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]);
		obj = osc2cl(last->oap_obj);

		/* copy the attributes the server marked valid in the obdo */
		cl_object_attr_lock(obj);
		if (oa->o_valid & OBD_MD_FLBLOCKS) {
			attr->cat_blocks = oa->o_blocks;
			valid |= CAT_BLOCKS;
		}
		if (oa->o_valid & OBD_MD_FLMTIME) {
			attr->cat_mtime = oa->o_mtime;
			valid |= CAT_MTIME;
		}
		if (oa->o_valid & OBD_MD_FLATIME) {
			attr->cat_atime = oa->o_atime;
			valid |= CAT_ATIME;
		}
		if (oa->o_valid & OBD_MD_FLCTIME) {
			attr->cat_ctime = oa->o_ctime;
			valid |= CAT_CTIME;
		}

		if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) {
			struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
			loff_t last_off = last->oap_count + last->oap_obj_off +
					  last->oap_page_off;

			/* Change file size if this is an out of quota or
			 * direct IO write and it extends the file size
			 */
			if (loi->loi_lvb.lvb_size < last_off) {
				attr->cat_size = last_off;
				valid |= CAT_SIZE;
			}
			/* Extend KMS if it's not a lockless write */
			if (loi->loi_kms < last_off &&
			    oap2osc_page(last)->ops_srvlock == 0) {
				attr->cat_kms = last_off;
				valid |= CAT_KMS;
			}
		}

		if (valid != 0)
			cl_object_attr_update(env, obj, attr, valid);
		cl_object_attr_unlock(obj);
	}
	kmem_cache_free(obdo_cachep, aa->aa_oa);

	/* successfully written pages stay "unstable" until committed */
	if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE && rc == 0)
		osc_inc_unstable_pages(req);

	list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
		list_del_init(&ext->oe_link);
		osc_extent_finish(env, ext, 1, rc);
	}
	LASSERT(list_empty(&aa->aa_exts));
	LASSERT(list_empty(&aa->aa_oaps));

	cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc :
			  req->rq_bulk->bd_nob_transferred);
	osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
	ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred);

	spin_lock(&cli->cl_loi_list_lock);
	/* We need to decrement before osc_ap_completion->osc_wake_cache_waiters
	 * is called so we know whether to go to sync BRWs or wait for more
	 * RPCs to complete
	 */
	if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE)
		cli->cl_w_in_flight--;
	else
		cli->cl_r_in_flight--;
	osc_wake_cache_waiters(cli);
	spin_unlock(&cli->cl_loi_list_lock);

	osc_io_unplug(env, cli, NULL);
	return rc;
}
1787
/* Commit callback (rq_commit_cb, installed by osc_build_rpc()) run when
 * the server has committed the write transaction.
 */
static void brw_commit(struct ptlrpc_request *req)
{
	/*
	 * If osc_inc_unstable_pages (via osc_extent_finish) races with
	 * this called via the rq_commit_cb, I need to ensure
	 * osc_dec_unstable_pages is still called. Otherwise unstable
	 * pages may be leaked.
	 */
	spin_lock(&req->rq_lock);
	if (unlikely(req->rq_unstable)) {
		/* unstable accounting already done: undo it here */
		req->rq_unstable = 0;
		spin_unlock(&req->rq_lock);
		osc_dec_unstable_pages(req);
	} else {
		/* mark committed so a later rq_unstable path knows to skip */
		req->rq_committed = 1;
		spin_unlock(&req->rq_lock);
	}
}
1806
/**
 * Build an RPC by the list of extent @ext_list. The caller must ensure
 * that the total pages in this list are NOT over max pages per RPC.
 * Extents in the list must be in OES_RPC state.
 *
 * Collects all pages of the extents into one sorted brw_page array,
 * prepares the BRW request, installs brw_commit()/brw_interpret() as its
 * callbacks, transfers ownership of the oaps and extents to the request's
 * async args, and queues it on ptlrpcd.
 *
 * On failure every extent in @ext_list is finished with the error and all
 * intermediate allocations are released.  Returns 0 or a negative errno.
 */
int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
		  struct list_head *ext_list, int cmd)
{
	struct ptlrpc_request *req = NULL;
	struct osc_extent *ext;
	struct brw_page **pga = NULL;
	struct osc_brw_async_args *aa = NULL;
	struct obdo *oa = NULL;
	struct osc_async_page *oap;
	struct osc_async_page *tmp;
	struct cl_req *clerq = NULL;
	enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ;
	struct cl_req_attr *crattr = NULL;
	u64 starting_offset = OBD_OBJECT_EOF;
	u64 ending_offset = 0;
	int mpflag = 0;
	int mem_tight = 0;
	int page_count = 0;
	bool soft_sync = false;
	int i;
	int rc;
	struct ost_body *body;
	LIST_HEAD(rpc_list);

	LASSERT(!list_empty(ext_list));

	/* add pages into rpc_list to build BRW rpc */
	list_for_each_entry(ext, ext_list, oe_link) {
		LASSERT(ext->oe_state == OES_RPC);
		mem_tight |= ext->oe_memalloc;
		list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
			++page_count;
			list_add_tail(&oap->oap_rpc_item, &rpc_list);
			/* only the first/last page may be partial */
			if (starting_offset > oap->oap_obj_off)
				starting_offset = oap->oap_obj_off;
			else
				LASSERT(oap->oap_page_off == 0);
			if (ending_offset < oap->oap_obj_off + oap->oap_count)
				ending_offset = oap->oap_obj_off +
						oap->oap_count;
			else
				LASSERT(oap->oap_page_off + oap->oap_count ==
					PAGE_SIZE);
		}
	}

	soft_sync = osc_over_unstable_soft_limit(cli);
	if (mem_tight)
		mpflag = cfs_memory_pressure_get_and_set();

	crattr = kzalloc(sizeof(*crattr), GFP_NOFS);
	if (!crattr) {
		rc = -ENOMEM;
		goto out;
	}

	pga = kcalloc(page_count, sizeof(*pga), GFP_NOFS);
	if (!pga) {
		rc = -ENOMEM;
		goto out;
	}

	oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
	if (!oa) {
		rc = -ENOMEM;
		goto out;
	}

	/* fill the pga array and attach each page to the cl_req */
	i = 0;
	list_for_each_entry(oap, &rpc_list, oap_rpc_item) {
		struct cl_page *page = oap2cl_page(oap);

		if (!clerq) {
			clerq = cl_req_alloc(env, page, crt,
					     1 /* only 1-object rpcs for now */);
			if (IS_ERR(clerq)) {
				rc = PTR_ERR(clerq);
				goto out;
			}
		}
		if (mem_tight)
			oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
		if (soft_sync)
			oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
		pga[i] = &oap->oap_brw_page;
		pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
		CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
		       pga[i]->pg, oap->oap_page->index, oap,
		       pga[i]->flag);
		i++;
		cl_req_page_add(env, clerq, page);
	}

	/* always get the data for the obdo for the rpc */
	LASSERT(clerq);
	crattr->cra_oa = oa;
	cl_req_attr_set(env, clerq, crattr, ~0ULL);

	rc = cl_req_prep(env, clerq);
	if (rc != 0) {
		CERROR("cl_req_prep failed: %d\n", rc);
		goto out;
	}

	sort_brw_pages(pga, page_count);
	rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count,
				  pga, &req, 1, 0);
	if (rc != 0) {
		CERROR("prep_req failed: %d\n", rc);
		goto out;
	}

	req->rq_commit_cb = brw_commit;
	req->rq_interpret_reply = brw_interpret;

	if (mem_tight != 0)
		req->rq_memalloc = 1;

	/* Need to update the timestamps after the request is built in case
	 * we race with setattr (locally or in queue at OST). If OST gets
	 * later setattr before earlier BRW (as determined by the request xid),
	 * the OST will not use BRW timestamps. Sadly, there is no obvious
	 * way to do this in a single call. bug 10150
	 */
	body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
	crattr->cra_oa = &body->oa;
	cl_req_attr_set(env, clerq, crattr,
			OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME);

	lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid);

	/* hand the oap and extent lists over to the request's async args */
	CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
	aa = ptlrpc_req_async_args(req);
	INIT_LIST_HEAD(&aa->aa_oaps);
	list_splice_init(&rpc_list, &aa->aa_oaps);
	INIT_LIST_HEAD(&aa->aa_exts);
	list_splice_init(ext_list, &aa->aa_exts);
	aa->aa_clerq = clerq;

	/* queued sync pages can be torn down while the pages
	 * were between the pending list and the rpc
	 */
	tmp = NULL;
	list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
		/* only one oap gets a request reference */
		if (!tmp)
			tmp = oap;
		if (oap->oap_interrupted && !req->rq_intr) {
			CDEBUG(D_INODE, "oap %p in req %p interrupted\n",
			       oap, req);
			ptlrpc_mark_interrupted(req);
		}
	}
	if (tmp)
		tmp->oap_request = ptlrpc_request_addref(req);

	/* account the RPC in flight and feed the lprocfs histograms */
	spin_lock(&cli->cl_loi_list_lock);
	starting_offset >>= PAGE_SHIFT;
	if (cmd == OBD_BRW_READ) {
		cli->cl_r_in_flight++;
		lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count);
		lprocfs_oh_tally(&cli->cl_read_rpc_hist, cli->cl_r_in_flight);
		lprocfs_oh_tally_log2(&cli->cl_read_offset_hist,
				      starting_offset + 1);
	} else {
		cli->cl_w_in_flight++;
		lprocfs_oh_tally_log2(&cli->cl_write_page_hist, page_count);
		lprocfs_oh_tally(&cli->cl_write_rpc_hist, cli->cl_w_in_flight);
		lprocfs_oh_tally_log2(&cli->cl_write_offset_hist,
				      starting_offset + 1);
	}
	spin_unlock(&cli->cl_loi_list_lock);

	DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %ur/%dw in flight",
		  page_count, aa, cli->cl_r_in_flight,
		  cli->cl_w_in_flight);

	ptlrpcd_add_req(req);
	rc = 0;

out:
	if (mem_tight != 0)
		cfs_memory_pressure_restore(mpflag);

	kfree(crattr);

	if (rc != 0) {
		LASSERT(!req);

		if (oa)
			kmem_cache_free(obdo_cachep, oa);
		kfree(pga);
		/* this should happen rarely and is pretty bad, it makes the
		 * pending list not follow the dirty order
		 */
		while (!list_empty(ext_list)) {
			ext = list_entry(ext_list->next, struct osc_extent,
					 oe_link);
			list_del_init(&ext->oe_link);
			osc_extent_finish(env, ext, 0, rc);
		}
		if (clerq && !IS_ERR(clerq))
			cl_req_completion(env, clerq, rc);
	}
	return rc;
}
2018
2019static int osc_set_lock_data_with_check(struct ldlm_lock *lock,
2020 struct ldlm_enqueue_info *einfo)
2021{
2022 void *data = einfo->ei_cbdata;
2023 int set = 0;
2024
Peng Taod7e09d02013-05-02 16:46:55 +08002025 LASSERT(lock->l_blocking_ast == einfo->ei_cb_bl);
2026 LASSERT(lock->l_resource->lr_type == einfo->ei_type);
2027 LASSERT(lock->l_completion_ast == einfo->ei_cb_cp);
2028 LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl);
2029
2030 lock_res_and_lock(lock);
Peng Taod7e09d02013-05-02 16:46:55 +08002031
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -05002032 if (!lock->l_ast_data)
Peng Taod7e09d02013-05-02 16:46:55 +08002033 lock->l_ast_data = data;
2034 if (lock->l_ast_data == data)
2035 set = 1;
2036
Peng Taod7e09d02013-05-02 16:46:55 +08002037 unlock_res_and_lock(lock);
2038
2039 return set;
2040}
2041
2042static int osc_set_data_with_check(struct lustre_handle *lockh,
2043 struct ldlm_enqueue_info *einfo)
2044{
2045 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
2046 int set = 0;
2047
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -05002048 if (lock) {
Peng Taod7e09d02013-05-02 16:46:55 +08002049 set = osc_set_lock_data_with_check(lock, einfo);
2050 LDLM_LOCK_PUT(lock);
2051 } else
2052 CERROR("lockh %p, data %p - client evicted?\n",
2053 lockh, einfo->ei_cbdata);
2054 return set;
2055}
2056
/*
 * Finish an LDLM enqueue: translate the server reply into a final error
 * code, set LDLM_FL_LVB_READY where appropriate, invoke the caller's
 * upcall, and drop the enqueue reference on the granted lock.
 *
 * Returns the upcall's return value.
 */
static int osc_enqueue_fini(struct ptlrpc_request *req,
			    osc_enqueue_upcall_f upcall, void *cookie,
			    struct lustre_handle *lockh, enum ldlm_mode mode,
			    __u64 *flags, int agl, int errcode)
{
	bool intent = *flags & LDLM_FL_HAS_INTENT;
	int rc;

	/* The request was created before ldlm_cli_enqueue call. */
	if (intent && errcode == ELDLM_LOCK_ABORTED) {
		/* intent enqueue was aborted: the real status is carried in
		 * the DLM reply's policy result
		 */
		struct ldlm_reply *rep;

		rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);

		rep->lock_policy_res1 =
			ptlrpc_status_ntoh(rep->lock_policy_res1);
		if (rep->lock_policy_res1)
			errcode = rep->lock_policy_res1;
		if (!agl)
			*flags |= LDLM_FL_LVB_READY;
	} else if (errcode == ELDLM_OK) {
		*flags |= LDLM_FL_LVB_READY;
	}

	/* Call the update callback. */
	rc = (*upcall)(cookie, lockh, errcode);
	/* release the reference taken in ldlm_cli_enqueue() */
	if (errcode == ELDLM_LOCK_MATCHED)
		errcode = ELDLM_OK;
	if (errcode == ELDLM_OK && lustre_handle_is_used(lockh))
		ldlm_lock_decref(lockh, mode);

	return rc;
}
2091
/*
 * Reply-interpret callback for an asynchronous lock enqueue; installed as
 * req->rq_interpret_reply by osc_enqueue_base().
 *
 * Completes the DLM side of the enqueue via ldlm_cli_enqueue_fini() and
 * then the OSC side (including the caller's upcall) via osc_enqueue_fini(),
 * whose return value is returned.  The extra lock reference taken here
 * guarantees the lock outlives the upcall.
 */
static int osc_enqueue_interpret(const struct lu_env *env,
				 struct ptlrpc_request *req,
				 struct osc_enqueue_args *aa, int rc)
{
	struct ldlm_lock *lock;
	struct lustre_handle *lockh = &aa->oa_lockh;
	enum ldlm_mode mode = aa->oa_mode;
	struct ost_lvb *lvb = aa->oa_lvb;
	__u32 lvb_len = sizeof(*lvb);
	__u64 flags = 0;


	/* ldlm_cli_enqueue is holding a reference on the lock, so it must
	 * be valid.
	 */
	lock = ldlm_handle2lock(lockh);
	LASSERTF(lock, "lockh %llx, req %p, aa %p - client evicted?\n",
		 lockh->cookie, req, aa);

	/* Take an additional reference so that a blocking AST that
	 * ldlm_cli_enqueue_fini() might post for a failed lock, is guaranteed
	 * to arrive after an upcall has been executed by
	 * osc_enqueue_fini().
	 */
	ldlm_lock_addref(lockh, mode);

	/* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */
	OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2);

	/* Let CP AST to grant the lock first. */
	OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1);

	/* AGL enqueues carry no lvb/flags pointers (see osc_enqueue_base());
	 * point oa_flags at a local so ldlm_cli_enqueue_fini() can report.
	 */
	if (aa->oa_agl) {
		LASSERT(!aa->oa_lvb);
		LASSERT(!aa->oa_flags);
		aa->oa_flags = &flags;
	}

	/* Complete obtaining the lock procedure. */
	rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_type, 1,
				   aa->oa_mode, aa->oa_flags, lvb, lvb_len,
				   lockh, rc);
	/* Complete osc stuff. */
	rc = osc_enqueue_fini(req, aa->oa_upcall, aa->oa_cookie, lockh, mode,
			      aa->oa_flags, aa->oa_agl, rc);

	OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10);

	/* Drop the reference taken above, then the ldlm_handle2lock() one. */
	ldlm_lock_decref(lockh, mode);
	LDLM_LOCK_PUT(lock);
	return rc;
}
2144
/* Sentinel rqset value: callers of osc_enqueue_base() pass this to have the
 * request queued on ptlrpcd (ptlrpcd_add_req) instead of a real request set.
 */
struct ptlrpc_request_set *PTLRPCD_SET = (void *)1;
2146
/* When enqueuing asynchronously, locks are not ordered, we can obtain a lock
 * from the 2nd OSC before a lock from the 1st one. This does not deadlock with
 * other synchronous requests, however keeping some locks and trying to obtain
 * others may take a considerable amount of time in a case of ost failure; and
 * when other sync requests do not get released lock from a client, the client
 * is evicted from the cluster -- such scenarios make the life difficult, so
 * release locks just after they are obtained.
 */
/*
 * Acquire an extent DLM lock for @res_id, preferring a match against an
 * already-cached lock before enqueuing a new one on the server.
 *
 * @flags is read/written in place (e.g. LDLM_FL_LVB_READY is set on
 * success); @upcall/@cookie receive the completion notification.  When
 * @async, the request is queued on @rqset (or ptlrpcd for PTLRPCD_SET) and
 * osc_enqueue_interpret() finishes the job; otherwise the enqueue is
 * completed synchronously via osc_enqueue_fini().
 *
 * @agl != 0 marks a speculative (asynchronous glimpse lock) enqueue: a
 * cache hit returns -ECANCELED to tell the caller to abandon AGL for the
 * stripe.  Returns an ELDLM_* status or a negative errno.
 */
int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
		     __u64 *flags, ldlm_policy_data_t *policy,
		     struct ost_lvb *lvb, int kms_valid,
		     osc_enqueue_upcall_f upcall, void *cookie,
		     struct ldlm_enqueue_info *einfo,
		     struct ptlrpc_request_set *rqset, int async, int agl)
{
	struct obd_device *obd = exp->exp_obd;
	struct lustre_handle lockh = { 0 };
	struct ptlrpc_request *req = NULL;
	int intent = *flags & LDLM_FL_HAS_INTENT;
	__u64 match_flags = *flags;
	enum ldlm_mode mode;
	int rc;

	/* Filesystem lock extents are extended to page boundaries so that
	 * dealing with the page cache is a little smoother.
	 */
	policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
	policy->l_extent.end |= ~PAGE_MASK;

	/*
	 * kms is not valid when either object is completely fresh (so that no
	 * locks are cached), or object was evicted. In the latter case cached
	 * lock cannot be used, because it would prime inode state with
	 * potentially stale LVB.
	 */
	if (!kms_valid)
		goto no_match;

	/* Next, search for already existing extent locks that will cover us */
	/* If we're trying to read, we also search for an existing PW lock. The
	 * VFS and page cache already protect us locally, so lots of readers/
	 * writers can share a single PW lock.
	 *
	 * There are problems with conversion deadlocks, so instead of
	 * converting a read lock to a write lock, we'll just enqueue a new
	 * one.
	 *
	 * At some point we should cancel the read lock instead of making them
	 * send us a blocking callback, but there are problems with canceling
	 * locks out from other users right now, too.
	 */
	mode = einfo->ei_mode;
	if (einfo->ei_mode == LCK_PR)
		mode |= LCK_PW;
	if (agl == 0)
		match_flags |= LDLM_FL_LVB_READY;
	if (intent != 0)
		match_flags |= LDLM_FL_BLOCK_GRANTED;
	mode = ldlm_lock_match(obd->obd_namespace, match_flags, res_id,
			       einfo->ei_type, policy, mode, &lockh, 0);
	if (mode) {
		struct ldlm_lock *matched;

		if (*flags & LDLM_FL_TEST_LOCK)
			return ELDLM_OK;

		matched = ldlm_handle2lock(&lockh);
		if (agl) {
			/* AGL enqueues DLM locks speculatively. Therefore if
			 * a DLM lock already exists, it will just inform the
			 * caller to cancel the AGL process for this stripe.
			 */
			ldlm_lock_decref(&lockh, mode);
			LDLM_LOCK_PUT(matched);
			return -ECANCELED;
		} else if (osc_set_lock_data_with_check(matched, einfo)) {
			*flags |= LDLM_FL_LVB_READY;
			/* We already have a lock, and it's referenced. */
			(*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED);

			ldlm_lock_decref(&lockh, mode);
			LDLM_LOCK_PUT(matched);
			return ELDLM_OK;
		} else {
			/* Matched lock unusable - drop it and enqueue anew. */
			ldlm_lock_decref(&lockh, mode);
			LDLM_LOCK_PUT(matched);
		}
	}

no_match:
	if (*flags & LDLM_FL_TEST_LOCK)
		return -ENOLCK;
	if (intent) {
		/* Intent enqueue needs a pre-built request with room for the
		 * server-side LVB in the reply.
		 */
		req = ptlrpc_request_alloc(class_exp2cliimp(exp),
					   &RQF_LDLM_ENQUEUE_LVB);
		if (!req)
			return -ENOMEM;

		rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
		if (rc) {
			ptlrpc_request_free(req);
			return rc;
		}

		req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
				     sizeof(*lvb));
		ptlrpc_request_set_replen(req);
	}

	/* users of osc_enqueue() can pass this flag for ldlm_lock_match() */
	*flags &= ~LDLM_FL_BLOCK_GRANTED;

	rc = ldlm_cli_enqueue(exp, &req, einfo, res_id, policy, flags, lvb,
			      sizeof(*lvb), LVB_T_OST, &lockh, async);
	if (async) {
		if (!rc) {
			struct osc_enqueue_args *aa;

			CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
			aa = ptlrpc_req_async_args(req);
			aa->oa_exp = exp;
			aa->oa_mode = einfo->ei_mode;
			aa->oa_type = einfo->ei_type;
			lustre_handle_copy(&aa->oa_lockh, &lockh);
			aa->oa_upcall = upcall;
			aa->oa_cookie = cookie;
			aa->oa_agl = !!agl;
			if (!agl) {
				aa->oa_flags = flags;
				aa->oa_lvb = lvb;
			} else {
				/* AGL is essentially to enqueue an DLM lock
				 * in advance, so we don't care about the
				 * result of AGL enqueue.
				 */
				aa->oa_lvb = NULL;
				aa->oa_flags = NULL;
			}

			req->rq_interpret_reply =
				(ptlrpc_interpterer_t)osc_enqueue_interpret;
			if (rqset == PTLRPCD_SET)
				ptlrpcd_add_req(req);
			else
				ptlrpc_set_add_req(rqset, req);
		} else if (intent) {
			ptlrpc_req_finished(req);
		}
		return rc;
	}

	rc = osc_enqueue_fini(req, upcall, cookie, &lockh, einfo->ei_mode,
			      flags, agl, rc);
	if (intent)
		ptlrpc_req_finished(req);

	return rc;
}
2305
/*
 * Look for a cached DLM lock covering @policy on @res_id.
 *
 * Returns 0 if no usable lock was found (or the found lock failed the
 * osc_set_data_with_check() data check), otherwise the matched mode.  On a
 * successful non-TEST match the lock reference is converted to LCK_PR when
 * the match upgraded a PR request to PW; @lockh receives the lock handle.
 */
int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
		   __u32 type, ldlm_policy_data_t *policy, __u32 mode,
		   __u64 *flags, void *data, struct lustre_handle *lockh,
		   int unref)
{
	struct obd_device *obd = exp->exp_obd;
	__u64 lflags = *flags;
	enum ldlm_mode rc;

	/* Fault-injection point for testing match failures. */
	if (OBD_FAIL_CHECK(OBD_FAIL_OSC_MATCH))
		return -EIO;

	/* Filesystem lock extents are extended to page boundaries so that
	 * dealing with the page cache is a little smoother
	 */
	policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
	policy->l_extent.end |= ~PAGE_MASK;

	/* Next, search for already existing extent locks that will cover us */
	/* If we're trying to read, we also search for an existing PW lock. The
	 * VFS and page cache already protect us locally, so lots of readers/
	 * writers can share a single PW lock.
	 */
	rc = mode;
	if (mode == LCK_PR)
		rc |= LCK_PW;
	rc = ldlm_lock_match(obd->obd_namespace, lflags,
			     res_id, type, policy, rc, lockh, unref);
	if (rc) {
		if (data) {
			if (!osc_set_data_with_check(lockh, data)) {
				/* Data check failed: release the match
				 * reference (unless TEST_LOCK took none).
				 */
				if (!(lflags & LDLM_FL_TEST_LOCK))
					ldlm_lock_decref(lockh, rc);
				return 0;
			}
		}
		/* Asked for PR but matched PW: swap the reference to PR so
		 * the caller releases the mode it asked for.
		 */
		if (!(lflags & LDLM_FL_TEST_LOCK) && mode != rc) {
			ldlm_lock_addref(lockh, LCK_PR);
			ldlm_lock_decref(lockh, LCK_PW);
		}
		return rc;
	}
	return rc;
}
2350
2351int osc_cancel_base(struct lustre_handle *lockh, __u32 mode)
2352{
Peng Taod7e09d02013-05-02 16:46:55 +08002353 if (unlikely(mode == LCK_GROUP))
2354 ldlm_lock_decref_and_cancel(lockh, mode);
2355 else
2356 ldlm_lock_decref(lockh, mode);
2357
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +08002358 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +08002359}
2360
Peng Taod7e09d02013-05-02 16:46:55 +08002361static int osc_statfs_interpret(const struct lu_env *env,
2362 struct ptlrpc_request *req,
2363 struct osc_async_args *aa, int rc)
2364{
2365 struct obd_statfs *msfs;
Peng Taod7e09d02013-05-02 16:46:55 +08002366
2367 if (rc == -EBADR)
2368 /* The request has in fact never been sent
2369 * due to issues at a higher level (LOV).
2370 * Exit immediately since the caller is
2371 * aware of the problem and takes care
Oleg Drokin30aa9c52016-02-24 22:00:37 -05002372 * of the clean up
2373 */
Oleg Drokindefa2202016-02-24 22:00:39 -05002374 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +08002375
2376 if ((rc == -ENOTCONN || rc == -EAGAIN) &&
Tina Johnson26c4ea42014-09-21 00:08:05 +05302377 (aa->aa_oi->oi_flags & OBD_STATFS_NODELAY)) {
2378 rc = 0;
2379 goto out;
2380 }
Peng Taod7e09d02013-05-02 16:46:55 +08002381
2382 if (rc != 0)
Tina Johnson26c4ea42014-09-21 00:08:05 +05302383 goto out;
Peng Taod7e09d02013-05-02 16:46:55 +08002384
2385 msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -05002386 if (!msfs) {
Tina Johnson26c4ea42014-09-21 00:08:05 +05302387 rc = -EPROTO;
2388 goto out;
Peng Taod7e09d02013-05-02 16:46:55 +08002389 }
2390
2391 *aa->aa_oi->oi_osfs = *msfs;
2392out:
2393 rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +08002394 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +08002395}
2396
/*
 * Queue an asynchronous OST_STATFS request on @rqset; the reply is handled
 * by osc_statfs_interpret(), which calls oinfo->oi_cb_up.
 *
 * NOTE(review): @max_age is currently unreferenced in the body - see the
 * comment below about possibly passing it in the request.
 *
 * Returns 0 on successful queuing or a negative errno.
 */
static int osc_statfs_async(struct obd_export *exp,
			    struct obd_info *oinfo, __u64 max_age,
			    struct ptlrpc_request_set *rqset)
{
	struct obd_device *obd = class_exp2obd(exp);
	struct ptlrpc_request *req;
	struct osc_async_args *aa;
	int rc;

	/* We could possibly pass max_age in the request (as an absolute
	 * timestamp or a "seconds.usec ago") so the target can avoid doing
	 * extra calls into the filesystem if that isn't necessary (e.g.
	 * during mount that would help a bit). Having relative timestamps
	 * is not so great if request processing is slow, while absolute
	 * timestamps are not ideal because they need time synchronization.
	 */
	req = ptlrpc_request_alloc(obd->u.cli.cl_import, &RQF_OST_STATFS);
	if (!req)
		return -ENOMEM;

	rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
	if (rc) {
		ptlrpc_request_free(req);
		return rc;
	}
	ptlrpc_request_set_replen(req);
	req->rq_request_portal = OST_CREATE_PORTAL;
	ptlrpc_at_set_req_timeout(req);

	if (oinfo->oi_flags & OBD_STATFS_NODELAY) {
		/* procfs requests not want stat in wait for avoid deadlock */
		req->rq_no_resend = 1;
		req->rq_no_delay = 1;
	}

	req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret;
	CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
	aa = ptlrpc_req_async_args(req);
	aa->aa_oi = oinfo;

	ptlrpc_set_add_req(rqset, req);
	return 0;
}
2440
/*
 * Synchronous OST_STATFS: send the request, wait, and copy the reply into
 * @osfs.
 *
 * NOTE(review): @max_age is currently unreferenced in the body - see the
 * comment below about possibly passing it in the request.
 *
 * Returns 0 on success or a negative errno.
 */
static int osc_statfs(const struct lu_env *env, struct obd_export *exp,
		      struct obd_statfs *osfs, __u64 max_age, __u32 flags)
{
	struct obd_device *obd = class_exp2obd(exp);
	struct obd_statfs *msfs;
	struct ptlrpc_request *req;
	struct obd_import *imp = NULL;
	int rc;

	/* Since the request might also come from lprocfs, so we need
	 * sync this with client_disconnect_export Bug15684
	 */
	down_read(&obd->u.cli.cl_sem);
	if (obd->u.cli.cl_import)
		imp = class_import_get(obd->u.cli.cl_import);
	up_read(&obd->u.cli.cl_sem);
	if (!imp)
		return -ENODEV;

	/* We could possibly pass max_age in the request (as an absolute
	 * timestamp or a "seconds.usec ago") so the target can avoid doing
	 * extra calls into the filesystem if that isn't necessary (e.g.
	 * during mount that would help a bit). Having relative timestamps
	 * is not so great if request processing is slow, while absolute
	 * timestamps are not ideal because they need time synchronization.
	 */
	req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS);

	/* The import reference was only needed to allocate the request. */
	class_import_put(imp);

	if (!req)
		return -ENOMEM;

	rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
	if (rc) {
		ptlrpc_request_free(req);
		return rc;
	}
	ptlrpc_request_set_replen(req);
	req->rq_request_portal = OST_CREATE_PORTAL;
	ptlrpc_at_set_req_timeout(req);

	if (flags & OBD_STATFS_NODELAY) {
		/* procfs requests not want stat in wait for avoid deadlock */
		req->rq_no_resend = 1;
		req->rq_no_delay = 1;
	}

	rc = ptlrpc_queue_wait(req);
	if (rc)
		goto out;

	msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
	if (!msfs) {
		rc = -EPROTO;
		goto out;
	}

	*osfs = *msfs;

 out:
	ptlrpc_req_finished(req);
	return rc;
}
2505
/*
 * OSC ioctl dispatcher.  Pins the module for the duration of the call so
 * it cannot be unloaded mid-ioctl, then dispatches on @cmd.
 *
 * Returns 0 on success, -EINVAL if the module reference cannot be taken,
 * -ENOTTY for unknown commands, or the handler's error.
 */
static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
			 void *karg, void __user *uarg)
{
	struct obd_device *obd = exp->exp_obd;
	struct obd_ioctl_data *data = karg;
	int err = 0;

	if (!try_module_get(THIS_MODULE)) {
		CERROR("%s: cannot get module '%s'\n", obd->obd_name,
		       module_name(THIS_MODULE));
		return -EINVAL;
	}
	switch (cmd) {
	case OBD_IOC_CLIENT_RECOVER:
		err = ptlrpc_recover_import(obd->u.cli.cl_import,
					    data->ioc_inlbuf1, 0);
		/* Positive returns are collapsed to success. */
		if (err > 0)
			err = 0;
		goto out;
	case IOC_OSC_SET_ACTIVE:
		err = ptlrpc_set_import_active(obd->u.cli.cl_import,
					       data->ioc_offset);
		goto out;
	case OBD_IOC_POLL_QUOTACHECK:
		err = osc_quota_poll_check(exp, karg);
		goto out;
	case OBD_IOC_PING_TARGET:
		err = ptlrpc_obd_ping(obd);
		goto out;
	default:
		CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n",
		       cmd, current_comm());
		err = -ENOTTY;
		goto out;
	}
out:
	module_put(THIS_MODULE);
	return err;
}
2545
/*
 * obd_get_info handler.  Only KEY_FIEMAP is recognised: it forwards a
 * fiemap query to the OST via OST_GET_INFO, optionally taking (or asking
 * the server to take) a PR extent lock to flush dirty data first when the
 * caller set FIEMAP_FLAG_SYNC.
 *
 * @val is used for both the request payload and the reply (*vallen bytes).
 * Returns 0 on success, -EINVAL for unknown keys, or a negative errno.
 */
static int osc_get_info(const struct lu_env *env, struct obd_export *exp,
			u32 keylen, void *key, __u32 *vallen, void *val,
			struct lov_stripe_md *lsm)
{
	if (!vallen || !val)
		return -EFAULT;

	if (KEY_IS(KEY_FIEMAP)) {
		struct ll_fiemap_info_key *fm_key = key;
		struct ldlm_res_id res_id;
		ldlm_policy_data_t policy;
		struct lustre_handle lockh;
		enum ldlm_mode mode = 0;
		struct ptlrpc_request *req;
		struct ll_user_fiemap *reply;
		char *tmp;
		int rc;

		if (!(fm_key->fiemap.fm_flags & FIEMAP_FLAG_SYNC))
			goto skip_locking;

		/* Round the queried range out to page boundaries, clamping
		 * the end to OBD_OBJECT_EOF on overflow.
		 */
		policy.l_extent.start = fm_key->fiemap.fm_start &
					PAGE_MASK;

		if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <=
		    fm_key->fiemap.fm_start + PAGE_SIZE - 1)
			policy.l_extent.end = OBD_OBJECT_EOF;
		else
			policy.l_extent.end = (fm_key->fiemap.fm_start +
					       fm_key->fiemap.fm_length +
					       PAGE_SIZE - 1) & PAGE_MASK;

		ostid_build_res_name(&fm_key->oa.o_oi, &res_id);
		mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
				       LDLM_FL_BLOCK_GRANTED |
				       LDLM_FL_LVB_READY,
				       &res_id, LDLM_EXTENT, &policy,
				       LCK_PR | LCK_PW, &lockh, 0);
		if (mode) { /* lock is cached on client */
			if (mode != LCK_PR) {
				/* Hold the lock as PR for the duration of
				 * the RPC; released at drop_lock below.
				 */
				ldlm_lock_addref(&lockh, LCK_PR);
				ldlm_lock_decref(&lockh, LCK_PW);
			}
		} else { /* no cached lock, needs acquire lock on server side */
			fm_key->oa.o_valid |= OBD_MD_FLFLAGS;
			fm_key->oa.o_flags |= OBD_FL_SRVLOCK;
		}

skip_locking:
		req = ptlrpc_request_alloc(class_exp2cliimp(exp),
					   &RQF_OST_GET_INFO_FIEMAP);
		if (!req) {
			rc = -ENOMEM;
			goto drop_lock;
		}

		req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY,
				     RCL_CLIENT, keylen);
		req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL,
				     RCL_CLIENT, *vallen);
		req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL,
				     RCL_SERVER, *vallen);

		rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO);
		if (rc) {
			ptlrpc_request_free(req);
			goto drop_lock;
		}

		tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY);
		memcpy(tmp, key, keylen);
		tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_VAL);
		memcpy(tmp, val, *vallen);

		ptlrpc_request_set_replen(req);
		rc = ptlrpc_queue_wait(req);
		if (rc)
			goto fini_req;

		reply = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL);
		if (!reply) {
			rc = -EPROTO;
			goto fini_req;
		}

		memcpy(val, reply, *vallen);
fini_req:
		ptlrpc_req_finished(req);
drop_lock:
		/* mode is non-zero only when a cached lock was matched. */
		if (mode)
			ldlm_lock_decref(&lockh, LCK_PR);
		return rc;
	}

	return -EINVAL;
}
2642
/*
 * obd_set_info_async handler.  A handful of keys are handled locally
 * (checksum toggle, sptlrpc config/context, LRU cache attach and shrink);
 * everything else is forwarded to the OST as an OST_SET_INFO RPC, with
 * KEY_GRANT_SHRINK getting its own request format and a ptlrpcd-driven
 * interpret callback.
 *
 * Returns 0 on success or a negative errno.
 */
static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
			      u32 keylen, void *key, u32 vallen,
			      void *val, struct ptlrpc_request_set *set)
{
	struct ptlrpc_request *req;
	struct obd_device *obd = exp->exp_obd;
	struct obd_import *imp = class_exp2cliimp(exp);
	char *tmp;
	int rc;

	OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SHUTDOWN, 10);

	if (KEY_IS(KEY_CHECKSUM)) {
		if (vallen != sizeof(int))
			return -EINVAL;
		exp->exp_obd->u.cli.cl_checksum = (*(int *)val) ? 1 : 0;
		return 0;
	}

	if (KEY_IS(KEY_SPTLRPC_CONF)) {
		sptlrpc_conf_client_adapt(obd);
		return 0;
	}

	if (KEY_IS(KEY_FLUSH_CTX)) {
		sptlrpc_import_flush_my_ctx(imp);
		return 0;
	}

	if (KEY_IS(KEY_CACHE_SET)) {
		struct client_obd *cli = &obd->u.cli;

		LASSERT(!cli->cl_cache); /* only once */
		cli->cl_cache = val;
		cl_cache_incref(cli->cl_cache);
		cli->cl_lru_left = &cli->cl_cache->ccc_lru_left;

		/* add this osc into entity list */
		LASSERT(list_empty(&cli->cl_lru_osc));
		spin_lock(&cli->cl_cache->ccc_lru_lock);
		list_add(&cli->cl_lru_osc, &cli->cl_cache->ccc_lru);
		spin_unlock(&cli->cl_cache->ccc_lru_lock);

		return 0;
	}

	if (KEY_IS(KEY_CACHE_LRU_SHRINK)) {
		struct client_obd *cli = &obd->u.cli;
		/* Shrink at most half of this OSC's LRU pages. */
		long nr = atomic_long_read(&cli->cl_lru_in_list) >> 1;
		long target = *(long *)val;

		nr = osc_lru_shrink(env, cli, min(nr, target), true);
		/* Report back how many pages remain to be shrunk. */
		*(long *)val -= nr;
		return 0;
	}

	if (!set && !KEY_IS(KEY_GRANT_SHRINK))
		return -EINVAL;

	/* We pass all other commands directly to OST. Since nobody calls osc
	 * methods directly and everybody is supposed to go through LOV, we
	 * assume lov checked invalid values for us.
	 * The only recognised values so far are evict_by_nid and mds_conn.
	 * Even if something bad goes through, we'd get a -EINVAL from OST
	 * anyway.
	 */

	req = ptlrpc_request_alloc(imp, KEY_IS(KEY_GRANT_SHRINK) ?
				   &RQF_OST_SET_GRANT_INFO :
				   &RQF_OBD_SET_INFO);
	if (!req)
		return -ENOMEM;

	req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY,
			     RCL_CLIENT, keylen);
	if (!KEY_IS(KEY_GRANT_SHRINK))
		req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL,
				     RCL_CLIENT, vallen);
	rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SET_INFO);
	if (rc) {
		ptlrpc_request_free(req);
		return rc;
	}

	tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
	memcpy(tmp, key, keylen);
	tmp = req_capsule_client_get(&req->rq_pill, KEY_IS(KEY_GRANT_SHRINK) ?
				     &RMF_OST_BODY :
				     &RMF_SETINFO_VAL);
	memcpy(tmp, val, vallen);

	if (KEY_IS(KEY_GRANT_SHRINK)) {
		struct osc_brw_async_args *aa;
		struct obdo *oa;

		CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
		aa = ptlrpc_req_async_args(req);
		/* The obdo must outlive the request; freed by the grant
		 * shrink interpret path - TODO confirm against
		 * osc_shrink_grant_interpret.
		 */
		oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
		if (!oa) {
			ptlrpc_req_finished(req);
			return -ENOMEM;
		}
		*oa = ((struct ost_body *)val)->oa;
		aa->aa_oa = oa;
		req->rq_interpret_reply = osc_shrink_grant_interpret;
	}

	ptlrpc_request_set_replen(req);
	if (!KEY_IS(KEY_GRANT_SHRINK)) {
		LASSERT(set);
		ptlrpc_set_add_req(set, req);
		ptlrpc_check_set(NULL, set);
	} else {
		ptlrpcd_add_req(req);
	}

	return 0;
}
2761
Peng Taod7e09d02013-05-02 16:46:55 +08002762static int osc_reconnect(const struct lu_env *env,
2763 struct obd_export *exp, struct obd_device *obd,
2764 struct obd_uuid *cluuid,
2765 struct obd_connect_data *data,
2766 void *localdata)
2767{
2768 struct client_obd *cli = &obd->u.cli;
2769
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -05002770 if (data && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) {
Peng Taod7e09d02013-05-02 16:46:55 +08002771 long lost_grant;
2772
John L. Hammond7d53d8f2016-03-30 19:48:36 -04002773 spin_lock(&cli->cl_loi_list_lock);
Hongchao Zhang3147b262016-08-16 16:19:22 -04002774 data->ocd_grant = (cli->cl_avail_grant +
2775 (cli->cl_dirty_pages << PAGE_SHIFT)) ?:
2776 2 * cli_brw_size(obd);
Peng Taod7e09d02013-05-02 16:46:55 +08002777 lost_grant = cli->cl_lost_grant;
2778 cli->cl_lost_grant = 0;
John L. Hammond7d53d8f2016-03-30 19:48:36 -04002779 spin_unlock(&cli->cl_loi_list_lock);
Peng Taod7e09d02013-05-02 16:46:55 +08002780
Joe Perches2d00bd12014-11-23 11:28:50 -08002781 CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d, lost: %ld.\n",
2782 data->ocd_connect_flags,
Peng Taod7e09d02013-05-02 16:46:55 +08002783 data->ocd_version, data->ocd_grant, lost_grant);
2784 }
2785
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +08002786 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +08002787}
2788
2789static int osc_disconnect(struct obd_export *exp)
2790{
2791 struct obd_device *obd = class_exp2obd(exp);
Peng Taod7e09d02013-05-02 16:46:55 +08002792 int rc;
2793
Peng Taod7e09d02013-05-02 16:46:55 +08002794 rc = client_disconnect_export(exp);
2795 /**
2796 * Initially we put del_shrink_grant before disconnect_export, but it
2797 * causes the following problem if setup (connect) and cleanup
2798 * (disconnect) are tangled together.
2799 * connect p1 disconnect p2
2800 * ptlrpc_connect_import
2801 * ............... class_manual_cleanup
2802 * osc_disconnect
2803 * del_shrink_grant
2804 * ptlrpc_connect_interrupt
2805 * init_grant_shrink
2806 * add this client to shrink list
2807 * cleanup_osc
2808 * Bang! pinger trigger the shrink.
2809 * So the osc should be disconnected from the shrink list, after we
2810 * are sure the import has been destroyed. BUG18662
2811 */
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -05002812 if (!obd->u.cli.cl_import)
Peng Taod7e09d02013-05-02 16:46:55 +08002813 osc_del_shrink_grant(&obd->u.cli);
2814 return rc;
2815}
2816
/*
 * React to import state-change events for this OSC device.
 *
 * \param obd    the OSC obd_device owning the import
 * \param imp    the import that changed state (must belong to \a obd)
 * \param event  which transition occurred
 *
 * \retval 0 on success, negative errno from the per-event handler.
 * Unknown events are a fatal bug (LBUG).
 */
static int osc_import_event(struct obd_device *obd,
			    struct obd_import *imp,
			    enum obd_import_event event)
{
	struct client_obd *cli;
	int rc = 0;

	LASSERT(imp->imp_obd == obd);

	switch (event) {
	case IMP_EVENT_DISCON: {
		/* Connection lost: any grant we held is no longer valid. */
		cli = &obd->u.cli;
		spin_lock(&cli->cl_loi_list_lock);
		cli->cl_avail_grant = 0;
		cli->cl_lost_grant = 0;
		spin_unlock(&cli->cl_loi_list_lock);
		break;
	}
	case IMP_EVENT_INACTIVE: {
		rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL);
		break;
	}
	case IMP_EVENT_INVALIDATE: {
		struct ldlm_namespace *ns = obd->obd_namespace;
		struct lu_env *env;
		int refcheck;

		env = cl_env_get(&refcheck);
		if (!IS_ERR(env)) {
			/* Reset grants */
			cli = &obd->u.cli;
			/* all pages go to failing rpcs due to the invalid
			 * import
			 */
			osc_io_unplug(env, cli, NULL);

			/* Drop all cached locks for this namespace. */
			ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
			cl_env_put(env, &refcheck);
		} else {
			rc = PTR_ERR(env);
		}
		break;
	}
	case IMP_EVENT_ACTIVE: {
		rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL);
		break;
	}
	case IMP_EVENT_OCD: {
		/* Connect data negotiated: (re)initialize grant accounting. */
		struct obd_connect_data *ocd = &imp->imp_connect_data;

		if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT)
			osc_init_grant(&obd->u.cli, ocd);

		/* See bug 7198 */
		if (ocd->ocd_connect_flags & OBD_CONNECT_REQPORTAL)
			imp->imp_client->cli_request_portal = OST_REQUEST_PORTAL;

		rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL);
		break;
	}
	case IMP_EVENT_DEACTIVATE: {
		rc = obd_notify_observer(obd, obd, OBD_NOTIFY_DEACTIVATE, NULL);
		break;
	}
	case IMP_EVENT_ACTIVATE: {
		rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVATE, NULL);
		break;
	}
	default:
		CERROR("Unknown import event %d\n", event);
		LBUG();
	}
	return rc;
}
2891
2892/**
2893 * Determine whether the lock can be canceled before replaying the lock
2894 * during recovery, see bug16774 for detailed information.
2895 *
2896 * \retval zero the lock can't be canceled
2897 * \retval other ok to cancel
2898 */
Jinshan Xiong7d443332016-03-30 19:49:02 -04002899static int osc_cancel_weight(struct ldlm_lock *lock)
Peng Taod7e09d02013-05-02 16:46:55 +08002900{
Peng Taod7e09d02013-05-02 16:46:55 +08002901 /*
Jinshan Xiong7d443332016-03-30 19:49:02 -04002902 * Cancel all unused and granted extent lock.
Peng Taod7e09d02013-05-02 16:46:55 +08002903 */
2904 if (lock->l_resource->lr_type == LDLM_EXTENT &&
Jinshan Xiong7d443332016-03-30 19:49:02 -04002905 lock->l_granted_mode == lock->l_req_mode &&
2906 osc_ldlm_weigh_ast(lock) == 0)
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +08002907 return 1;
Peng Taod7e09d02013-05-02 16:46:55 +08002908
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +08002909 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +08002910}
2911
2912static int brw_queue_work(const struct lu_env *env, void *data)
2913{
2914 struct client_obd *cli = data;
2915
2916 CDEBUG(D_CACHE, "Run writeback work for client obd %p.\n", cli);
2917
Olaf Weberc5c4c6f2015-09-14 18:41:35 -04002918 osc_io_unplug(env, cli, NULL);
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +08002919 return 0;
Peng Taod7e09d02013-05-02 16:46:55 +08002920}
2921
/*
 * Set up an OSC obd_device: take a ptlrpcd reference, do generic client
 * setup, allocate the writeback and LRU ptlrpcd work items, initialize
 * quota and procfs, pre-populate the shared request pool, and register
 * the cancel-weight callback.
 *
 * On failure, tears down in reverse order via the goto labels below.
 *
 * \retval 0 on success, negative errno on failure.
 */
int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
{
	struct lprocfs_static_vars lvars = { NULL };
	struct client_obd *cli = &obd->u.cli;
	void *handler;
	int rc;
	int adding;
	int added;
	int req_count;

	rc = ptlrpcd_addref();
	if (rc)
		return rc;

	rc = client_obd_setup(obd, lcfg);
	if (rc)
		goto out_ptlrpcd;

	/* Work item that flushes queued bulk writes from ptlrpcd context. */
	handler = ptlrpcd_alloc_work(cli->cl_import, brw_queue_work, cli);
	if (IS_ERR(handler)) {
		rc = PTR_ERR(handler);
		goto out_client_setup;
	}
	cli->cl_writeback_work = handler;

	/* Work item that shrinks the per-client page LRU. */
	handler = ptlrpcd_alloc_work(cli->cl_import, lru_queue_work, cli);
	if (IS_ERR(handler)) {
		rc = PTR_ERR(handler);
		goto out_ptlrpcd_work;
	}

	cli->cl_lru_work = handler;

	rc = osc_quota_setup(obd);
	if (rc)
		goto out_ptlrpcd_work;

	cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL;
	lprocfs_osc_init_vars(&lvars);
	/* procfs/sysfs registration failure is non-fatal: skip the extras */
	if (lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars) == 0) {
		lproc_osc_attach_seqstat(obd);
		sptlrpc_lprocfs_cliobd_attach(obd);
		ptlrpc_lprocfs_register_obd(obd);
	}

	/*
	 * We try to control the total number of requests with a upper limit
	 * osc_reqpool_maxreqcount. There might be some race which will cause
	 * over-limit allocation, but it is fine.
	 */
	req_count = atomic_read(&osc_pool_req_count);
	if (req_count < osc_reqpool_maxreqcount) {
		adding = cli->cl_max_rpcs_in_flight + 2;
		if (req_count + adding > osc_reqpool_maxreqcount)
			adding = osc_reqpool_maxreqcount - req_count;

		added = ptlrpc_add_rqs_to_pool(osc_rq_pool, adding);
		atomic_add(added, &osc_pool_req_count);
	}

	INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
	ns_register_cancel(obd->obd_namespace, osc_cancel_weight);
	return rc;

out_ptlrpcd_work:
	/* Destroy whichever work items were allocated before the failure. */
	if (cli->cl_writeback_work) {
		ptlrpcd_destroy_work(cli->cl_writeback_work);
		cli->cl_writeback_work = NULL;
	}
	if (cli->cl_lru_work) {
		ptlrpcd_destroy_work(cli->cl_lru_work);
		cli->cl_lru_work = NULL;
	}
out_client_setup:
	client_obd_cleanup(obd);
out_ptlrpcd:
	ptlrpcd_decref();
	return rc;
}
3001
/*
 * Staged pre-cleanup of an OSC device.
 *
 * OBD_CLEANUP_EARLY deactivates the import and stops pinging it;
 * OBD_CLEANUP_EXPORTS destroys the ptlrpcd work items and unregisters
 * the import and procfs state.  Other stages are a no-op.
 *
 * \retval always 0.
 */
static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
{
	switch (stage) {
	case OBD_CLEANUP_EARLY: {
		struct obd_import *imp;

		imp = obd->u.cli.cl_import;
		CDEBUG(D_HA, "Deactivating import %s\n", obd->obd_name);
		/* ptlrpc_abort_inflight to stop an mds_lov_synchronize */
		ptlrpc_deactivate_import(imp);
		spin_lock(&imp->imp_lock);
		imp->imp_pingable = 0;
		spin_unlock(&imp->imp_lock);
		break;
	}
	case OBD_CLEANUP_EXPORTS: {
		struct client_obd *cli = &obd->u.cli;
		/* LU-464
		 * for echo client, export may be on zombie list, wait for
		 * zombie thread to cull it, because cli.cl_import will be
		 * cleared in client_disconnect_export():
		 * class_export_destroy() -> obd_cleanup() ->
		 * echo_device_free() -> echo_client_cleanup() ->
		 * obd_disconnect() -> osc_disconnect() ->
		 * client_disconnect_export()
		 */
		obd_zombie_barrier();
		if (cli->cl_writeback_work) {
			ptlrpcd_destroy_work(cli->cl_writeback_work);
			cli->cl_writeback_work = NULL;
		}
		if (cli->cl_lru_work) {
			ptlrpcd_destroy_work(cli->cl_lru_work);
			cli->cl_lru_work = NULL;
		}
		obd_cleanup_client_import(obd);
		ptlrpc_lprocfs_unregister_obd(obd);
		lprocfs_obd_cleanup(obd);
		break;
	}
	}
	return 0;
}
3045
Amitoj Kaur Chawlaf51e5a22016-02-16 19:14:12 +05303046static int osc_cleanup(struct obd_device *obd)
Peng Taod7e09d02013-05-02 16:46:55 +08003047{
3048 struct client_obd *cli = &obd->u.cli;
3049 int rc;
3050
Peng Taod7e09d02013-05-02 16:46:55 +08003051 /* lru cleanup */
Oleg Drokin7f1ae4c2016-02-16 00:46:57 -05003052 if (cli->cl_cache) {
Peng Taod7e09d02013-05-02 16:46:55 +08003053 LASSERT(atomic_read(&cli->cl_cache->ccc_users) > 0);
3054 spin_lock(&cli->cl_cache->ccc_lru_lock);
3055 list_del_init(&cli->cl_lru_osc);
3056 spin_unlock(&cli->cl_cache->ccc_lru_lock);
3057 cli->cl_lru_left = NULL;
Emoly Liu1b02bde2016-06-20 16:55:24 -04003058 cl_cache_decref(cli->cl_cache);
Peng Taod7e09d02013-05-02 16:46:55 +08003059 cli->cl_cache = NULL;
3060 }
3061
3062 /* free memory of osc quota cache */
3063 osc_quota_cleanup(obd);
3064
3065 rc = client_obd_cleanup(obd);
3066
3067 ptlrpcd_decref();
Greg Kroah-Hartman0a3bdb02013-08-03 10:35:28 +08003068 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +08003069}
3070
3071int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg)
3072{
Radek Dostalea7893b2014-07-27 23:22:57 +02003073 struct lprocfs_static_vars lvars = { NULL };
Peng Taod7e09d02013-05-02 16:46:55 +08003074 int rc = 0;
3075
3076 lprocfs_osc_init_vars(&lvars);
3077
3078 switch (lcfg->lcfg_command) {
3079 default:
3080 rc = class_process_proc_param(PARAM_OSC, lvars.obd_vars,
3081 lcfg, obd);
3082 if (rc > 0)
3083 rc = 0;
3084 break;
3085 }
3086
Julia Lawallfbe7c6c2014-08-26 22:00:33 +02003087 return rc;
Peng Taod7e09d02013-05-02 16:46:55 +08003088}
3089
/* obd_ops entry point: thin wrapper, \a len is unused and \a buf carries
 * the struct lustre_cfg to process.
 */
static int osc_process_config(struct obd_device *obd, u32 len, void *buf)
{
	return osc_process_config_base(obd, buf);
}
3094
/* Method table registered for the OSC obd type: mixes generic client_*
 * helpers (connection management) with OSC-specific implementations.
 */
static struct obd_ops osc_obd_ops = {
	.owner          = THIS_MODULE,
	.setup          = osc_setup,
	.precleanup     = osc_precleanup,
	.cleanup        = osc_cleanup,
	.add_conn       = client_import_add_conn,
	.del_conn       = client_import_del_conn,
	.connect        = client_connect_import,
	.reconnect      = osc_reconnect,
	.disconnect     = osc_disconnect,
	.statfs         = osc_statfs,
	.statfs_async   = osc_statfs_async,
	.unpackmd       = osc_unpackmd,
	.create         = osc_create,
	.destroy        = osc_destroy,
	.getattr        = osc_getattr,
	.getattr_async  = osc_getattr_async,
	.setattr        = osc_setattr,
	.setattr_async  = osc_setattr_async,
	.iocontrol      = osc_iocontrol,
	.get_info       = osc_get_info,
	.set_info_async = osc_set_info_async,
	.import_event   = osc_import_event,
	.process_config = osc_process_config,
	.quotactl       = osc_quotactl,
	.quotacheck     = osc_quotacheck,
};
3122
3123extern struct lu_kmem_descr osc_caches[];
Peng Taod7e09d02013-05-02 16:46:55 +08003124extern struct lock_class_key osc_ast_guard_class;
3125
/*
 * Module init: register caches and the OSC obd type, then create the
 * shared OSC request pool sized from the osc_reqpool_mem_max module
 * parameter (in MiB).
 *
 * \retval 0 on success, negative errno on failure (with everything
 *         registered so far unwound via the goto labels).
 */
static int __init osc_init(void)
{
	struct lprocfs_static_vars lvars = { NULL };
	unsigned int reqpool_size;
	unsigned int reqsize;
	int rc;

	/* print an address of _any_ initialized kernel symbol from this
	 * module, to allow debugging with gdb that doesn't support data
	 * symbols from modules.
	 */
	CDEBUG(D_INFO, "Lustre OSC module (%p).\n", &osc_caches);

	rc = lu_kmem_init(osc_caches);
	if (rc)
		return rc;

	lprocfs_osc_init_vars(&lvars);

	rc = class_register_type(&osc_obd_ops, NULL,
				 LUSTRE_OSC_NAME, &osc_device_type);
	if (rc)
		goto out_kmem;

	/* This is obviously too much memory, only prevent overflow here */
	/* 1 << 12 MiB shifted by 20 below would overflow unsigned int */
	if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) {
		rc = -EINVAL;
		goto out_type;
	}

	reqpool_size = osc_reqpool_mem_max << 20;	/* MiB -> bytes */

	/* round the request size up to the next power of two */
	reqsize = 1;
	while (reqsize < OST_MAXREQSIZE)
		reqsize = reqsize << 1;

	/*
	 * We don't enlarge the request count in OSC pool according to
	 * cl_max_rpcs_in_flight. The allocation from the pool will only be
	 * tried after normal allocation failed. So a small OSC pool won't
	 * cause much performance degression in most of cases.
	 */
	osc_reqpool_maxreqcount = reqpool_size / reqsize;

	atomic_set(&osc_pool_req_count, 0);
	osc_rq_pool = ptlrpc_init_rq_pool(0, OST_MAXREQSIZE,
					  ptlrpc_add_rqs_to_pool);

	if (osc_rq_pool)
		return 0;

	rc = -ENOMEM;

out_type:
	class_unregister_type(LUSTRE_OSC_NAME);
out_kmem:
	lu_kmem_fini(osc_caches);
	return rc;
}
3185
/*
 * Module exit: undo osc_init() — unregister the obd type, release the
 * kmem caches and free the shared request pool.
 */
static void /*__exit*/ osc_exit(void)
{
	class_unregister_type(LUSTRE_OSC_NAME);
	lu_kmem_fini(osc_caches);
	ptlrpc_free_rq_pool(osc_rq_pool);
}
3192
/* Standard kernel module metadata and init/exit entry points. */
MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)");
MODULE_LICENSE("GPL");
MODULE_VERSION(LUSTRE_VERSION_STRING);

module_init(osc_init);
module_exit(osc_exit);