blob: 24e0b19c8b69bec6124d0f960d4089fc645eed0e [file] [log] [blame]
Joel Becker8adf0532007-11-28 14:38:40 -08001/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * stack_user.c
5 *
6 * Code which interfaces ocfs2 with fs/dlm and a userspace stack.
7 *
8 * Copyright (C) 2007 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation, version 2.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#include <linux/module.h>
Joel Becker6427a722008-02-18 19:23:28 -080021#include <linux/fs.h>
22#include <linux/miscdevice.h>
23#include <linux/mutex.h>
24#include <linux/reboot.h>
Joel Becker462c7e62008-02-18 19:40:12 -080025#include <asm/uaccess.h>
Joel Becker8adf0532007-11-28 14:38:40 -080026
David Teiglandcf4d8d72008-02-20 14:29:27 -080027#include "ocfs2.h" /* For struct ocfs2_lock_res */
Joel Becker8adf0532007-11-28 14:38:40 -080028#include "stackglue.h"
29
30
Joel Becker6427a722008-02-18 19:23:28 -080031/*
32 * The control protocol starts with a handshake. Until the handshake
33 * is complete, the control device will fail all write(2)s.
34 *
35 * The handshake is simple. First, the client reads until EOF. Each line
36 * of output is a supported protocol tag. All protocol tags are a single
37 * character followed by a two hex digit version number. Currently the
38 * only thing supported is T01, for "Text-based version 0x01". Next, the
Joel Beckerde870ef2008-02-18 17:07:09 -080039 * client writes the version they would like to use, including the newline.
40 * Thus, the protocol tag is 'T01\n'. If the version tag written is
41 * unknown, -EINVAL is returned. Once the negotiation is complete, the
42 * client can start sending messages.
43 *
Joel Beckerd4b95ee2008-02-20 15:39:44 -080044 * The T01 protocol has three messages. First is the "SETN" message.
Joel Becker3cfd4ab2008-02-20 14:44:34 -080045 * It has the following syntax:
46 *
47 * SETN<space><8-char-hex-nodenum><newline>
48 *
49 * This is 14 characters.
50 *
51 * The "SETN" message must be the first message following the protocol.
52 * It tells ocfs2_control the local node number.
53 *
Joel Beckerd4b95ee2008-02-20 15:39:44 -080054 * Next comes the "SETV" message. It has the following syntax:
55 *
56 * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
57 *
58 * This is 11 characters.
59 *
60 * The "SETV" message sets the filesystem locking protocol version as
61 * negotiated by the client. The client negotiates based on the maximum
62 * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major
63 * number from the "SETV" message must match
Joel Beckera12630b2008-05-09 18:49:29 -070064 * ocfs2_user_plugin.sp_proto->lp_max_version.pv_major, and the minor number
Joel Beckerd4b95ee2008-02-20 15:39:44 -080065 * must be less than or equal to ...->lp_max_version.pv_minor.
66 *
67 * Once this information has been set, mounts will be allowed. From this
68 * point on, the "DOWN" message can be sent for node down notification.
69 * It has the following syntax:
Joel Beckerde870ef2008-02-18 17:07:09 -080070 *
71 * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
72 *
73 * eg:
74 *
75 * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
76 *
77 * This is 47 characters.
Joel Becker6427a722008-02-18 19:23:28 -080078 */
79
/*
 * Protocol tag exchanged during the handshake.  Only a single protocol
 * version exists for now, so the "list" read by the client is just this
 * one tag.
 */
#define OCFS2_CONTROL_PROTO				"T01\n"
#define OCFS2_CONTROL_PROTO_LEN				4

/*
 * Handshake states of an open control file:
 * nothing done yet, protocol list read, protocol tag written, and all
 * configuration installed.
 */
#define OCFS2_CONTROL_HANDSHAKE_INVALID			(0)
#define OCFS2_CONTROL_HANDSHAKE_READ			(1)
#define OCFS2_CONTROL_HANDSHAKE_PROTOCOL		(2)
#define OCFS2_CONTROL_HANDSHAKE_VALID			(3)

/* Message opcodes and the exact byte length of each complete message */
#define OCFS2_CONTROL_MESSAGE_OP_LEN			4
#define OCFS2_CONTROL_MESSAGE_SETNODE_OP		"SETN"
#define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN		14
#define OCFS2_CONTROL_MESSAGE_SETVERSION_OP		"SETV"
#define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN	11
#define OCFS2_CONTROL_MESSAGE_DOWN_OP			"DOWN"
#define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN		47

/* Widths (in characters) of the fixed-size fields inside the messages */
#define OCFS2_TEXT_UUID_LEN				32
#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN		2
#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN		8
Joel Becker462c7e62008-02-18 19:40:12 -0800104
105/*
Joel Becker6427a722008-02-18 19:23:28 -0800106 * ocfs2_live_connection is refcounted because the filesystem and
107 * miscdevice sides can detach in different order. Let's just be safe.
108 */
109struct ocfs2_live_connection {
110 struct list_head oc_list;
111 struct ocfs2_cluster_connection *oc_conn;
112};
113
Joel Becker462c7e62008-02-18 19:40:12 -0800114struct ocfs2_control_private {
115 struct list_head op_list;
116 int op_state;
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800117 int op_this_node;
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800118 struct ocfs2_protocol_version op_proto;
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800119};
120
121/* SETN<space><8-char-hex-nodenum><newline> */
122struct ocfs2_control_message_setn {
123 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
124 char space;
125 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
126 char newline;
127};
128
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800129/* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */
130struct ocfs2_control_message_setv {
131 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
132 char space1;
133 char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
134 char space2;
135 char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
136 char newline;
137};
138
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800139/* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */
140struct ocfs2_control_message_down {
141 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
142 char space1;
143 char uuid[OCFS2_TEXT_UUID_LEN];
144 char space2;
145 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
146 char newline;
147};
148
149union ocfs2_control_message {
150 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
151 struct ocfs2_control_message_setn u_setn;
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800152 struct ocfs2_control_message_setv u_setv;
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800153 struct ocfs2_control_message_down u_down;
Joel Becker462c7e62008-02-18 19:40:12 -0800154};
155
Joel Beckera12630b2008-05-09 18:49:29 -0700156static struct ocfs2_stack_plugin ocfs2_user_plugin;
David Teiglandcf4d8d72008-02-20 14:29:27 -0800157
Joel Becker6427a722008-02-18 19:23:28 -0800158static atomic_t ocfs2_control_opened;
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800159static int ocfs2_control_this_node = -1;
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800160static struct ocfs2_protocol_version running_proto;
Joel Becker6427a722008-02-18 19:23:28 -0800161
162static LIST_HEAD(ocfs2_live_connection_list);
Joel Becker462c7e62008-02-18 19:40:12 -0800163static LIST_HEAD(ocfs2_control_private_list);
Joel Becker6427a722008-02-18 19:23:28 -0800164static DEFINE_MUTEX(ocfs2_control_lock);
165
Joel Becker462c7e62008-02-18 19:40:12 -0800166static inline void ocfs2_control_set_handshake_state(struct file *file,
167 int state)
168{
169 struct ocfs2_control_private *p = file->private_data;
170 p->op_state = state;
171}
172
173static inline int ocfs2_control_get_handshake_state(struct file *file)
174{
175 struct ocfs2_control_private *p = file->private_data;
176 return p->op_state;
177}
178
Joel Becker6427a722008-02-18 19:23:28 -0800179static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
180{
181 size_t len = strlen(name);
182 struct ocfs2_live_connection *c;
183
184 BUG_ON(!mutex_is_locked(&ocfs2_control_lock));
185
186 list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) {
187 if ((c->oc_conn->cc_namelen == len) &&
188 !strncmp(c->oc_conn->cc_name, name, len))
189 return c;
190 }
191
192 return c;
193}
194
195/*
196 * ocfs2_live_connection structures are created underneath the ocfs2
197 * mount path. Since the VFS prevents multiple calls to
198 * fill_super(), we can't get dupes here.
199 */
200static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
201 struct ocfs2_live_connection **c_ret)
202{
203 int rc = 0;
204 struct ocfs2_live_connection *c;
205
206 c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
207 if (!c)
208 return -ENOMEM;
209
210 mutex_lock(&ocfs2_control_lock);
211 c->oc_conn = conn;
212
213 if (atomic_read(&ocfs2_control_opened))
214 list_add(&c->oc_list, &ocfs2_live_connection_list);
215 else {
216 printk(KERN_ERR
217 "ocfs2: Userspace control daemon is not present\n");
218 rc = -ESRCH;
219 }
220
221 mutex_unlock(&ocfs2_control_lock);
222
223 if (!rc)
224 *c_ret = c;
225 else
226 kfree(c);
227
228 return rc;
229}
230
231/*
232 * This function disconnects the cluster connection from ocfs2_control.
233 * Afterwards, userspace can't affect the cluster connection.
234 */
235static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c)
236{
237 mutex_lock(&ocfs2_control_lock);
238 list_del_init(&c->oc_list);
239 c->oc_conn = NULL;
240 mutex_unlock(&ocfs2_control_lock);
241
242 kfree(c);
243}
244
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800245static int ocfs2_control_cfu(void *target, size_t target_len,
246 const char __user *buf, size_t count)
Joel Becker462c7e62008-02-18 19:40:12 -0800247{
248 /* The T01 expects write(2) calls to have exactly one command */
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800249 if ((count != target_len) ||
250 (count > sizeof(union ocfs2_control_message)))
Joel Becker462c7e62008-02-18 19:40:12 -0800251 return -EINVAL;
252
253 if (copy_from_user(target, buf, target_len))
254 return -EFAULT;
255
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800256 return 0;
Joel Becker462c7e62008-02-18 19:40:12 -0800257}
258
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800259static ssize_t ocfs2_control_validate_protocol(struct file *file,
260 const char __user *buf,
261 size_t count)
Joel Becker462c7e62008-02-18 19:40:12 -0800262{
263 ssize_t ret;
264 char kbuf[OCFS2_CONTROL_PROTO_LEN];
265
266 ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN,
267 buf, count);
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800268 if (ret)
Joel Becker462c7e62008-02-18 19:40:12 -0800269 return ret;
270
271 if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN))
272 return -EINVAL;
273
Joel Becker462c7e62008-02-18 19:40:12 -0800274 ocfs2_control_set_handshake_state(file,
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800275 OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
Joel Becker462c7e62008-02-18 19:40:12 -0800276
277 return count;
278}
279
Joel Beckerde870ef2008-02-18 17:07:09 -0800280static void ocfs2_control_send_down(const char *uuid,
281 int nodenum)
282{
283 struct ocfs2_live_connection *c;
284
285 mutex_lock(&ocfs2_control_lock);
286
287 c = ocfs2_connection_find(uuid);
288 if (c) {
289 BUG_ON(c->oc_conn == NULL);
290 c->oc_conn->cc_recovery_handler(nodenum,
291 c->oc_conn->cc_recovery_data);
292 }
293
294 mutex_unlock(&ocfs2_control_lock);
295}
296
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800297/*
298 * Called whenever configuration elements are sent to /dev/ocfs2_control.
299 * If all configuration elements are present, try to set the global
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800300 * values. If there is a problem, return an error. Skip any missing
301 * elements, and only bump ocfs2_control_opened when we have all elements
302 * and are successful.
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800303 */
304static int ocfs2_control_install_private(struct file *file)
Joel Beckerde870ef2008-02-18 17:07:09 -0800305{
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800306 int rc = 0;
307 int set_p = 1;
308 struct ocfs2_control_private *p = file->private_data;
309
310 BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
311
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800312 mutex_lock(&ocfs2_control_lock);
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800313
314 if (p->op_this_node < 0) {
315 set_p = 0;
316 } else if ((ocfs2_control_this_node >= 0) &&
317 (ocfs2_control_this_node != p->op_this_node)) {
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800318 rc = -EINVAL;
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800319 goto out_unlock;
320 }
321
322 if (!p->op_proto.pv_major) {
323 set_p = 0;
324 } else if (!list_empty(&ocfs2_live_connection_list) &&
325 ((running_proto.pv_major != p->op_proto.pv_major) ||
326 (running_proto.pv_minor != p->op_proto.pv_minor))) {
327 rc = -EINVAL;
328 goto out_unlock;
329 }
330
331 if (set_p) {
332 ocfs2_control_this_node = p->op_this_node;
333 running_proto.pv_major = p->op_proto.pv_major;
334 running_proto.pv_minor = p->op_proto.pv_minor;
335 }
336
337out_unlock:
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800338 mutex_unlock(&ocfs2_control_lock);
339
340 if (!rc && set_p) {
341 /* We set the global values successfully */
342 atomic_inc(&ocfs2_control_opened);
343 ocfs2_control_set_handshake_state(file,
344 OCFS2_CONTROL_HANDSHAKE_VALID);
345 }
346
347 return rc;
348}
349
David Teiglandcf4d8d72008-02-20 14:29:27 -0800350static int ocfs2_control_get_this_node(void)
351{
352 int rc;
353
354 mutex_lock(&ocfs2_control_lock);
355 if (ocfs2_control_this_node < 0)
356 rc = -EINVAL;
357 else
358 rc = ocfs2_control_this_node;
359 mutex_unlock(&ocfs2_control_lock);
360
361 return rc;
362}
363
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800364static int ocfs2_control_do_setnode_msg(struct file *file,
365 struct ocfs2_control_message_setn *msg)
366{
Joel Beckerde870ef2008-02-18 17:07:09 -0800367 long nodenum;
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800368 char *ptr = NULL;
369 struct ocfs2_control_private *p = file->private_data;
Joel Beckerde870ef2008-02-18 17:07:09 -0800370
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800371 if (ocfs2_control_get_handshake_state(file) !=
372 OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
Joel Beckerde870ef2008-02-18 17:07:09 -0800373 return -EINVAL;
374
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800375 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
376 OCFS2_CONTROL_MESSAGE_OP_LEN))
Joel Beckerde870ef2008-02-18 17:07:09 -0800377 return -EINVAL;
Joel Beckerde870ef2008-02-18 17:07:09 -0800378
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800379 if ((msg->space != ' ') || (msg->newline != '\n'))
380 return -EINVAL;
381 msg->space = msg->newline = '\0';
382
383 nodenum = simple_strtol(msg->nodestr, &ptr, 16);
384 if (!ptr || *ptr)
385 return -EINVAL;
386
387 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
388 (nodenum > INT_MAX) || (nodenum < 0))
389 return -ERANGE;
390 p->op_this_node = nodenum;
391
392 return ocfs2_control_install_private(file);
393}
394
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800395static int ocfs2_control_do_setversion_msg(struct file *file,
396 struct ocfs2_control_message_setv *msg)
397 {
398 long major, minor;
399 char *ptr = NULL;
400 struct ocfs2_control_private *p = file->private_data;
401 struct ocfs2_protocol_version *max =
Joel Beckera12630b2008-05-09 18:49:29 -0700402 &ocfs2_user_plugin.sp_proto->lp_max_version;
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800403
404 if (ocfs2_control_get_handshake_state(file) !=
405 OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
406 return -EINVAL;
407
408 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
409 OCFS2_CONTROL_MESSAGE_OP_LEN))
410 return -EINVAL;
411
412 if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
413 (msg->newline != '\n'))
414 return -EINVAL;
415 msg->space1 = msg->space2 = msg->newline = '\0';
416
417 major = simple_strtol(msg->major, &ptr, 16);
418 if (!ptr || *ptr)
419 return -EINVAL;
420 minor = simple_strtol(msg->minor, &ptr, 16);
421 if (!ptr || *ptr)
422 return -EINVAL;
423
424 /*
425 * The major must be between 1 and 255, inclusive. The minor
426 * must be between 0 and 255, inclusive. The version passed in
427 * must be within the maximum version supported by the filesystem.
428 */
429 if ((major == LONG_MIN) || (major == LONG_MAX) ||
430 (major > (u8)-1) || (major < 1))
431 return -ERANGE;
432 if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
433 (minor > (u8)-1) || (minor < 0))
434 return -ERANGE;
435 if ((major != max->pv_major) ||
436 (minor > max->pv_minor))
437 return -EINVAL;
438
439 p->op_proto.pv_major = major;
440 p->op_proto.pv_minor = minor;
441
442 return ocfs2_control_install_private(file);
443}
444
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800445static int ocfs2_control_do_down_msg(struct file *file,
446 struct ocfs2_control_message_down *msg)
447{
448 long nodenum;
449 char *p = NULL;
450
451 if (ocfs2_control_get_handshake_state(file) !=
452 OCFS2_CONTROL_HANDSHAKE_VALID)
453 return -EINVAL;
454
455 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
456 OCFS2_CONTROL_MESSAGE_OP_LEN))
457 return -EINVAL;
458
459 if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
460 (msg->newline != '\n'))
461 return -EINVAL;
462 msg->space1 = msg->space2 = msg->newline = '\0';
463
464 nodenum = simple_strtol(msg->nodestr, &p, 16);
Joel Beckerde870ef2008-02-18 17:07:09 -0800465 if (!p || *p)
466 return -EINVAL;
467
468 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
469 (nodenum > INT_MAX) || (nodenum < 0))
470 return -ERANGE;
471
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800472 ocfs2_control_send_down(msg->uuid, nodenum);
Joel Beckerde870ef2008-02-18 17:07:09 -0800473
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800474 return 0;
475}
476
477static ssize_t ocfs2_control_message(struct file *file,
478 const char __user *buf,
479 size_t count)
480{
481 ssize_t ret;
482 union ocfs2_control_message msg;
483
484 /* Try to catch padding issues */
485 WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) !=
486 (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1)));
487
488 memset(&msg, 0, sizeof(union ocfs2_control_message));
489 ret = ocfs2_control_cfu(&msg, count, buf, count);
490 if (ret)
491 goto out;
492
493 if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) &&
494 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
495 OCFS2_CONTROL_MESSAGE_OP_LEN))
496 ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn);
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800497 else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) &&
498 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
499 OCFS2_CONTROL_MESSAGE_OP_LEN))
500 ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv);
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800501 else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) &&
502 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
503 OCFS2_CONTROL_MESSAGE_OP_LEN))
504 ret = ocfs2_control_do_down_msg(file, &msg.u_down);
505 else
506 ret = -EINVAL;
507
508out:
509 return ret ? ret : count;
Joel Beckerde870ef2008-02-18 17:07:09 -0800510}
Joel Becker6427a722008-02-18 19:23:28 -0800511
512static ssize_t ocfs2_control_write(struct file *file,
513 const char __user *buf,
514 size_t count,
515 loff_t *ppos)
Joel Becker8adf0532007-11-28 14:38:40 -0800516{
Joel Becker462c7e62008-02-18 19:40:12 -0800517 ssize_t ret;
518
519 switch (ocfs2_control_get_handshake_state(file)) {
520 case OCFS2_CONTROL_HANDSHAKE_INVALID:
521 ret = -EINVAL;
522 break;
523
524 case OCFS2_CONTROL_HANDSHAKE_READ:
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800525 ret = ocfs2_control_validate_protocol(file, buf,
526 count);
Joel Becker462c7e62008-02-18 19:40:12 -0800527 break;
528
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800529 case OCFS2_CONTROL_HANDSHAKE_PROTOCOL:
Joel Becker462c7e62008-02-18 19:40:12 -0800530 case OCFS2_CONTROL_HANDSHAKE_VALID:
Joel Beckerde870ef2008-02-18 17:07:09 -0800531 ret = ocfs2_control_message(file, buf, count);
Joel Becker462c7e62008-02-18 19:40:12 -0800532 break;
533
534 default:
535 BUG();
536 ret = -EIO;
537 break;
538 }
539
540 return ret;
Joel Becker8adf0532007-11-28 14:38:40 -0800541}
542
Joel Becker462c7e62008-02-18 19:40:12 -0800543/*
544 * This is a naive version. If we ever have a new protocol, we'll expand
545 * it. Probably using seq_file.
546 */
Joel Becker6427a722008-02-18 19:23:28 -0800547static ssize_t ocfs2_control_read(struct file *file,
548 char __user *buf,
549 size_t count,
550 loff_t *ppos)
551{
Akinobu Mita7600c722008-06-09 16:34:23 -0700552 ssize_t ret;
Joel Becker462c7e62008-02-18 19:40:12 -0800553
Akinobu Mita7600c722008-06-09 16:34:23 -0700554 ret = simple_read_from_buffer(buf, count, ppos,
555 OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN);
Joel Becker462c7e62008-02-18 19:40:12 -0800556
557 /* Have we read the whole protocol list? */
Akinobu Mita7600c722008-06-09 16:34:23 -0700558 if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN)
Joel Becker462c7e62008-02-18 19:40:12 -0800559 ocfs2_control_set_handshake_state(file,
560 OCFS2_CONTROL_HANDSHAKE_READ);
561
Akinobu Mita7600c722008-06-09 16:34:23 -0700562 return ret;
Joel Becker6427a722008-02-18 19:23:28 -0800563}
564
565static int ocfs2_control_release(struct inode *inode, struct file *file)
566{
Joel Becker462c7e62008-02-18 19:40:12 -0800567 struct ocfs2_control_private *p = file->private_data;
568
569 mutex_lock(&ocfs2_control_lock);
570
571 if (ocfs2_control_get_handshake_state(file) !=
572 OCFS2_CONTROL_HANDSHAKE_VALID)
573 goto out;
574
Joel Becker6427a722008-02-18 19:23:28 -0800575 if (atomic_dec_and_test(&ocfs2_control_opened)) {
Joel Becker6427a722008-02-18 19:23:28 -0800576 if (!list_empty(&ocfs2_live_connection_list)) {
577 /* XXX: Do bad things! */
578 printk(KERN_ERR
579 "ocfs2: Unexpected release of ocfs2_control!\n"
580 " Loss of cluster connection requires "
581 "an emergency restart!\n");
582 emergency_restart();
583 }
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800584 /*
585 * Last valid close clears the node number and resets
586 * the locking protocol version
587 */
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800588 ocfs2_control_this_node = -1;
Joel Beckerd4b95ee2008-02-20 15:39:44 -0800589 running_proto.pv_major = 0;
590 running_proto.pv_major = 0;
Joel Becker6427a722008-02-18 19:23:28 -0800591 }
592
Joel Becker462c7e62008-02-18 19:40:12 -0800593out:
594 list_del_init(&p->op_list);
595 file->private_data = NULL;
596
597 mutex_unlock(&ocfs2_control_lock);
598
599 kfree(p);
600
Joel Becker6427a722008-02-18 19:23:28 -0800601 return 0;
602}
603
604static int ocfs2_control_open(struct inode *inode, struct file *file)
605{
Joel Becker462c7e62008-02-18 19:40:12 -0800606 struct ocfs2_control_private *p;
607
608 p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL);
609 if (!p)
610 return -ENOMEM;
Joel Becker3cfd4ab2008-02-20 14:44:34 -0800611 p->op_this_node = -1;
Joel Becker462c7e62008-02-18 19:40:12 -0800612
613 mutex_lock(&ocfs2_control_lock);
614 file->private_data = p;
615 list_add(&p->op_list, &ocfs2_control_private_list);
616 mutex_unlock(&ocfs2_control_lock);
Joel Becker6427a722008-02-18 19:23:28 -0800617
618 return 0;
619}
620
621static const struct file_operations ocfs2_control_fops = {
622 .open = ocfs2_control_open,
623 .release = ocfs2_control_release,
624 .read = ocfs2_control_read,
625 .write = ocfs2_control_write,
626 .owner = THIS_MODULE,
627};
628
Adrian Bunk4d8755b2008-04-21 11:49:26 +0300629static struct miscdevice ocfs2_control_device = {
Joel Becker6427a722008-02-18 19:23:28 -0800630 .minor = MISC_DYNAMIC_MINOR,
631 .name = "ocfs2_control",
632 .fops = &ocfs2_control_fops,
633};
634
635static int ocfs2_control_init(void)
636{
637 int rc;
638
639 atomic_set(&ocfs2_control_opened, 0);
640
641 rc = misc_register(&ocfs2_control_device);
642 if (rc)
643 printk(KERN_ERR
644 "ocfs2: Unable to register ocfs2_control device "
645 "(errno %d)\n",
646 -rc);
647
648 return rc;
649}
650
651static void ocfs2_control_exit(void)
652{
653 int rc;
654
655 rc = misc_deregister(&ocfs2_control_device);
656 if (rc)
657 printk(KERN_ERR
658 "ocfs2: Unable to deregister ocfs2_control device "
659 "(errno %d)\n",
660 -rc);
661}
662
David Teiglandcf4d8d72008-02-20 14:29:27 -0800663static struct dlm_lksb *fsdlm_astarg_to_lksb(void *astarg)
664{
665 struct ocfs2_lock_res *res = astarg;
666 return &res->l_lksb.lksb_fsdlm;
667}
668
669static void fsdlm_lock_ast_wrapper(void *astarg)
670{
671 struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg);
672 int status = lksb->sb_status;
673
Joel Beckera12630b2008-05-09 18:49:29 -0700674 BUG_ON(ocfs2_user_plugin.sp_proto == NULL);
David Teiglandcf4d8d72008-02-20 14:29:27 -0800675
676 /*
677 * For now we're punting on the issue of other non-standard errors
678 * where we can't tell if the unlock_ast or lock_ast should be called.
679 * The main "other error" that's possible is EINVAL which means the
680 * function was called with invalid args, which shouldn't be possible
681 * since the caller here is under our control. Other non-standard
682 * errors probably fall into the same category, or otherwise are fatal
683 * which means we can't carry on anyway.
684 */
685
686 if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
Joel Beckera12630b2008-05-09 18:49:29 -0700687 ocfs2_user_plugin.sp_proto->lp_unlock_ast(astarg, 0);
David Teiglandcf4d8d72008-02-20 14:29:27 -0800688 else
Joel Beckera12630b2008-05-09 18:49:29 -0700689 ocfs2_user_plugin.sp_proto->lp_lock_ast(astarg);
David Teiglandcf4d8d72008-02-20 14:29:27 -0800690}
691
692static void fsdlm_blocking_ast_wrapper(void *astarg, int level)
693{
Joel Beckera12630b2008-05-09 18:49:29 -0700694 BUG_ON(ocfs2_user_plugin.sp_proto == NULL);
David Teiglandcf4d8d72008-02-20 14:29:27 -0800695
Joel Beckera12630b2008-05-09 18:49:29 -0700696 ocfs2_user_plugin.sp_proto->lp_blocking_ast(astarg, level);
David Teiglandcf4d8d72008-02-20 14:29:27 -0800697}
698
699static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
700 int mode,
701 union ocfs2_dlm_lksb *lksb,
702 u32 flags,
703 void *name,
704 unsigned int namelen,
705 void *astarg)
706{
707 int ret;
708
709 if (!lksb->lksb_fsdlm.sb_lvbptr)
710 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
711 sizeof(struct dlm_lksb);
712
713 ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm,
714 flags|DLM_LKF_NODLCKWT, name, namelen, 0,
715 fsdlm_lock_ast_wrapper, astarg,
716 fsdlm_blocking_ast_wrapper);
717 return ret;
718}
719
720static int user_dlm_unlock(struct ocfs2_cluster_connection *conn,
721 union ocfs2_dlm_lksb *lksb,
722 u32 flags,
723 void *astarg)
724{
725 int ret;
726
727 ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid,
728 flags, &lksb->lksb_fsdlm, astarg);
729 return ret;
730}
731
732static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
733{
734 return lksb->lksb_fsdlm.sb_status;
735}
736
737static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
738{
739 return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
740}
741
/* Stack operation: dump a status block.  Intentionally a no-op here. */
static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
	/* nothing to dump */
}
745
746/*
747 * Compare a requested locking protocol version against the current one.
748 *
749 * If the major numbers are different, they are incompatible.
750 * If the current minor is greater than the request, they are incompatible.
751 * If the current minor is less than or equal to the request, they are
752 * compatible, and the requester should run at the current minor version.
753 */
754static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
755 struct ocfs2_protocol_version *request)
756{
757 if (existing->pv_major != request->pv_major)
758 return 1;
759
760 if (existing->pv_minor > request->pv_minor)
761 return 1;
762
763 if (existing->pv_minor < request->pv_minor)
764 request->pv_minor = existing->pv_minor;
765
766 return 0;
767}
768
769static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
770{
771 dlm_lockspace_t *fsdlm;
772 struct ocfs2_live_connection *control;
773 int rc = 0;
774
775 BUG_ON(conn == NULL);
776
777 rc = ocfs2_live_connection_new(conn, &control);
778 if (rc)
779 goto out;
780
781 /*
782 * running_proto must have been set before we allowed any mounts
783 * to proceed.
784 */
785 if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
786 printk(KERN_ERR
787 "Unable to mount with fs locking protocol version "
788 "%u.%u because the userspace control daemon has "
789 "negotiated %u.%u\n",
790 conn->cc_version.pv_major, conn->cc_version.pv_minor,
791 running_proto.pv_major, running_proto.pv_minor);
792 rc = -EPROTO;
793 ocfs2_live_connection_drop(control);
794 goto out;
795 }
796
797 rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name),
798 &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN);
799 if (rc) {
800 ocfs2_live_connection_drop(control);
801 goto out;
802 }
803
804 conn->cc_private = control;
805 conn->cc_lockspace = fsdlm;
806out:
807 return rc;
808}
809
Joel Becker2c394502008-05-30 15:58:26 -0700810static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
David Teiglandcf4d8d72008-02-20 14:29:27 -0800811{
812 dlm_release_lockspace(conn->cc_lockspace, 2);
813 conn->cc_lockspace = NULL;
814 ocfs2_live_connection_drop(conn->cc_private);
815 conn->cc_private = NULL;
816 return 0;
817}
818
/*
 * Stack operation: report the local node number.
 *
 * Returns 0 and stores the node number in *this_node, or a negative
 * errno from ocfs2_control_get_this_node() with *this_node untouched.
 */
static int user_cluster_this_node(unsigned int *this_node)
{
	int node = ocfs2_control_get_this_node();

	if (node < 0)
		return node;

	*this_node = node;
	return 0;
}
830
Joel Beckera12630b2008-05-09 18:49:29 -0700831static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
David Teiglandcf4d8d72008-02-20 14:29:27 -0800832 .connect = user_cluster_connect,
833 .disconnect = user_cluster_disconnect,
834 .this_node = user_cluster_this_node,
835 .dlm_lock = user_dlm_lock,
836 .dlm_unlock = user_dlm_unlock,
837 .lock_status = user_dlm_lock_status,
838 .lock_lvb = user_dlm_lvb,
839 .dump_lksb = user_dlm_dump_lksb,
840};
841
Joel Beckera12630b2008-05-09 18:49:29 -0700842static struct ocfs2_stack_plugin ocfs2_user_plugin = {
David Teiglandcf4d8d72008-02-20 14:29:27 -0800843 .sp_name = "user",
Joel Beckera12630b2008-05-09 18:49:29 -0700844 .sp_ops = &ocfs2_user_plugin_ops,
David Teiglandcf4d8d72008-02-20 14:29:27 -0800845 .sp_owner = THIS_MODULE,
846};
847
848
Joel Beckera12630b2008-05-09 18:49:29 -0700849static int __init ocfs2_user_plugin_init(void)
Joel Becker6427a722008-02-18 19:23:28 -0800850{
David Teiglandcf4d8d72008-02-20 14:29:27 -0800851 int rc;
852
853 rc = ocfs2_control_init();
854 if (!rc) {
Joel Beckera12630b2008-05-09 18:49:29 -0700855 rc = ocfs2_stack_glue_register(&ocfs2_user_plugin);
David Teiglandcf4d8d72008-02-20 14:29:27 -0800856 if (rc)
857 ocfs2_control_exit();
858 }
859
860 return rc;
Joel Becker6427a722008-02-18 19:23:28 -0800861}
862
Joel Beckera12630b2008-05-09 18:49:29 -0700863static void __exit ocfs2_user_plugin_exit(void)
Joel Becker8adf0532007-11-28 14:38:40 -0800864{
Joel Beckera12630b2008-05-09 18:49:29 -0700865 ocfs2_stack_glue_unregister(&ocfs2_user_plugin);
Joel Becker6427a722008-02-18 19:23:28 -0800866 ocfs2_control_exit();
Joel Becker8adf0532007-11-28 14:38:40 -0800867}
868
869MODULE_AUTHOR("Oracle");
870MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
871MODULE_LICENSE("GPL");
Joel Beckera12630b2008-05-09 18:49:29 -0700872module_init(ocfs2_user_plugin_init);
873module_exit(ocfs2_user_plugin_exit);