blob: 3229ae6c78469019f0eb747c4e09a10137e26b0f [file] [log] [blame]
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -07001#ifndef _FS_CEPH_LIBCEPH_H
2#define _FS_CEPH_LIBCEPH_H
3
David Howellsa1ce3922012-10-02 18:01:25 +01004#include <linux/ceph/ceph_debug.h>
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -07005
6#include <asm/unaligned.h>
7#include <linux/backing-dev.h>
8#include <linux/completion.h>
9#include <linux/exportfs.h>
Paul Gortmaker187f1882011-11-23 20:12:59 -050010#include <linux/bug.h>
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070011#include <linux/fs.h>
12#include <linux/mempool.h>
13#include <linux/pagemap.h>
14#include <linux/wait.h>
15#include <linux/writeback.h>
16#include <linux/slab.h>
Elena Reshetova06dfa962017-03-17 14:10:27 +020017#include <linux/refcount.h>
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070018
David Howellsa1ce3922012-10-02 18:01:25 +010019#include <linux/ceph/types.h>
20#include <linux/ceph/messenger.h>
21#include <linux/ceph/msgpool.h>
22#include <linux/ceph/mon_client.h>
23#include <linux/ceph/osd_client.h>
24#include <linux/ceph/ceph_fs.h>
Yan, Zheng51e92732016-02-05 15:36:22 +080025#include <linux/ceph/string_table.h>
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070026
27/*
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070028 * mount options
29 */
/* Bit flags stored in ceph_options.flags. */
#define CEPH_OPT_FSID		(1 << 0)
#define CEPH_OPT_NOSHARE	(1 << 1)  /* don't share client with other sbs */
#define CEPH_OPT_MYIP		(1 << 2)  /* specified my ip */
#define CEPH_OPT_NOCRC		(1 << 3)  /* no data crc on writes */
#define CEPH_OPT_NOMSGAUTH	(1 << 4)  /* don't require msg signing feat */
#define CEPH_OPT_TCP_NODELAY	(1 << 5)  /* TCP_NODELAY on TCP sockets */
#define CEPH_OPT_NOMSGSIGN	(1 << 6)  /* don't sign msgs */

/* Default flag set: only TCP_NODELAY is enabled. */
#define CEPH_OPT_DEFAULT	(CEPH_OPT_TCP_NODELAY)
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070039
/*
 * Set the mount option flag CEPH_OPT_<opt> in @client's options.
 *
 * Expands to a single parenthesized expression with no trailing
 * semicolon, so it behaves like a function call: the caller supplies
 * the ';', and the macro is safe as the body of an unbraced if/else.
 * The value of the expression is the updated flags word.
 */
#define ceph_set_opt(client, opt) \
	((client)->options->flags |= CEPH_OPT_##opt)
/* Evaluates to 1 if CEPH_OPT_<opt> is set in @client's option flags, else 0. */
#define ceph_test_opt(client, opt) \
	(((client)->options->flags & CEPH_OPT_##opt) != 0)
44
45struct ceph_options {
46 int flags;
47 struct ceph_fsid fsid;
48 struct ceph_entity_addr my_addr;
Ilya Dryomova319bf52015-05-15 12:02:17 +030049 unsigned long mount_timeout; /* jiffies */
50 unsigned long osd_idle_ttl; /* jiffies */
51 unsigned long osd_keepalive_timeout; /* jiffies */
Ilya Dryomov7cc5e382017-02-12 17:11:07 +010052 unsigned long osd_request_timeout; /* jiffies */
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070053
54 /*
55 * any type that can't be simply compared or doesn't need need
56 * to be compared should go beyond this point,
57 * ceph_compare_options() should be updated accordingly
58 */
59
60 struct ceph_entity_addr *mon_addr; /* should be the first
61 pointer type of args */
62 int num_mon;
63 char *name;
Tommi Virtanen8323c3a2011-03-25 16:32:57 -070064 struct ceph_crypto_key *key;
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070065};
66
67/*
68 * defaults
69 */
/* option defaults; timeouts are expressed in jiffies */
#define CEPH_MOUNT_TIMEOUT_DEFAULT	 msecs_to_jiffies(60 * 1000)
#define CEPH_OSD_KEEPALIVE_DEFAULT	 msecs_to_jiffies(5 * 1000)
#define CEPH_OSD_IDLE_TTL_DEFAULT	 msecs_to_jiffies(60 * 1000)
#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0	/* no timeout */

/* monitor client intervals and hunt tuning */
#define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
#define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
#define CEPH_MONC_PING_TIMEOUT		msecs_to_jiffies(30 * 1000)
#define CEPH_MONC_HUNT_BACKOFF		2
#define CEPH_MONC_HUNT_MAX_MULT		10

/* per-section message size limits, in bytes (16 MiB each) */
#define CEPH_MSG_MAX_FRONT_LEN		(16 * 1024 * 1024)
#define CEPH_MSG_MAX_MIDDLE_LEN		(16 * 1024 * 1024)
#define CEPH_MSG_MAX_DATA_LEN		(16 * 1024 * 1024)

#define CEPH_AUTH_NAME_DEFAULT		"guest"
86
/*
 * Delay telling the MDS we no longer want caps, in case we reopen
 * the file.  Delay a minimum amount of time, even if we send a cap
 * message for some other reason.  Otherwise, take the opportunity to
 * update the MDS to avoid sending another message later.
 */
#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT	5	/* cap release delay */
#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT	60	/* cap release delay */

/* four times CEPH_CAPS_PER_RELEASE */
#define CEPH_CAP_RELEASE_SAFETY_DEFAULT		(CEPH_CAPS_PER_RELEASE * 4)
97
/* mount state */
enum {
	CEPH_MOUNT_MOUNTING	= 0,
	CEPH_MOUNT_MOUNTED	= 1,
	CEPH_MOUNT_UNMOUNTING	= 2,
	CEPH_MOUNT_UNMOUNTED	= 3,
	CEPH_MOUNT_SHUTDOWN	= 4,
};
106
Ilya Dryomova319bf52015-05-15 12:02:17 +0300107static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
108{
109 return timeout ?: MAX_SCHEDULE_TIMEOUT;
110}
111
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700112struct ceph_mds_client;
113
114/*
115 * per client state
116 *
117 * possibly shared by multiple mount points, if they are
118 * mounting the same ceph filesystem/cluster.
119 */
120struct ceph_client {
121 struct ceph_fsid fsid;
122 bool have_fsid;
123
124 void *private;
125
126 struct ceph_options *options;
127
128 struct mutex mount_mutex; /* serialize mount attempts */
129 wait_queue_head_t auth_wq;
130 int auth_err;
131
132 int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
133
Ilya Dryomov12b46292013-12-24 21:19:23 +0200134 u64 supported_features;
135 u64 required_features;
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700136
Alex Elder15d98822012-05-26 23:26:43 -0500137 struct ceph_messenger msgr; /* messenger instance */
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700138 struct ceph_mon_client monc;
139 struct ceph_osd_client osdc;
140
141#ifdef CONFIG_DEBUG_FS
142 struct dentry *debugfs_dir;
143 struct dentry *debugfs_monmap;
144 struct dentry *debugfs_osdmap;
Ilya Dryomov5cf7bd32015-03-25 21:07:41 +0300145 struct dentry *debugfs_options;
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700146#endif
147};
148
/* Map a pointer to a ceph_client's embedded msgr member back to the client. */
#define from_msgr(ms) \
	container_of(ms, struct ceph_client, msgr)
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700150
151
152/*
153 * snapshots
154 */
155
156/*
157 * A "snap context" is the set of existing snapshots when we
158 * write data. It is used by the OSD to guide its COW behavior.
159 *
160 * The ceph_snap_context is refcounted, and attached to each dirty
161 * page, indicating which context the dirty data belonged when it was
162 * dirtied.
163 */
164struct ceph_snap_context {
Elena Reshetova06dfa962017-03-17 14:10:27 +0200165 refcount_t nref;
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700166 u64 seq;
Alex Elderaa711ee32012-07-13 20:35:11 -0500167 u32 num_snaps;
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700168 u64 snaps[];
169};
170
Alex Elder4f0dcb12013-04-30 00:44:32 -0500171extern struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
172 gfp_t gfp_flags);
173extern struct ceph_snap_context *ceph_get_snap_context(
174 struct ceph_snap_context *sc);
175extern void ceph_put_snap_context(struct ceph_snap_context *sc);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700176
177/*
178 * calculate the number of pages a given length and offset map onto,
179 * if we align the data.
180 */
181static inline int calc_pages_for(u64 off, u64 len)
182{
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +0300183 return ((off+len+PAGE_SIZE-1) >> PAGE_SHIFT) -
184 (off >> PAGE_SHIFT);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700185}
186
/*
 * These are not meant to be generic - an integer key is assumed.
 *
 * DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) emits two static
 * helpers for an rbtree of @type objects ordered by member @keyfld and
 * linked through the struct rb_node member @nodefld:
 *
 *   insert_<name>(root, t) - link @t into @root.  BUGs if @t's node is
 *                            not empty or if an entry with an equal
 *                            key is already in the tree.
 *   erase_<name>(root, t)  - unlink @t from @root and clear its node.
 *                            BUGs if @t's node is empty.
 */
#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)		\
static void insert_##name(struct rb_root *root, type *t)		\
{									\
	struct rb_node **link = &root->rb_node;				\
	struct rb_node *parent = NULL;					\
									\
	BUG_ON(!RB_EMPTY_NODE(&t->nodefld));				\
									\
	while (*link) {							\
		type *entry = rb_entry(*link, type, nodefld);		\
									\
		parent = *link;						\
		if (t->keyfld > entry->keyfld)				\
			link = &parent->rb_right;			\
		else if (t->keyfld < entry->keyfld)			\
			link = &parent->rb_left;			\
		else							\
			BUG();						\
	}								\
									\
	rb_link_node(&t->nodefld, parent, link);			\
	rb_insert_color(&t->nodefld, root);				\
}									\
static void erase_##name(struct rb_root *root, type *t)			\
{									\
	BUG_ON(RB_EMPTY_NODE(&t->nodefld));				\
	rb_erase(&t->nodefld, root);					\
	RB_CLEAR_NODE(&t->nodefld);					\
}
219
/*
 * DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) emits
 * lookup_<name>(root, key), which walks the rbtree at @root and returns
 * the @type entry whose @keyfld equals @key, or NULL if there is none.
 *
 * The extern __lookup_<name>_key declaration is only referenced inside
 * typeof() here, to give the key parameter the same type as @keyfld.
 */
#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)		\
extern type __lookup_##name##_key;					\
static type *lookup_##name(struct rb_root *root,			\
			   typeof(__lookup_##name##_key.keyfld) key)	\
{									\
	struct rb_node *node = root->rb_node;				\
									\
	while (node) {							\
		type *entry = rb_entry(node, type, nodefld);		\
									\
		if (key > entry->keyfld)				\
			node = node->rb_right;				\
		else if (key < entry->keyfld)				\
			node = node->rb_left;				\
		else							\
			return entry;					\
	}								\
									\
	return NULL;							\
}
240
/* Emit insert_<name>(), erase_<name>() and lookup_<name>() in one go. */
#define DEFINE_RB_FUNCS(name, type, keyfld, nodefld)			\
	DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)		\
	DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)
244
/* kmem caches; only declared here, the definitions live elsewhere */
extern struct kmem_cache *ceph_inode_cachep;
extern struct kmem_cache *ceph_cap_cachep;
extern struct kmem_cache *ceph_cap_flush_cachep;
extern struct kmem_cache *ceph_dentry_cachep;
extern struct kmem_cache *ceph_file_cachep;
250
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700251/* ceph_common.c */
Alex Elder72fe25e2013-01-30 11:13:33 -0600252extern bool libceph_compatible(void *data);
253
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700254extern const char *ceph_msg_type_name(int type);
255extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
Ilya Dryomoveeb0bed2014-01-09 20:08:21 +0200256extern void *ceph_kvmalloc(size_t size, gfp_t flags);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700257
Alex Elderee577412012-01-24 10:08:36 -0600258extern struct ceph_options *ceph_parse_options(char *options,
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700259 const char *dev_name, const char *dev_name_end,
260 int (*parse_extra_token)(char *c, void *private),
261 void *private);
Ilya Dryomovff40f9a2015-03-25 21:02:16 +0300262int ceph_print_client_options(struct seq_file *m, struct ceph_client *client);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700263extern void ceph_destroy_options(struct ceph_options *opt);
264extern int ceph_compare_options(struct ceph_options *new_opt,
265 struct ceph_client *client);
Ilya Dryomov74da4a0f2017-03-03 18:16:07 +0100266struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private);
Ilya Dryomov005a07bf2016-08-18 18:38:43 +0200267struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
Ilya Dryomov033268a2016-08-12 14:59:58 +0200268u64 ceph_client_gid(struct ceph_client *client);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700269extern void ceph_destroy_client(struct ceph_client *client);
270extern int __ceph_open_session(struct ceph_client *client,
271 unsigned long started);
272extern int ceph_open_session(struct ceph_client *client);
273
274/* pagevec.c */
275extern void ceph_release_page_vector(struct page **pages, int num_pages);
276
Alex Elderb3248142013-02-06 13:11:38 -0600277extern struct page **ceph_get_direct_page_vector(const void __user *data,
Henry C Changb6aa5902010-12-15 20:45:41 -0800278 int num_pages,
279 bool write_page);
280extern void ceph_put_page_vector(struct page **pages, int num_pages,
281 bool dirty);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700282extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
283extern int ceph_copy_user_to_page_vector(struct page **pages,
Alex Elderb3248142013-02-06 13:11:38 -0600284 const void __user *data,
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700285 loff_t off, size_t len);
Alex Elder903bb322013-02-06 13:11:38 -0600286extern void ceph_copy_to_page_vector(struct page **pages,
Alex Elderb3248142013-02-06 13:11:38 -0600287 const void *data,
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700288 loff_t off, size_t len);
Alex Elder903bb322013-02-06 13:11:38 -0600289extern void ceph_copy_from_page_vector(struct page **pages,
Alex Elderb3248142013-02-06 13:11:38 -0600290 void *data,
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700291 loff_t off, size_t len);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700292extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
293
294
#endif /* _FS_CEPH_LIBCEPH_H */