blob: 1816c5e26581716b24d6d3fd0f036b9b739c467c [file] [log] [blame]
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -07001#ifndef _FS_CEPH_LIBCEPH_H
2#define _FS_CEPH_LIBCEPH_H
3
David Howellsa1ce3922012-10-02 18:01:25 +01004#include <linux/ceph/ceph_debug.h>
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -07005
6#include <asm/unaligned.h>
7#include <linux/backing-dev.h>
8#include <linux/completion.h>
9#include <linux/exportfs.h>
Paul Gortmaker187f1882011-11-23 20:12:59 -050010#include <linux/bug.h>
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070011#include <linux/fs.h>
12#include <linux/mempool.h>
13#include <linux/pagemap.h>
14#include <linux/wait.h>
15#include <linux/writeback.h>
16#include <linux/slab.h>
17
David Howellsa1ce3922012-10-02 18:01:25 +010018#include <linux/ceph/types.h>
19#include <linux/ceph/messenger.h>
20#include <linux/ceph/msgpool.h>
21#include <linux/ceph/mon_client.h>
22#include <linux/ceph/osd_client.h>
23#include <linux/ceph/ceph_fs.h>
Yan, Zheng51e92732016-02-05 15:36:22 +080024#include <linux/ceph/string_table.h>
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070025
/*
 * mount options
 *
 * Each CEPH_OPT_* value is a single bit in ceph_options::flags.
 */
#define CEPH_OPT_FSID		(1 << 0)
#define CEPH_OPT_NOSHARE	(1 << 1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP		(1 << 2) /* specified my ip */
#define CEPH_OPT_NOCRC		(1 << 3) /* no data crc on writes */
#define CEPH_OPT_NOMSGAUTH	(1 << 4) /* don't require msg signing feat */
#define CEPH_OPT_TCP_NODELAY	(1 << 5) /* TCP_NODELAY on TCP sockets */
#define CEPH_OPT_NOMSGSIGN	(1 << 6) /* don't sign msgs */

/* default option flags */
#define CEPH_OPT_DEFAULT	(CEPH_OPT_TCP_NODELAY)
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070038
/*
 * Set / test a single CEPH_OPT_* flag in (client)->options->flags.
 * @opt is the flag name without the CEPH_OPT_ prefix, e.g.
 * ceph_set_opt(client, NOCRC).
 *
 * ceph_set_opt() expands to a parenthesized expression with no trailing
 * semicolon, so "if (x) ceph_set_opt(c, F); else ..." parses as intended.
 * ceph_test_opt() evaluates to 0 or 1.
 */
#define ceph_set_opt(client, opt) \
	((client)->options->flags |= CEPH_OPT_##opt)
#define ceph_test_opt(client, opt) \
	(!!((client)->options->flags & CEPH_OPT_##opt))
43
44struct ceph_options {
45 int flags;
46 struct ceph_fsid fsid;
47 struct ceph_entity_addr my_addr;
Ilya Dryomova319bf52015-05-15 12:02:17 +030048 unsigned long mount_timeout; /* jiffies */
49 unsigned long osd_idle_ttl; /* jiffies */
50 unsigned long osd_keepalive_timeout; /* jiffies */
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070051
52 /*
53 * any type that can't be simply compared or doesn't need need
54 * to be compared should go beyond this point,
55 * ceph_compare_options() should be updated accordingly
56 */
57
58 struct ceph_entity_addr *mon_addr; /* should be the first
59 pointer type of args */
60 int num_mon;
61 char *name;
Tommi Virtanen8323c3a2011-03-25 16:32:57 -070062 struct ceph_crypto_key *key;
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -070063};
64
/*
 * defaults
 */

/* timeouts, converted from milliseconds to jiffies */
#define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000)
#define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
#define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)

/* monitor client intervals (jiffies) and hunt multipliers */
#define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
#define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
#define CEPH_MONC_PING_TIMEOUT		msecs_to_jiffies(30 * 1000)
#define CEPH_MONC_HUNT_BACKOFF		2
#define CEPH_MONC_HUNT_MAX_MULT		10

/* upper bounds on message section lengths, in bytes (16 MiB each) */
#define CEPH_MSG_MAX_FRONT_LEN		(16 * 1024 * 1024)
#define CEPH_MSG_MAX_MIDDLE_LEN		(16 * 1024 * 1024)
#define CEPH_MSG_MAX_DATA_LEN		(16 * 1024 * 1024)

#define CEPH_AUTH_NAME_DEFAULT		"guest"
83
/*
 * Delay telling the MDS we no longer want caps, in case we reopen
 * the file.  Delay a minimum amount of time, even if we send a cap
 * message for some other reason.  Otherwise, take the opportunity to
 * update the mds to avoid sending another message later.
 */
#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT	5	/* cap release delay */
#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT	60	/* cap release delay */

#define CEPH_CAP_RELEASE_SAFETY_DEFAULT		(CEPH_CAPS_PER_RELEASE * 4)
94
/* mount state */
enum {
	CEPH_MOUNT_MOUNTING = 0,
	CEPH_MOUNT_MOUNTED,
	CEPH_MOUNT_UNMOUNTING,
	CEPH_MOUNT_UNMOUNTED,
	CEPH_MOUNT_SHUTDOWN,
};
103
Ilya Dryomova319bf52015-05-15 12:02:17 +0300104static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
105{
106 return timeout ?: MAX_SCHEDULE_TIMEOUT;
107}
108
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700109struct ceph_mds_client;
110
111/*
112 * per client state
113 *
114 * possibly shared by multiple mount points, if they are
115 * mounting the same ceph filesystem/cluster.
116 */
117struct ceph_client {
118 struct ceph_fsid fsid;
119 bool have_fsid;
120
121 void *private;
122
123 struct ceph_options *options;
124
125 struct mutex mount_mutex; /* serialize mount attempts */
126 wait_queue_head_t auth_wq;
127 int auth_err;
128
129 int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
130
Ilya Dryomov12b46292013-12-24 21:19:23 +0200131 u64 supported_features;
132 u64 required_features;
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700133
Alex Elder15d98822012-05-26 23:26:43 -0500134 struct ceph_messenger msgr; /* messenger instance */
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700135 struct ceph_mon_client monc;
136 struct ceph_osd_client osdc;
137
138#ifdef CONFIG_DEBUG_FS
139 struct dentry *debugfs_dir;
140 struct dentry *debugfs_monmap;
141 struct dentry *debugfs_osdmap;
Ilya Dryomov5cf7bd32015-03-25 21:07:41 +0300142 struct dentry *debugfs_options;
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700143#endif
144};
145
Ilya Dryomov859bff52015-10-28 23:50:58 +0100146#define from_msgr(ms) container_of(ms, struct ceph_client, msgr)
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700147
148
149/*
150 * snapshots
151 */
152
153/*
154 * A "snap context" is the set of existing snapshots when we
155 * write data. It is used by the OSD to guide its COW behavior.
156 *
157 * The ceph_snap_context is refcounted, and attached to each dirty
158 * page, indicating which context the dirty data belonged when it was
159 * dirtied.
160 */
161struct ceph_snap_context {
162 atomic_t nref;
163 u64 seq;
Alex Elderaa711ee2012-07-13 20:35:11 -0500164 u32 num_snaps;
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700165 u64 snaps[];
166};
167
Alex Elder4f0dcb12013-04-30 00:44:32 -0500168extern struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
169 gfp_t gfp_flags);
170extern struct ceph_snap_context *ceph_get_snap_context(
171 struct ceph_snap_context *sc);
172extern void ceph_put_snap_context(struct ceph_snap_context *sc);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700173
174/*
175 * calculate the number of pages a given length and offset map onto,
176 * if we align the data.
177 */
178static inline int calc_pages_for(u64 off, u64 len)
179{
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +0300180 return ((off+len+PAGE_SIZE-1) >> PAGE_SHIFT) -
181 (off >> PAGE_SHIFT);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700182}
183
/*
 * These are not meant to be generic - an integer key is assumed.
 *
 * DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) generates:
 *
 *   insert_<name>(root, t) - link @t into the tree ordered by t->keyfld.
 *                            BUGs if @t's node is not empty or if an
 *                            entry with an equal key is already present.
 *   erase_<name>(root, t)  - unlink @t and clear its node.  BUGs if
 *                            @t's node is empty.
 *
 * @type is the containing struct, @keyfld its key member and @nodefld
 * its struct rb_node member.
 */
#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)		\
static void insert_##name(struct rb_root *root, type *t)		\
{									\
	struct rb_node **link = &root->rb_node;				\
	struct rb_node *parent = NULL;					\
									\
	BUG_ON(!RB_EMPTY_NODE(&t->nodefld));				\
									\
	while (*link) {							\
		type *entry = rb_entry(*link, type, nodefld);		\
									\
		parent = *link;						\
		if (t->keyfld < entry->keyfld)				\
			link = &parent->rb_left;			\
		else if (t->keyfld > entry->keyfld)			\
			link = &parent->rb_right;			\
		else							\
			BUG();						\
	}								\
									\
	rb_link_node(&t->nodefld, parent, link);			\
	rb_insert_color(&t->nodefld, root);				\
}									\
static void erase_##name(struct rb_root *root, type *t)		\
{									\
	BUG_ON(RB_EMPTY_NODE(&t->nodefld));				\
	rb_erase(&t->nodefld, root);					\
	RB_CLEAR_NODE(&t->nodefld);					\
}
216
/*
 * DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) generates
 * lookup_<name>(root, key), which returns the entry whose keyfld
 * equals @key, or NULL if there is none.
 *
 * The extern __lookup_<name>_key declaration exists only so that
 * typeof() can name the type of the key member for the @key parameter.
 */
#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)		\
extern type __lookup_##name##_key;					\
static type *lookup_##name(struct rb_root *root,			\
			   typeof(__lookup_##name##_key.keyfld) key)	\
{									\
	struct rb_node *node = root->rb_node;				\
									\
	while (node) {							\
		type *entry = rb_entry(node, type, nodefld);		\
									\
		if (key < entry->keyfld)				\
			node = node->rb_left;				\
		else if (key > entry->keyfld)				\
			node = node->rb_right;				\
		else							\
			return entry;					\
	}								\
									\
	return NULL;							\
}

/* generate the insert, erase and lookup helpers in one go */
#define DEFINE_RB_FUNCS(name, type, keyfld, nodefld)			\
DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)			\
DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)
241
/* kmem_cache pointers, declared here and defined elsewhere */
extern struct kmem_cache *ceph_inode_cachep;
extern struct kmem_cache *ceph_cap_cachep;
extern struct kmem_cache *ceph_cap_flush_cachep;
extern struct kmem_cache *ceph_dentry_cachep;
extern struct kmem_cache *ceph_file_cachep;
247
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700248/* ceph_common.c */
Alex Elder72fe25e2013-01-30 11:13:33 -0600249extern bool libceph_compatible(void *data);
250
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700251extern const char *ceph_msg_type_name(int type);
252extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
Ilya Dryomoveeb0bed2014-01-09 20:08:21 +0200253extern void *ceph_kvmalloc(size_t size, gfp_t flags);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700254
Alex Elderee577412012-01-24 10:08:36 -0600255extern struct ceph_options *ceph_parse_options(char *options,
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700256 const char *dev_name, const char *dev_name_end,
257 int (*parse_extra_token)(char *c, void *private),
258 void *private);
Ilya Dryomovff40f9a2015-03-25 21:02:16 +0300259int ceph_print_client_options(struct seq_file *m, struct ceph_client *client);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700260extern void ceph_destroy_options(struct ceph_options *opt);
261extern int ceph_compare_options(struct ceph_options *new_opt,
262 struct ceph_client *client);
263extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
Sage Weil6ab00d42011-08-09 09:41:59 -0700264 void *private,
Ilya Dryomov12b46292013-12-24 21:19:23 +0200265 u64 supported_features,
266 u64 required_features);
Ilya Dryomov005a07bf2016-08-18 18:38:43 +0200267struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
Ilya Dryomov033268a2016-08-12 14:59:58 +0200268u64 ceph_client_gid(struct ceph_client *client);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700269extern void ceph_destroy_client(struct ceph_client *client);
270extern int __ceph_open_session(struct ceph_client *client,
271 unsigned long started);
272extern int ceph_open_session(struct ceph_client *client);
273
274/* pagevec.c */
275extern void ceph_release_page_vector(struct page **pages, int num_pages);
276
Alex Elderb3248142013-02-06 13:11:38 -0600277extern struct page **ceph_get_direct_page_vector(const void __user *data,
Henry C Changb6aa5902010-12-15 20:45:41 -0800278 int num_pages,
279 bool write_page);
280extern void ceph_put_page_vector(struct page **pages, int num_pages,
281 bool dirty);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700282extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
283extern int ceph_copy_user_to_page_vector(struct page **pages,
Alex Elderb3248142013-02-06 13:11:38 -0600284 const void __user *data,
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700285 loff_t off, size_t len);
Alex Elder903bb322013-02-06 13:11:38 -0600286extern void ceph_copy_to_page_vector(struct page **pages,
Alex Elderb3248142013-02-06 13:11:38 -0600287 const void *data,
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700288 loff_t off, size_t len);
Alex Elder903bb322013-02-06 13:11:38 -0600289extern void ceph_copy_from_page_vector(struct page **pages,
Alex Elderb3248142013-02-06 13:11:38 -0600290 void *data,
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700291 loff_t off, size_t len);
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700292extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
293
294
#endif /* _FS_CEPH_LIBCEPH_H */