| /* |
| * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru> |
| * All rights reserved. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| */ |
| |
| #ifndef __DST_H |
| #define __DST_H |
| |
| #include <linux/types.h> |
| #include <linux/connector.h> |
| |
/* Size in bytes of the fixed-length name buffers declared in this header. */
#define DST_NAMELEN		32
/* Name string of the DST subsystem. */
#define DST_NAME		"dst"
| |
/*
 * Control commands (the value stored in the 'cmd' field of struct dst_ctl).
 * Values are consecutive and start at zero; DST_CMD_MAX is the number of
 * defined commands and is not a command itself.
 */
enum {
	DST_DEL_NODE	= 0,	/* Remove node with given id from storage */
	DST_ADD_REMOTE	= 1,	/* Add remote node with given id to the storage */
	DST_ADD_EXPORT	= 2,	/* Add local node with given id to the storage,
				 * to be exported and used by remote peers */
	DST_CRYPTO	= 3,	/* Crypto initialization command (hash/cipher
				 * used to protect the connection) */
	DST_SECURITY	= 4,	/* Security attributes for given connection
				 * (permissions for example) */
	DST_START	= 5,	/* Register given node in the block layer
				 * subsystem */
	DST_CMD_MAX
};
| |
| struct dst_ctl |
| { |
| /* Storage name */ |
| char name[DST_NAMELEN]; |
| /* Command flags */ |
| __u32 flags; |
| /* Command itself (see above) */ |
| __u32 cmd; |
| /* Maximum number of pages per single request in this device */ |
| __u32 max_pages; |
| /* Stale/error transaction scanning timeout in milliseconds */ |
| __u32 trans_scan_timeout; |
| /* Maximum number of retry sends before completing transaction as broken */ |
| __u32 trans_max_retries; |
| /* Storage size */ |
| __u64 size; |
| }; |
| |
| /* Reply command carries completion status */ |
| struct dst_ctl_ack |
| { |
| struct cn_msg msg; |
| int error; |
| int unused[3]; |
| }; |
| |
/*
 * Unfortunately the socket address structure is not exported to userspace
 * and is redefined there, so DST carries its own definition.
 */
#define SADDR_MAX_DATA	128

struct saddr {
	unsigned short	sa_family;			/* Address family, AF_xxx */
	char		sa_data[SADDR_MAX_DATA];	/* Protocol address, up to
							 * SADDR_MAX_DATA bytes */
	unsigned short	sa_data_len;			/* Number of bytes used
							 * in sa_data */
};
| |
| /* Address structure */ |
| struct dst_network_ctl |
| { |
| /* Socket type: datagram, stream...*/ |
| unsigned int type; |
| /* Let me guess, is it a Jupiter diameter? */ |
| unsigned int proto; |
| /* Peer's address */ |
| struct saddr addr; |
| }; |
| |
| struct dst_crypto_ctl |
| { |
| /* Cipher and hash names */ |
| char cipher_algo[DST_NAMELEN]; |
| char hash_algo[DST_NAMELEN]; |
| |
| /* Key sizes. Can be zero for digest for example */ |
| unsigned int cipher_keysize, hash_keysize; |
| /* Alignment. Calculated by the DST itself. */ |
| unsigned int crypto_attached_size; |
| /* Number of threads to perform crypto operations */ |
| int thread_num; |
| }; |
| |
/*
 * Permission bits of the export security attributes; they are checked
 * when a client connects.
 */
#define DST_PERM_READ		(1 << 0)
#define DST_PERM_WRITE		(1 << 1)
| |
| /* |
| * Right now it is simple model, where each remote address |
| * is assigned to set of permissions it is allowed to perform. |
| * In real world block device does not know anything but |
| * reading and writing, so it should be more than enough. |
| */ |
| struct dst_secure_user |
| { |
| unsigned int permissions; |
| struct saddr addr; |
| }; |
| |
| /* |
| * Export control command: device to export and network address to accept |
| * clients to work with given device |
| */ |
| struct dst_export_ctl |
| { |
| char device[DST_NAMELEN]; |
| struct dst_network_ctl ctl; |
| }; |
| |
/*
 * Network commands (the value stored in the 'cmd' field of struct dst_cmd).
 * Numbering starts at one; DST_NCMD_MAX is one past the last command.
 */
enum {
	/* Request remote configuration */
	DST_CFG = 1,
	/* IO command */
	DST_IO = 2,
	/* IO response */
	DST_IO_RESPONSE = 3,
	/* Keepalive message */
	DST_PING = 4,
	DST_NCMD_MAX,
};
| |
| struct dst_cmd |
| { |
| /* Network command itself, see above */ |
| __u32 cmd; |
| /* |
| * Size of the attached data |
| * (in most cases, for READ command it means how many bytes were requested) |
| */ |
| __u32 size; |
| /* Crypto size: number of attached bytes with digest/hmac */ |
| __u32 csize; |
| /* Here we can carry secret data */ |
| __u32 reserved; |
| /* Read/write bits, see how they are encoded in bio structure */ |
| __u64 rw; |
| /* BIO flags */ |
| __u64 flags; |
| /* Unique command id (like transaction ID) */ |
| __u64 id; |
| /* Sector to start IO from */ |
| __u64 sector; |
| /* Hash data is placed after this header */ |
| __u8 hash[0]; |
| }; |
| |
| /* |
| * Convert command to/from network byte order. |
| * We do not use hton*() functions, since there is |
| * no 64-bit implementation. |
| */ |
| static inline void dst_convert_cmd(struct dst_cmd *c) |
| { |
| c->cmd = __cpu_to_be32(c->cmd); |
| c->csize = __cpu_to_be32(c->csize); |
| c->size = __cpu_to_be32(c->size); |
| c->sector = __cpu_to_be64(c->sector); |
| c->id = __cpu_to_be64(c->id); |
| c->flags = __cpu_to_be64(c->flags); |
| c->rw = __cpu_to_be64(c->rw); |
| } |
| |
| /* Transaction id */ |
| typedef __u64 dst_gen_t; |
| |
| #ifdef __KERNEL__ |
| |
| #include <linux/blkdev.h> |
| #include <linux/bio.h> |
| #include <linux/device.h> |
| #include <linux/mempool.h> |
| #include <linux/net.h> |
| #include <linux/poll.h> |
| #include <linux/rbtree.h> |
| |
/*
 * Debug printing helper.
 *
 * With CONFIG_DST_DEBUG defined it expands to printk() at KERN_NOTICE
 * level. Otherwise it is an empty inline function that still carries the
 * printf format attribute, so the compiler can keep checking format
 * strings against their arguments.
 */
#ifdef CONFIG_DST_DEBUG
#define dprintk(f, ...)	printk(KERN_NOTICE f, ##__VA_ARGS__)
#else
static inline void __attribute__ ((format (printf, 1, 2)))
dprintk(const char *fmt, ...)
{
}
#endif
| |
| struct dst_node; |
| |
| struct dst_trans |
| { |
| /* DST node we are working with */ |
| struct dst_node *n; |
| |
| /* Entry inside transaction tree */ |
| struct rb_node trans_entry; |
| |
| /* Merlin kills this transaction when this memory cell equals zero */ |
| atomic_t refcnt; |
| |
| /* How this transaction should be processed by crypto engine */ |
| short enc; |
| /* How many times this transaction was resent */ |
| short retries; |
| /* Completion status */ |
| int error; |
| |
| /* When did we send it to the remote peer */ |
| long send_time; |
| |
| /* My name is... |
| * Well, computers does not speak, they have unique id instead */ |
| dst_gen_t gen; |
| |
| /* Block IO we are working with */ |
| struct bio *bio; |
| |
| /* Network command for above block IO request */ |
| struct dst_cmd cmd; |
| }; |
| |
| struct dst_crypto_engine |
| { |
| /* What should we do with all block requests */ |
| struct crypto_hash *hash; |
| struct crypto_ablkcipher *cipher; |
| |
| /* Pool of pages used to encrypt data into before sending */ |
| int page_num; |
| struct page **pages; |
| |
| /* What to do with current request */ |
| int enc; |
| /* Who we are and where do we go */ |
| struct scatterlist *src, *dst; |
| |
| /* Maximum timeout waiting for encryption to be completed */ |
| long timeout; |
| /* IV is a 64-bit sequential counter */ |
| u64 iv; |
| |
| /* Secret data */ |
| void *private; |
| |
| /* Cached temporary data lives here */ |
| int size; |
| void *data; |
| }; |
| |
| struct dst_state |
| { |
| /* The main state protection */ |
| struct mutex state_lock; |
| |
| /* Polling machinery for sockets */ |
| wait_queue_t wait; |
| wait_queue_head_t *whead; |
| /* Most of events are being waited here */ |
| wait_queue_head_t thread_wait; |
| |
| /* Who owns this? */ |
| struct dst_node *node; |
| |
| /* Network address for this state */ |
| struct dst_network_ctl ctl; |
| |
| /* Permissions to work with: read-only or rw connection */ |
| u32 permissions; |
| |
| /* Called when we need to clean private data */ |
| void (* cleanup)(struct dst_state *st); |
| |
| /* Used by the server: BIO completion queues BIOs here */ |
| struct list_head request_list; |
| spinlock_t request_lock; |
| |
| /* Guess what? No, it is not number of planets */ |
| atomic_t refcnt; |
| |
| /* This flags is set when connection should be dropped */ |
| int need_exit; |
| |
| /* |
| * Socket to work with. Second pointer is used for |
| * lockless check if socket was changed before performing |
| * next action (like working with cached polling result) |
| */ |
| struct socket *socket, *read_socket; |
| |
| /* Cached preallocated data */ |
| void *data; |
| unsigned int size; |
| |
| /* Currently processed command */ |
| struct dst_cmd cmd; |
| }; |
| |
| struct dst_info |
| { |
| /* Device size */ |
| u64 size; |
| |
| /* Local device name for export devices */ |
| char local[DST_NAMELEN]; |
| |
| /* Network setup */ |
| struct dst_network_ctl net; |
| |
| /* Sysfs bits use this */ |
| struct device device; |
| }; |
| |
| struct dst_node |
| { |
| struct list_head node_entry; |
| |
| /* Hi, my name is stored here */ |
| char name[DST_NAMELEN]; |
| /* My cache name is stored here */ |
| char cache_name[DST_NAMELEN]; |
| |
| /* Block device attached to given node. |
| * Only valid for exporting nodes */ |
| struct block_device *bdev; |
| /* Network state machine for given peer */ |
| struct dst_state *state; |
| |
| /* Block IO machinery */ |
| struct request_queue *queue; |
| struct gendisk *disk; |
| |
| /* Number of threads in processing pool */ |
| int thread_num; |
| /* Maximum number of pages in single IO */ |
| int max_pages; |
| |
| /* I'm that big in bytes */ |
| loff_t size; |
| |
| /* Exported to userspace node information */ |
| struct dst_info *info; |
| |
| /* |
| * Security attribute list. |
| * Used only by exporting node currently. |
| */ |
| struct list_head security_list; |
| struct mutex security_lock; |
| |
| /* |
| * When this unerflows below zero, university collapses. |
| * But this will not happen, since node will be freed, |
| * when reference counter reaches zero. |
| */ |
| atomic_t refcnt; |
| |
| /* How precisely should I be started? */ |
| int (*start)(struct dst_node *); |
| |
| /* Crypto capabilities */ |
| struct dst_crypto_ctl crypto; |
| u8 *hash_key; |
| u8 *cipher_key; |
| |
| /* Pool of processing thread */ |
| struct thread_pool *pool; |
| |
| /* Transaction IDs live here */ |
| atomic_long_t gen; |
| |
| /* |
| * How frequently and how many times transaction |
| * tree should be scanned to drop stale objects. |
| */ |
| long trans_scan_timeout; |
| int trans_max_retries; |
| |
| /* Small gnomes live here */ |
| struct rb_root trans_root; |
| struct mutex trans_lock; |
| |
| /* |
| * Transaction cache/memory pool. |
| * It is big enough to contain not only transaction |
| * itself, but additional crypto data (digest/hmac). |
| */ |
| struct kmem_cache *trans_cache; |
| mempool_t *trans_pool; |
| |
| /* This entity scans transaction tree */ |
| struct delayed_work trans_work; |
| |
| wait_queue_head_t wait; |
| }; |
| |
| /* Kernel representation of the security attribute */ |
| struct dst_secure |
| { |
| struct list_head sec_entry; |
| struct dst_secure_user sec; |
| }; |
| |
/* Block IO entry point (implemented elsewhere). */
int dst_process_bio(struct dst_node *n, struct bio *bio);

/* Node network initialization (implemented elsewhere). */
int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r);
int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le);
| |
| static inline struct dst_state *dst_state_get(struct dst_state *st) |
| { |
| BUG_ON(atomic_read(&st->refcnt) == 0); |
| atomic_inc(&st->refcnt); |
| return st; |
| } |
| |
/* State lifetime and socket management (implemented elsewhere). */
void dst_state_put(struct dst_state *st);
struct dst_state *dst_state_alloc(struct dst_node *n);

int dst_state_socket_create(struct dst_state *st);
void dst_state_socket_release(struct dst_state *st);

void dst_state_exit_connected(struct dst_state *st);

int dst_state_schedule_receiver(struct dst_state *st);

/* Address dumping helper (implemented elsewhere). */
void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str);
| |
| static inline void dst_state_lock(struct dst_state *st) |
| { |
| mutex_lock(&st->state_lock); |
| } |
| |
| static inline void dst_state_unlock(struct dst_state *st) |
| { |
| mutex_unlock(&st->state_lock); |
| } |
| |
/* Polling setup and teardown for a state (implemented elsewhere). */
void dst_poll_exit(struct dst_state *st);
int dst_poll_init(struct dst_state *st);
| |
| static inline unsigned int dst_state_poll(struct dst_state *st) |
| { |
| unsigned int revents = POLLHUP | POLLERR; |
| |
| dst_state_lock(st); |
| if (st->socket) |
| revents = st->socket->ops->poll(NULL, st->socket, NULL); |
| dst_state_unlock(st); |
| |
| return revents; |
| } |
| |
/*
 * No-op setup callback: ignores both arguments and always reports
 * success (0).
 */
static inline int dst_thread_setup(void *private, void *data)
{
	(void)private;
	(void)data;

	return 0;
}
| |
| void dst_node_put(struct dst_node *n); |
| |
| static inline struct dst_node *dst_node_get(struct dst_node *n) |
| { |
| atomic_inc(&n->refcnt); |
| return n; |
| } |
| |
| int dst_data_recv(struct dst_state *st, void *data, unsigned int size); |
| int dst_recv_cdata(struct dst_state *st, void *cdata); |
| int dst_data_send_header(struct socket *sock, |
| void *data, unsigned int size, int more); |
| |
| int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio); |
| |
| int dst_process_io(struct dst_state *st); |
| int dst_export_crypto(struct dst_node *n, struct bio *bio); |
| int dst_export_send_bio(struct bio *bio); |
| int dst_start_export(struct dst_node *n); |
| |
| int __init dst_export_init(void); |
| void dst_export_exit(void); |
| |
| /* Private structure for export block IO requests */ |
| struct dst_export_priv |
| { |
| struct list_head request_entry; |
| struct dst_state *state; |
| struct bio *bio; |
| struct dst_cmd cmd; |
| }; |
| |
| static inline void dst_trans_get(struct dst_trans *t) |
| { |
| atomic_inc(&t->refcnt); |
| } |
| |
| struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen); |
| int dst_trans_remove(struct dst_trans *t); |
| int dst_trans_remove_nolock(struct dst_trans *t); |
| void dst_trans_put(struct dst_trans *t); |
| |
| /* |
| * Convert bio into network command. |
| */ |
| static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd, |
| u32 command, u64 id) |
| { |
| cmd->cmd = command; |
| cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS; |
| cmd->rw = bio->bi_rw; |
| cmd->size = bio->bi_size; |
| cmd->csize = 0; |
| cmd->id = id; |
| cmd->sector = bio->bi_sector; |
| }; |
| |
/* Transaction sending and crypto processing (implemented elsewhere). */
int dst_trans_send(struct dst_trans *t);
int dst_trans_crypto(struct dst_trans *t);

/* Per-node crypto setup and teardown (implemented elsewhere). */
int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl);
void dst_node_crypto_exit(struct dst_node *n);
| |
| static inline int dst_need_crypto(struct dst_node *n) |
| { |
| struct dst_crypto_ctl *c = &n->crypto; |
| /* |
| * Logical OR is appropriate here, but boolean one produces |
| * more optimal code, so it is used instead. |
| */ |
| return (c->hash_algo[0] | c->cipher_algo[0]); |
| } |
| |
/* Per-node transaction machinery setup and teardown (implemented elsewhere). */
int dst_node_trans_init(struct dst_node *n, unsigned int size);
void dst_node_trans_exit(struct dst_node *n);
| |
| /* |
| * Pool of threads. |
| * Ready list contains threads currently free to be used, |
| * active one contains threads with some work scheduled for them. |
| * Caller can wait in given queue when thread is ready. |
| */ |
| struct thread_pool |
| { |
| int thread_num; |
| struct mutex thread_lock; |
| struct list_head ready_list, active_list; |
| |
| wait_queue_head_t wait; |
| }; |
| |
/* Worker management (implemented elsewhere). */
void thread_pool_del_worker(struct thread_pool *p);
void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);
int thread_pool_add_worker(struct thread_pool *p,
			   char *name,
			   unsigned int id,
			   void *(*init)(void *data),
			   void (*cleanup)(void *data),
			   void *data);

/* Pool creation and destruction (implemented elsewhere). */
void thread_pool_destroy(struct thread_pool *p);
struct thread_pool *thread_pool_create(int num, char *name,
				       void *(*init)(void *data),
				       void (*cleanup)(void *data),
				       void *data);

/* Work scheduling (implemented elsewhere). */
int thread_pool_schedule(struct thread_pool *p,
			 int (*setup)(void *stored_private, void *setup_data),
			 int (*action)(void *stored_private, void *setup_data),
			 void *setup_data, long timeout);
int thread_pool_schedule_private(struct thread_pool *p,
				 int (*setup)(void *private, void *data),
				 int (*action)(void *private, void *data),
				 void *data, long timeout, void *id);
| |
| #endif /* __KERNEL__ */ |
| #endif /* __DST_H */ |