/*
 * Kernel iptables module to track stats for packets based on user tags.
 *
 * (C) 2011 Google, Inc
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * There are run-time debug flags enabled via the debug_mask module param, or
 * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
 */
#define DEBUG

#include <linux/file.h>
#include <linux/inetdevice.h>
#include <linux/module.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_qtaguid.h>
#include <linux/ratelimit.h>
#include <linux/seq_file.h>
#include <linux/skbuff.h>
#include <linux/workqueue.h>
#include <net/addrconf.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/udp.h>

#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
#include <linux/netfilter_ipv6/ip6_tables.h>
#endif

#include <linux/netfilter/xt_socket.h>
#include "xt_qtaguid_internal.h"
#include "xt_qtaguid_print.h"
#include "../../fs/proc/internal.h"

/*
 * We only use the xt_socket funcs within a similar context to avoid unexpected
 * return values.
 */
#define XT_SOCKET_SUPPORTED_HOOKS \
	((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))

static const char *module_procdirname = "xt_qtaguid";
static struct proc_dir_entry *xt_qtaguid_procdir;

static unsigned int proc_iface_perms = S_IRUGO;
module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);

static struct proc_dir_entry *xt_qtaguid_stats_file;
static unsigned int proc_stats_perms = S_IRUGO;
module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);

static struct proc_dir_entry *xt_qtaguid_ctrl_file;

/* Everybody can write. But proc_ctrl_write_limited is true by default which
 * limits what can be controlled. See the can_*() functions.
 */
static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);

/* Limited by default, so the gid of the ctrl and stats proc entries
 * will limit what can be done. See the can_*() functions.
 */
static bool proc_stats_readall_limited = true;
static bool proc_ctrl_write_limited = true;

module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
		   S_IRUGO | S_IWUSR);
module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
		   S_IRUGO | S_IWUSR);

/*
 * Limit the number of active tags (via socket tags) for a given UID.
 * Multiple processes could share the UID.
 */
static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);

/*
 * After the kernel has initialized this module, it is still possible
 * to make it passive.
 * Setting passive to Y:
 *  - the iface stats handling will not act on notifications.
 *  - iptables matches will never match.
 *  - ctrl commands silently succeed.
 *  - stats are always empty.
 * This is mostly useful when a bug is suspected.
 */
static bool module_passive;
module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);

/*
 * Control how qtaguid data is tracked per proc/uid.
 * Setting tag_tracking_passive to Y:
 *  - don't create proc specific structs to track tags
 *  - don't check whether the active tag count exceeds its limit.
 *  - don't clean up socket tags on process exits.
 * This is mostly useful when a bug is suspected.
 */
static bool qtu_proc_handling_passive;
module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
		   S_IRUGO | S_IWUSR);

#define QTU_DEV_NAME "xt_qtaguid"

uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);

/*---------------------------------------------------------------------------*/
static const char *iface_stat_procdirname = "iface_stat";
static struct proc_dir_entry *iface_stat_procdir;
/*
 * The iface_stat_all* entries will go away once userspace gets used to the
 * new fields that have a format line.
 */
static const char *iface_stat_all_procfilename = "iface_stat_all";
static struct proc_dir_entry *iface_stat_all_procfile;
static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
static struct proc_dir_entry *iface_stat_fmt_procfile;

static LIST_HEAD(iface_stat_list);
static DEFINE_SPINLOCK(iface_stat_list_lock);

static struct rb_root sock_tag_tree = RB_ROOT;
static DEFINE_SPINLOCK(sock_tag_list_lock);

static struct rb_root tag_counter_set_tree = RB_ROOT;
static DEFINE_SPINLOCK(tag_counter_set_list_lock);

static struct rb_root uid_tag_data_tree = RB_ROOT;
static DEFINE_SPINLOCK(uid_tag_data_tree_lock);

static struct rb_root proc_qtu_data_tree = RB_ROOT;
/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */

static struct qtaguid_event_counts qtu_events;
/*----------------------------------------------*/
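/*
 * Access control helpers: callers may act on other UIDs when they are in
 * the proc entry's group, are root (fsuid 0), own the ctrl file, or when
 * the corresponding *_limited module param is cleared.
 */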
static bool can_manipulate_uids(void)
{
	/* root pwnd */
	return in_egroup_p(xt_qtaguid_ctrl_file->gid)
		|| unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited)
		|| unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
}

static bool can_impersonate_uid(uid_t uid)
{
	return uid == current_fsuid() || can_manipulate_uids();
}

static bool can_read_other_uid_stats(uid_t uid)
{
	/* root pwnd */
	return in_egroup_p(xt_qtaguid_stats_file->gid)
		|| unlikely(!current_fsuid()) || uid == current_fsuid()
		|| unlikely(!proc_stats_readall_limited)
		|| unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
}

static inline void dc_add_byte_packets(struct data_counters *counters, int set,
				       enum ifs_tx_rx direction,
				       enum ifs_proto ifs_proto,
				       int bytes,
				       int packets)
{
	counters->bpc[set][direction][ifs_proto].bytes += bytes;
	counters->bpc[set][direction][ifs_proto].packets += packets;
}
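
/*
 * The tag_node trees (tag_stat, tag_counter_set, tag_ref) are all rb-trees
 * keyed by the full tag value via tag_compare().
 */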
175
JP Abgrall053e3102011-06-20 12:41:46 -0700176static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
177{
178 struct rb_node *node = root->rb_node;
179
180 while (node) {
181 struct tag_node *data = rb_entry(node, struct tag_node, node);
182 int result;
183 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
184 " node=%p data=%p\n", tag, node, data);
185 result = tag_compare(tag, data->tag);
186 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
187 " data.tag=0x%llx (uid=%u) res=%d\n",
188 tag, data->tag, get_uid_from_tag(data->tag), result);
189 if (result < 0)
190 node = node->rb_left;
191 else if (result > 0)
192 node = node->rb_right;
193 else
194 return data;
195 }
196 return NULL;
197}
198
199static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
200{
201 struct rb_node **new = &(root->rb_node), *parent = NULL;
202
203 /* Figure out where to put new node */
204 while (*new) {
205 struct tag_node *this = rb_entry(*new, struct tag_node,
206 node);
207 int result = tag_compare(data->tag, this->tag);
208 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
209 " (uid=%u)\n", __func__,
210 this->tag,
211 get_uid_from_tag(this->tag));
212 parent = *new;
213 if (result < 0)
214 new = &((*new)->rb_left);
215 else if (result > 0)
216 new = &((*new)->rb_right);
217 else
218 BUG();
219 }
220
221 /* Add new node and rebalance tree. */
222 rb_link_node(&data->node, parent, new);
223 rb_insert_color(&data->node, root);
224}

static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}

static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
{
	struct tag_node *node = tag_node_tree_search(root, tag);
	if (!node)
		return NULL;
	return rb_entry(&node->node, struct tag_stat, tn.node);
}

static void tag_counter_set_tree_insert(struct tag_counter_set *data,
					struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}

static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
							    tag_t tag)
{
	struct tag_node *node = tag_node_tree_search(root, tag);
	if (!node)
		return NULL;
	return rb_entry(&node->node, struct tag_counter_set, tn.node);
}

static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}

static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
{
	struct tag_node *node = tag_node_tree_search(root, tag);
	if (!node)
		return NULL;
	return rb_entry(&node->node, struct tag_ref, tn.node);
}

static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
					     const struct sock *sk)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct sock_tag *data = rb_entry(node, struct sock_tag,
						 sock_node);
		if (sk < data->sk)
			node = node->rb_left;
		else if (sk > data->sk)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	/* Figure out where to put new node */
	while (*new) {
		struct sock_tag *this = rb_entry(*new, struct sock_tag,
						 sock_node);
		parent = *new;
		if (data->sk < this->sk)
			new = &((*new)->rb_left);
		else if (data->sk > this->sk)
			new = &((*new)->rb_right);
		else
			BUG();
	}

	/* Add new node and rebalance tree. */
	rb_link_node(&data->sock_node, parent, new);
	rb_insert_color(&data->sock_node, root);
}

static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
{
	struct rb_node *node;
	struct sock_tag *st_entry;

	node = rb_first(st_to_free_tree);
	while (node) {
		st_entry = rb_entry(node, struct sock_tag, sock_node);
		node = rb_next(node);
		CT_DEBUG("qtaguid: %s(): "
			 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
			 st_entry->sk,
			 st_entry->tag,
			 get_uid_from_tag(st_entry->tag));
		rb_erase(&st_entry->sock_node, st_to_free_tree);
		sockfd_put(st_entry->socket);
		kfree(st_entry);
	}
}

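/*
 * The proc_qtu_data tree is keyed by pid and, like the uid_tag_data tree,
 * is protected by uid_tag_data_tree_lock.
 */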
static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
						       const pid_t pid)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct proc_qtu_data *data = rb_entry(node,
						      struct proc_qtu_data,
						      node);
		if (pid < data->pid)
			node = node->rb_left;
		else if (pid > data->pid)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
				      struct rb_root *root)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	/* Figure out where to put new node */
	while (*new) {
		struct proc_qtu_data *this = rb_entry(*new,
						      struct proc_qtu_data,
						      node);
		parent = *new;
		if (data->pid < this->pid)
			new = &((*new)->rb_left);
		else if (data->pid > this->pid)
			new = &((*new)->rb_right);
		else
			BUG();
	}

	/* Add new node and rebalance tree. */
	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void uid_tag_data_tree_insert(struct uid_tag_data *data,
				     struct rb_root *root)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	/* Figure out where to put new node */
	while (*new) {
		struct uid_tag_data *this = rb_entry(*new,
						     struct uid_tag_data,
						     node);
		parent = *new;
		if (data->uid < this->uid)
			new = &((*new)->rb_left);
		else if (data->uid > this->uid)
			new = &((*new)->rb_right);
		else
			BUG();
	}

	/* Add new node and rebalance tree. */
	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
						     uid_t uid)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct uid_tag_data *data = rb_entry(node,
						     struct uid_tag_data,
						     node);
		if (uid < data->uid)
			node = node->rb_left;
		else if (uid > data->uid)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

/*
 * Allocates a new uid_tag_data struct if needed.
 * Returns a pointer to the found or allocated uid_tag_data.
 * Returns a PTR_ERR on failures, and lock is not held.
 * If found_res is not NULL:
 *   sets *found_res to true if the entry already existed,
 *   false if it had to be allocated.
 */
struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
{
	struct uid_tag_data *utd_entry;

	/* Look for top level uid_tag_data for the UID */
	utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
	DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);

	if (found_res)
		*found_res = utd_entry;
	if (utd_entry)
		return utd_entry;

	utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
	if (!utd_entry) {
		pr_err("qtaguid: get_uid_data(%u): "
		       "tag data alloc failed\n", uid);
		return ERR_PTR(-ENOMEM);
	}

	utd_entry->uid = uid;
	utd_entry->tag_ref_tree = RB_ROOT;
	uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
	DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
	return utd_entry;
}

/* Never returns NULL. Either PTR_ERR or a valid ptr. */
static struct tag_ref *new_tag_ref(tag_t new_tag,
				   struct uid_tag_data *utd_entry)
{
	struct tag_ref *tr_entry;
	int res;

	if (utd_entry->num_active_tags + 1 > max_sock_tags) {
		pr_info("qtaguid: new_tag_ref(0x%llx): "
			"tag ref alloc quota exceeded. max=%d\n",
			new_tag, max_sock_tags);
		res = -EMFILE;
		goto err_res;
	}

	tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
	if (!tr_entry) {
		pr_err("qtaguid: new_tag_ref(0x%llx): "
		       "tag ref alloc failed\n",
		       new_tag);
		res = -ENOMEM;
		goto err_res;
	}
	tr_entry->tn.tag = new_tag;
	/* tr_entry->num_sock_tags handled by caller */
	utd_entry->num_active_tags++;
	tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
	DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
		 " inserted new tag ref %p\n",
		 new_tag, tr_entry);
	return tr_entry;

err_res:
	return ERR_PTR(res);
}

static struct tag_ref *lookup_tag_ref(tag_t full_tag,
				      struct uid_tag_data **utd_res)
{
	struct uid_tag_data *utd_entry;
	struct tag_ref *tr_entry;
	bool found_utd;
	uid_t uid = get_uid_from_tag(full_tag);

	DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
		 full_tag, uid);

	utd_entry = get_uid_data(uid, &found_utd);
	if (IS_ERR_OR_NULL(utd_entry)) {
		if (utd_res)
			*utd_res = utd_entry;
		return NULL;
	}

	tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
	if (utd_res)
		*utd_res = utd_entry;
	DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
		 full_tag, utd_entry, tr_entry);
	return tr_entry;
}

/* Never returns NULL. Either PTR_ERR or a valid ptr. */
static struct tag_ref *get_tag_ref(tag_t full_tag,
				   struct uid_tag_data **utd_res)
{
	struct uid_tag_data *utd_entry;
	struct tag_ref *tr_entry;

	DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
		 full_tag);
	spin_lock_bh(&uid_tag_data_tree_lock);
	tr_entry = lookup_tag_ref(full_tag, &utd_entry);
	BUG_ON(IS_ERR_OR_NULL(utd_entry));
	if (!tr_entry)
		tr_entry = new_tag_ref(full_tag, utd_entry);

	spin_unlock_bh(&uid_tag_data_tree_lock);
	if (utd_res)
		*utd_res = utd_entry;
	DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
		 full_tag, utd_entry, tr_entry);
	return tr_entry;
}

/* Checks and maybe frees the UID Tag Data entry */
static void put_utd_entry(struct uid_tag_data *utd_entry)
{
	/* Are we done with the UID tag data entry? */
	if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
	    !utd_entry->num_pqd) {
		DR_DEBUG("qtaguid: %s(): "
			 "erase utd_entry=%p uid=%u "
			 "by pid=%u tgid=%u uid=%u\n", __func__,
			 utd_entry, utd_entry->uid,
			 current->pid, current->tgid, current_fsuid());
		BUG_ON(utd_entry->num_active_tags);
		rb_erase(&utd_entry->node, &uid_tag_data_tree);
		kfree(utd_entry);
	} else {
		DR_DEBUG("qtaguid: %s(): "
			 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
			 __func__, utd_entry, utd_entry->num_active_tags,
			 utd_entry->num_pqd);
		BUG_ON(!(utd_entry->num_active_tags ||
			 utd_entry->num_pqd));
	}
}

/*
 * If no sock_tags are using this tag_ref, removes tr_entry from
 * utd_entry->tag_ref_tree, decrements the utd_entry's active tag count,
 * and frees the tag_ref.
 */
static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
					struct uid_tag_data *utd_entry)
{
	DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
		 tr_entry, tr_entry->tn.tag,
		 get_uid_from_tag(tr_entry->tn.tag));
	if (!tr_entry->num_sock_tags) {
		BUG_ON(!utd_entry->num_active_tags);
		utd_entry->num_active_tags--;
		rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
		DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
		kfree(tr_entry);
	}
}

static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
{
	struct rb_node *node;
	struct tag_ref *tr_entry;
	tag_t acct_tag;

	DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
		 full_tag, get_uid_from_tag(full_tag));
	acct_tag = get_atag_from_tag(full_tag);
	node = rb_first(&utd_entry->tag_ref_tree);
	while (node) {
		tr_entry = rb_entry(node, struct tag_ref, tn.node);
		node = rb_next(node);
		if (!acct_tag || tr_entry->tn.tag == full_tag)
			free_tag_ref_from_utd_entry(tr_entry, utd_entry);
	}
}

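/*
 * Simple /proc read handlers that render a single u64 or bool value
 * stashed via PDE_DATA() at proc_create_data() time.
 */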
static int read_proc_u64(struct file *file, char __user *buf,
			 size_t size, loff_t *ppos)
{
	uint64_t *valuep = PDE_DATA(file_inode(file));
	char tmp[24];
	size_t tmp_size;

	tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
	return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
}

static int read_proc_bool(struct file *file, char __user *buf,
			  size_t size, loff_t *ppos)
{
	bool *valuep = PDE_DATA(file_inode(file));
	char tmp[24];
	size_t tmp_size;

	tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
	return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
}

static int get_active_counter_set(tag_t tag)
{
	int active_set = 0;
	struct tag_counter_set *tcs;

	MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
		 " (uid=%u)\n",
		 tag, get_uid_from_tag(tag));
	/* For now we only handle UID tags for active sets */
	tag = get_utag_from_tag(tag);
	spin_lock_bh(&tag_counter_set_list_lock);
	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
	if (tcs)
		active_set = tcs->active_set;
	spin_unlock_bh(&tag_counter_set_list_lock);
	return active_set;
}

/*
 * Find the entry for tracking the specified interface.
 * Caller must hold iface_stat_list_lock
 */
static struct iface_stat *get_iface_entry(const char *ifname)
{
	struct iface_stat *iface_entry;

	/* Find the entry for tracking the specified tag within the interface */
	if (ifname == NULL) {
		pr_info("qtaguid: iface_stat: get() NULL device name\n");
		return NULL;
	}

	/* Iterate over interfaces */
	list_for_each_entry(iface_entry, &iface_stat_list, list) {
		if (!strcmp(ifname, iface_entry->ifname))
			goto done;
	}
	iface_entry = NULL;
done:
	return iface_entry;
}

/* This is for fmt2 only */
static void pp_iface_stat_header(struct seq_file *m)
{
	seq_puts(m,
		 "ifname "
		 "total_skb_rx_bytes total_skb_rx_packets "
		 "total_skb_tx_bytes total_skb_tx_packets "
		 "rx_tcp_bytes rx_tcp_packets "
		 "rx_udp_bytes rx_udp_packets "
		 "rx_other_bytes rx_other_packets "
		 "tx_tcp_bytes tx_tcp_packets "
		 "tx_udp_bytes tx_udp_packets "
		 "tx_other_bytes tx_other_packets\n"
	);
}

static void pp_iface_stat_line(struct seq_file *m,
			       struct iface_stat *iface_entry)
{
	struct data_counters *cnts;
	int cnt_set = 0;   /* We only use one set for the device */
	cnts = &iface_entry->totals_via_skb;
	seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
		   "%llu %llu %llu %llu %llu %llu %llu %llu\n",
		   iface_entry->ifname,
		   dc_sum_bytes(cnts, cnt_set, IFS_RX),
		   dc_sum_packets(cnts, cnt_set, IFS_RX),
		   dc_sum_bytes(cnts, cnt_set, IFS_TX),
		   dc_sum_packets(cnts, cnt_set, IFS_TX),
		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
}

struct proc_iface_stat_fmt_info {
	int fmt;
};

static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_iface_stat_fmt_info *p = m->private;
	loff_t n = *pos;

	/*
	 * This lock will prevent iface_stat_update() from changing active,
	 * and in turn prevent an interface from unregistering itself.
	 */
	spin_lock_bh(&iface_stat_list_lock);

	if (unlikely(module_passive))
		return NULL;

	if (!n && p->fmt == 2)
		pp_iface_stat_header(m);

	return seq_list_start(&iface_stat_list, n);
}

static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &iface_stat_list, pos);
}

static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
{
	spin_unlock_bh(&iface_stat_list_lock);
}

static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
{
	struct proc_iface_stat_fmt_info *p = m->private;
	struct iface_stat *iface_entry;
	struct rtnl_link_stats64 dev_stats, *stats;
	struct rtnl_link_stats64 no_dev_stats = {0};

	CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
		 current->pid, current->tgid, current_fsuid());

	iface_entry = list_entry(v, struct iface_stat, list);

	if (iface_entry->active) {
		stats = dev_get_stats(iface_entry->net_dev,
				      &dev_stats);
	} else {
		stats = &no_dev_stats;
	}
	/*
	 * If the meaning of the data changes, then update the fmtX
	 * string.
	 */
	if (p->fmt == 1) {
		seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
			   iface_entry->ifname,
			   iface_entry->active,
			   iface_entry->totals_via_dev[IFS_RX].bytes,
			   iface_entry->totals_via_dev[IFS_RX].packets,
			   iface_entry->totals_via_dev[IFS_TX].bytes,
			   iface_entry->totals_via_dev[IFS_TX].packets,
			   stats->rx_bytes, stats->rx_packets,
			   stats->tx_bytes, stats->tx_packets
		);
	} else {
		pp_iface_stat_line(m, iface_entry);
	}
	return 0;
}

static const struct file_operations read_u64_fops = {
	.read = read_proc_u64,
	.llseek = default_llseek,
};

static const struct file_operations read_bool_fops = {
	.read = read_proc_bool,
	.llseek = default_llseek,
};

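/*
 * Runs in process context via the shared workqueue: the address notifier
 * chains that trigger iface creation run in atomic context, where the
 * per-iface /proc entries cannot be created directly (see iface_alloc()).
 */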
static void iface_create_proc_worker(struct work_struct *work)
{
	struct proc_dir_entry *proc_entry;
	struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
						   iface_work);
	struct iface_stat *new_iface = isw->iface_entry;

	/* iface_entries are not deleted, so safe to manipulate. */
	proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
	if (IS_ERR_OR_NULL(proc_entry)) {
		pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
		kfree(isw);
		return;
	}

	new_iface->proc_ptr = proc_entry;

	proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_TX].bytes);
	proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_RX].bytes);
	proc_create_data("tx_packets", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_TX].packets);
	proc_create_data("rx_packets", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_RX].packets);
	proc_create_data("active", proc_iface_perms, proc_entry,
			 &read_bool_fops, &new_iface->active);

	IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
		 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
	kfree(isw);
}

/*
 * Sets the entry's active state and updates its net_dev pointer
 * accordingly.
 */
static void _iface_stat_set_active(struct iface_stat *entry,
				   struct net_device *net_dev,
				   bool activate)
{
	if (activate) {
		entry->net_dev = net_dev;
		entry->active = true;
		IF_DEBUG("qtaguid: %s(%s): "
			 "enable tracking. rfcnt=%d\n", __func__,
			 entry->ifname,
			 __this_cpu_read(*net_dev->pcpu_refcnt));
	} else {
		entry->active = false;
		entry->net_dev = NULL;
		IF_DEBUG("qtaguid: %s(%s): "
			 "disable tracking. rfcnt=%d\n", __func__,
			 entry->ifname,
			 __this_cpu_read(*net_dev->pcpu_refcnt));
	}
}

/* Caller must hold iface_stat_list_lock */
static struct iface_stat *iface_alloc(struct net_device *net_dev)
{
	struct iface_stat *new_iface;
	struct iface_stat_work *isw;

	new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
	if (new_iface == NULL) {
		pr_err("qtaguid: iface_stat: create(%s): "
		       "iface_stat alloc failed\n", net_dev->name);
		return NULL;
	}
	new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
	if (new_iface->ifname == NULL) {
		pr_err("qtaguid: iface_stat: create(%s): "
		       "ifname alloc failed\n", net_dev->name);
		kfree(new_iface);
		return NULL;
	}
	spin_lock_init(&new_iface->tag_stat_list_lock);
	new_iface->tag_stat_tree = RB_ROOT;
	_iface_stat_set_active(new_iface, net_dev, true);

	/*
	 * ipv6 notifier chains are atomic :(
	 * No create_proc_read_entry() for you!
	 */
	isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
	if (!isw) {
		pr_err("qtaguid: iface_stat: create(%s): "
		       "work alloc failed\n", new_iface->ifname);
		_iface_stat_set_active(new_iface, net_dev, false);
		kfree(new_iface->ifname);
		kfree(new_iface);
		return NULL;
	}
	isw->iface_entry = new_iface;
	INIT_WORK(&isw->iface_work, iface_create_proc_worker);
	schedule_work(&isw->iface_work);
	list_add(&new_iface->list, &iface_stat_list);
	return new_iface;
}

static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
					       struct iface_stat *iface)
{
	struct rtnl_link_stats64 dev_stats, *stats;
	bool stats_rewound;

	stats = dev_get_stats(net_dev, &dev_stats);
	/* No empty packets */
	stats_rewound =
		(stats->rx_bytes < iface->last_known[IFS_RX].bytes)
		|| (stats->tx_bytes < iface->last_known[IFS_TX].bytes);

	IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
		 "bytes rx/tx=%llu/%llu "
		 "active=%d last_known=%d "
		 "stats_rewound=%d\n", __func__,
		 net_dev ? net_dev->name : "?",
		 iface, net_dev,
		 stats->rx_bytes, stats->tx_bytes,
		 iface->active, iface->last_known_valid, stats_rewound);

	if (iface->active && iface->last_known_valid && stats_rewound) {
		pr_warn_once("qtaguid: iface_stat: %s(%s): "
			     "iface reset its stats unexpectedly\n", __func__,
			     net_dev->name);

		iface->totals_via_dev[IFS_TX].bytes +=
			iface->last_known[IFS_TX].bytes;
		iface->totals_via_dev[IFS_TX].packets +=
			iface->last_known[IFS_TX].packets;
		iface->totals_via_dev[IFS_RX].bytes +=
			iface->last_known[IFS_RX].bytes;
		iface->totals_via_dev[IFS_RX].packets +=
			iface->last_known[IFS_RX].packets;
		iface->last_known_valid = false;
		IF_DEBUG("qtaguid: %s(%s): iface=%p "
			 "used last known bytes rx/tx=%llu/%llu\n", __func__,
			 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
			 iface->last_known[IFS_TX].bytes);
	}
}

/*
 * Create a new entry for tracking the specified interface.
 * Do nothing if the entry already exists.
 * Called when an interface is configured with a valid IP address.
 */
static void iface_stat_create(struct net_device *net_dev,
			      struct in_ifaddr *ifa)
{
	struct in_device *in_dev = NULL;
	const char *ifname;
	struct iface_stat *entry;
	__be32 ipaddr = 0;
	struct iface_stat *new_iface;

	IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
		 net_dev ? net_dev->name : "?",
		 ifa, net_dev);
	if (!net_dev) {
		pr_err("qtaguid: iface_stat: create(): no net dev\n");
		return;
	}

	ifname = net_dev->name;
	if (!ifa) {
		in_dev = in_dev_get(net_dev);
		if (!in_dev) {
			pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
			       ifname);
			return;
		}
		IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
			 ifname, in_dev);
		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
			IF_DEBUG("qtaguid: iface_stat: create(%s): "
				 "ifa=%p ifa_label=%s\n",
				 ifname, ifa,
				 ifa->ifa_label ? ifa->ifa_label : "(null)");
			if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
				break;
		}
	}

	if (!ifa) {
		IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
			 ifname);
		goto done_put;
	}
	ipaddr = ifa->ifa_local;

	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(ifname);
	if (entry != NULL) {
		IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
			 ifname, entry);
		iface_check_stats_reset_and_adjust(net_dev, entry);
		_iface_stat_set_active(entry, net_dev, true);
		IF_DEBUG("qtaguid: %s(%s): "
			 "tracking now %d on ip=%pI4\n", __func__,
			 entry->ifname, true, &ipaddr);
		goto done_unlock_put;
	}

	new_iface = iface_alloc(net_dev);
	IF_DEBUG("qtaguid: iface_stat: create(%s): done "
		 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
done_unlock_put:
	spin_unlock_bh(&iface_stat_list_lock);
done_put:
	if (in_dev)
		in_dev_put(in_dev);
}

static void iface_stat_create_ipv6(struct net_device *net_dev,
				   struct inet6_ifaddr *ifa)
{
	struct in_device *in_dev;
	const char *ifname;
	struct iface_stat *entry;
	struct iface_stat *new_iface;
	int addr_type;

	IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
		 ifa, net_dev, net_dev ? net_dev->name : "");
	if (!net_dev) {
		pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
		return;
	}
	ifname = net_dev->name;

	in_dev = in_dev_get(net_dev);
	if (!in_dev) {
		pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
		       ifname);
		return;
	}

	IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
		 ifname, in_dev);

	if (!ifa) {
		IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
			 ifname);
		goto done_put;
	}
	addr_type = ipv6_addr_type(&ifa->addr);

	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(ifname);
	if (entry != NULL) {
		IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
			 ifname, entry);
		iface_check_stats_reset_and_adjust(net_dev, entry);
		_iface_stat_set_active(entry, net_dev, true);
		IF_DEBUG("qtaguid: %s(%s): "
			 "tracking now %d on ip=%pI6c\n", __func__,
			 entry->ifname, true, &ifa->addr);
		goto done_unlock_put;
	}

	new_iface = iface_alloc(net_dev);
	IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
		 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);

done_unlock_put:
	spin_unlock_bh(&iface_stat_list_lock);
done_put:
	in_dev_put(in_dev);
}

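/* The "_nl" variant assumes the caller already holds sock_tag_list_lock. */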
static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
{
	MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
	return sock_tag_tree_search(&sock_tag_tree, sk);
}

static struct sock_tag *get_sock_stat(const struct sock *sk)
{
	struct sock_tag *sock_tag_entry;
	MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
	if (!sk)
		return NULL;
	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = get_sock_stat_nl(sk);
	spin_unlock_bh(&sock_tag_list_lock);
	return sock_tag_entry;
}

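/*
 * Returns the L4 protocol of the skb: read from the IPv4 header directly,
 * or located via ipv6_find_hdr() for IPv6; IPPROTO_RAW for anything else.
 */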
static int ipx_proto(const struct sk_buff *skb,
		     struct xt_action_param *par)
{
	int thoff = 0, tproto;

	switch (par->family) {
	case NFPROTO_IPV6:
		tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
		if (tproto < 0)
			MT_DEBUG("%s(): transport header not found in ipv6"
				 " skb=%p\n", __func__, skb);
		break;
	case NFPROTO_IPV4:
		tproto = ip_hdr(skb)->protocol;
		break;
	default:
		tproto = IPPROTO_RAW;
	}
	return tproto;
}

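/*
 * Buckets a sample into the TCP, UDP, or "other" counters for the given
 * set and direction; everything that is not TCP/UDP counts as other.
 */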
static void
data_counters_update(struct data_counters *dc, int set,
		     enum ifs_tx_rx direction, int proto, int bytes)
{
	switch (proto) {
	case IPPROTO_TCP:
		dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
		break;
	case IPPROTO_UDP:
		dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
		break;
	case IPPROTO_IP:
	default:
		dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
				    1);
		break;
	}
}

/*
 * Update stats for the specified interface. Do nothing if the entry
 * does not exist (when a device was never configured with an IP address).
 * Called when a device is being unregistered.
 */
static void iface_stat_update(struct net_device *net_dev, bool stash_only)
{
	struct rtnl_link_stats64 dev_stats, *stats;
	struct iface_stat *entry;

	stats = dev_get_stats(net_dev, &dev_stats);
	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(net_dev->name);
	if (entry == NULL) {
		IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
			 net_dev->name);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}

	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
		 net_dev->name, entry);
	if (!entry->active) {
		IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
			 net_dev->name);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}

	if (stash_only) {
		entry->last_known[IFS_TX].bytes = stats->tx_bytes;
		entry->last_known[IFS_TX].packets = stats->tx_packets;
		entry->last_known[IFS_RX].bytes = stats->rx_bytes;
		entry->last_known[IFS_RX].packets = stats->rx_packets;
		entry->last_known_valid = true;
		IF_DEBUG("qtaguid: %s(%s): "
			 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
			 net_dev->name, stats->rx_bytes, stats->tx_bytes);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}
	entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
	entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
	entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
	entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
	/* We don't need the last_known[] anymore */
	entry->last_known_valid = false;
	_iface_stat_set_active(entry, net_dev, false);
	IF_DEBUG("qtaguid: %s(%s): "
		 "disable tracking. rx/tx=%llu/%llu\n", __func__,
		 net_dev->name, stats->rx_bytes, stats->tx_bytes);
	spin_unlock_bh(&iface_stat_list_lock);
}

/*
 * Update stats for the specified interface from the skb.
 * Do nothing if the entry does not exist (when a device was never
 * configured with an IP address).
 * Called for each skb seen in the pre/post routing hooks.
 */
static void iface_stat_update_from_skb(const struct sk_buff *skb,
				       struct xt_action_param *par)
{
	struct iface_stat *entry;
	const struct net_device *el_dev;
	enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
	int bytes = skb->len;
	int proto;

	if (!skb->dev) {
		MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
		el_dev = par->in ? : par->out;
	} else {
		const struct net_device *other_dev;
		el_dev = skb->dev;
		other_dev = par->in ? : par->out;
		if (el_dev != other_dev) {
			MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
				 "par->(in/out)=%p %s\n",
				 par->hooknum, el_dev, el_dev->name, other_dev,
				 other_dev->name);
		}
	}

	if (unlikely(!el_dev)) {
		pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
				   par->hooknum, __func__);
		BUG();
	} else if (unlikely(!el_dev->name)) {
		pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n",
				   par->hooknum, __func__);
		BUG();
	} else {
		proto = ipx_proto(skb, par);
		MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
			 par->hooknum, el_dev->name, el_dev->type,
			 par->family, proto);
	}

	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(el_dev->name);
	if (entry == NULL) {
		IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
			 __func__, el_dev->name);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}

	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
		 el_dev->name, entry);

	data_counters_update(&entry->totals_via_skb, 0, direction, proto,
			     bytes);
	spin_unlock_bh(&iface_stat_list_lock);
}

static void tag_stat_update(struct tag_stat *tag_entry,
			    enum ifs_tx_rx direction, int proto, int bytes)
{
	int active_set;
	active_set = get_active_counter_set(tag_entry->tn.tag);
	MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
		 "dir=%d proto=%d bytes=%d)\n",
		 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
		 active_set, direction, proto, bytes);
	data_counters_update(&tag_entry->counters, active_set, direction,
			     proto, bytes);
	if (tag_entry->parent_counters)
		data_counters_update(tag_entry->parent_counters, active_set,
				     direction, proto, bytes);
}

/*
 * Create a new entry for tracking the specified {acct_tag,uid_tag} within
 * the interface.
 * iface_entry->tag_stat_list_lock should be held.
 */
static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
					   tag_t tag)
{
	struct tag_stat *new_tag_stat_entry = NULL;
	IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
		 " (uid=%u)\n", __func__,
		 iface_entry, tag, get_uid_from_tag(tag));
	new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
	if (!new_tag_stat_entry) {
		pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
		goto done;
	}
	new_tag_stat_entry->tn.tag = tag;
	tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
done:
	return new_tag_stat_entry;
}

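/*
 * Per-packet accounting against both the {acct_tag, uid_tag} entry and,
 * via parent_counters, the base {0, uid_tag} entry for the same interface.
 */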
static void if_tag_stat_update(const char *ifname, uid_t uid,
			       const struct sock *sk, enum ifs_tx_rx direction,
			       int proto, int bytes)
{
	struct tag_stat *tag_stat_entry;
	tag_t tag, acct_tag;
	tag_t uid_tag;
	struct data_counters *uid_tag_counters;
	struct sock_tag *sock_tag_entry;
	struct iface_stat *iface_entry;
	struct tag_stat *new_tag_stat = NULL;
	MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
		 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
		 ifname, uid, sk, direction, proto, bytes);

	iface_entry = get_iface_entry(ifname);
	if (!iface_entry) {
		pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
				   "%s not found\n", ifname);
		return;
	}
	/* It is ok to process data when an iface_entry is inactive */

	MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
		 ifname, iface_entry);

	/*
	 * Look for a tagged sock.
	 * It will have an acct_uid.
	 */
	sock_tag_entry = get_sock_stat(sk);
	if (sock_tag_entry) {
		tag = sock_tag_entry->tag;
		acct_tag = get_atag_from_tag(tag);
		uid_tag = get_utag_from_tag(tag);
	} else {
		acct_tag = make_atag_from_value(0);
		tag = combine_atag_with_uid(acct_tag, uid);
		uid_tag = make_tag_from_uid(uid);
	}
	MT_DEBUG("qtaguid: iface_stat: stat_update(): "
		 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
		 tag, get_uid_from_tag(tag), iface_entry);
	/* Loop over tag list under this interface for {acct_tag,uid_tag} */
	spin_lock_bh(&iface_entry->tag_stat_list_lock);

	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
					      tag);
	if (tag_stat_entry) {
		/*
		 * Updating the {acct_tag, uid_tag} entry handles both stats:
		 * {0, uid_tag} will also get updated.
		 */
		tag_stat_update(tag_stat_entry, direction, proto, bytes);
		spin_unlock_bh(&iface_entry->tag_stat_list_lock);
		return;
	}

	/* Loop over tag list under this interface for {0,uid_tag} */
	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
					      uid_tag);
	if (!tag_stat_entry) {
		/* Here: the base uid_tag did not exist */
		/*
		 * No parent counters. So
		 *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
		 */
		new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
		if (!new_tag_stat)
			goto unlock;
		uid_tag_counters = &new_tag_stat->counters;
	} else {
		uid_tag_counters = &tag_stat_entry->counters;
	}

	if (acct_tag) {
		/* Create the child {acct_tag, uid_tag} and hook up parent. */
		new_tag_stat = create_if_tag_stat(iface_entry, tag);
		if (!new_tag_stat)
			goto unlock;
		new_tag_stat->parent_counters = uid_tag_counters;
	} else {
		/*
		 * For new_tag_stat to be still NULL here would require:
		 *  {0, uid_tag} exists
		 *  and {acct_tag, uid_tag} doesn't exist
		 *  AND acct_tag == 0.
		 * Impossible. This reassures us that new_tag_stat
		 * below will always be assigned.
		 */
		BUG_ON(!new_tag_stat);
	}
	tag_stat_update(new_tag_stat, direction, proto, bytes);
unlock:
	spin_unlock_bh(&iface_entry->tag_stat_list_lock);
}

static int iface_netdev_event_handler(struct notifier_block *nb,
				      unsigned long event, void *ptr) {
	struct net_device *dev = ptr;

	if (unlikely(module_passive))
		return NOTIFY_DONE;

	IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
		 "ev=0x%lx/%s netdev=%p->name=%s\n",
		 event, netdev_evt_str(event), dev, dev ? dev->name : "");

	switch (event) {
	case NETDEV_UP:
		iface_stat_create(dev, NULL);
		atomic64_inc(&qtu_events.iface_events);
		break;
	case NETDEV_DOWN:
	case NETDEV_UNREGISTER:
		iface_stat_update(dev, event == NETDEV_DOWN);
		atomic64_inc(&qtu_events.iface_events);
		break;
	}
	return NOTIFY_DONE;
}

static int iface_inet6addr_event_handler(struct notifier_block *nb,
					 unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	struct net_device *dev;

	if (unlikely(module_passive))
		return NOTIFY_DONE;

	IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
		 "ev=0x%lx/%s ifa=%p\n",
		 event, netdev_evt_str(event), ifa);

	switch (event) {
	case NETDEV_UP:
		BUG_ON(!ifa || !ifa->idev);
		dev = (struct net_device *)ifa->idev->dev;
		iface_stat_create_ipv6(dev, ifa);
		atomic64_inc(&qtu_events.iface_events);
		break;
	case NETDEV_DOWN:
	case NETDEV_UNREGISTER:
		BUG_ON(!ifa || !ifa->idev);
		dev = (struct net_device *)ifa->idev->dev;
		iface_stat_update(dev, event == NETDEV_DOWN);
		atomic64_inc(&qtu_events.iface_events);
		break;
	}
	return NOTIFY_DONE;
}

static int iface_inetaddr_event_handler(struct notifier_block *nb,
					unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = ptr;
	struct net_device *dev;

	if (unlikely(module_passive))
		return NOTIFY_DONE;

	IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
		 "ev=0x%lx/%s ifa=%p\n",
		 event, netdev_evt_str(event), ifa);

	switch (event) {
	case NETDEV_UP:
		BUG_ON(!ifa || !ifa->ifa_dev);
		dev = ifa->ifa_dev->dev;
		iface_stat_create(dev, ifa);
		atomic64_inc(&qtu_events.iface_events);
		break;
	case NETDEV_DOWN:
	case NETDEV_UNREGISTER:
		BUG_ON(!ifa || !ifa->ifa_dev);
		dev = ifa->ifa_dev->dev;
		iface_stat_update(dev, event == NETDEV_DOWN);
		atomic64_inc(&qtu_events.iface_events);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block iface_netdev_notifier_blk = {
	.notifier_call = iface_netdev_event_handler,
};

static struct notifier_block iface_inetaddr_notifier_blk = {
	.notifier_call = iface_inetaddr_event_handler,
};

static struct notifier_block iface_inet6addr_notifier_blk = {
	.notifier_call = iface_inet6addr_event_handler,
};

static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
	.start = iface_stat_fmt_proc_start,
	.next = iface_stat_fmt_proc_next,
	.stop = iface_stat_fmt_proc_stop,
	.show = iface_stat_fmt_proc_show,
};

static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
{
	struct proc_iface_stat_fmt_info *s;

	s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
			       sizeof(struct proc_iface_stat_fmt_info));
	if (!s)
		return -ENOMEM;

	s->fmt = (int)PDE_DATA(inode);
	return 0;
}

static const struct file_operations proc_iface_stat_fmt_fops = {
	.open = proc_iface_stat_fmt_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private,
};

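/*
 * Module init helper: creates the iface_stat /proc entries and registers
 * the netdevice and ipv4/ipv6 address notifiers, unwinding on failure.
 */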
static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
{
	int err;

	iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
	if (!iface_stat_procdir) {
		pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
		err = -1;
		goto err;
	}

	iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
						   proc_iface_perms,
						   parent_procdir,
						   &proc_iface_stat_fmt_fops,
						   (void *)1 /* fmt1 */);
	if (!iface_stat_all_procfile) {
		pr_err("qtaguid: iface_stat: init "
		       " failed to create stat_old proc entry\n");
		err = -1;
		goto err_zap_entry;
	}

	iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
						   proc_iface_perms,
						   parent_procdir,
						   &proc_iface_stat_fmt_fops,
						   (void *)2 /* fmt2 */);
	if (!iface_stat_fmt_procfile) {
		pr_err("qtaguid: iface_stat: init "
		       " failed to create stat_all proc entry\n");
		err = -1;
		goto err_zap_all_stats_entry;
	}

	err = register_netdevice_notifier(&iface_netdev_notifier_blk);
	if (err) {
		pr_err("qtaguid: iface_stat: init "
		       "failed to register dev event handler\n");
		goto err_zap_all_stats_entries;
	}
	err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
	if (err) {
		pr_err("qtaguid: iface_stat: init "
		       "failed to register ipv4 dev event handler\n");
		goto err_unreg_nd;
	}

	err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
	if (err) {
		pr_err("qtaguid: iface_stat: init "
		       "failed to register ipv6 dev event handler\n");
		goto err_unreg_ip4_addr;
	}
	return 0;

err_unreg_ip4_addr:
	unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
err_unreg_nd:
	unregister_netdevice_notifier(&iface_netdev_notifier_blk);
err_zap_all_stats_entries:
	remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
err_zap_all_stats_entry:
	remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
err_zap_entry:
	remove_proc_entry(iface_stat_procdirname, parent_procdir);
err:
	return err;
}

static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
				    struct xt_action_param *par)
{
	struct sock *sk;
	unsigned int hook_mask = (1 << par->hooknum);

	MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
		 par->hooknum, par->family);

	/*
	 * Let's not abuse the xt_socket_get*_sk(), or else it will
	 * return garbage SKs.
	 */
	if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
		return NULL;

	switch (par->family) {
	case NFPROTO_IPV6:
		sk = xt_socket_get6_sk(skb, par);
		break;
	case NFPROTO_IPV4:
		sk = xt_socket_get4_sk(skb, par);
		break;
	default:
		return NULL;
	}

	if (sk) {
		MT_DEBUG("qtaguid: %p->sk_proto=%u "
			 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
		/*
		 * When in TCP_TIME_WAIT the sk is not a "struct sock" but
		 * "struct inet_timewait_sock" which is missing fields.
		 */
		if (sk->sk_state == TCP_TIME_WAIT) {
			xt_socket_put_sk(sk);
			sk = NULL;
		}
	}
	return sk;
}

static void account_for_uid(const struct sk_buff *skb,
			    const struct sock *alternate_sk, uid_t uid,
			    struct xt_action_param *par)
{
	const struct net_device *el_dev;

	if (!skb->dev) {
		MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
		el_dev = par->in ? : par->out;
	} else {
		const struct net_device *other_dev;
		el_dev = skb->dev;
		other_dev = par->in ? : par->out;
		if (el_dev != other_dev) {
			MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
				 "par->(in/out)=%p %s\n",
				 par->hooknum, el_dev, el_dev->name, other_dev,
				 other_dev->name);
		}
	}

	if (unlikely(!el_dev)) {
		pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
	} else if (unlikely(!el_dev->name)) {
		pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
	} else {
		int proto = ipx_proto(skb, par);
		MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
			 par->hooknum, el_dev->name, el_dev->type,
			 par->family, proto);

		if_tag_stat_update(el_dev->name, uid,
				   skb->sk ? skb->sk : alternate_sk,
				   par->in ? IFS_RX : IFS_TX,
				   proto, skb->len);
	}
}
static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_qtaguid_match_info *info = par->matchinfo;
	const struct file *filp;
	bool got_sock = false;
	struct sock *sk;
	uid_t sock_uid;
	bool res;

	if (unlikely(module_passive))
		return (info->match ^ info->invert) == 0;

	MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
		 par->hooknum, skb, par->in, par->out, par->family);

	atomic64_inc(&qtu_events.match_calls);
	if (skb == NULL) {
		res = (info->match ^ info->invert) == 0;
		goto ret_res;
	}

	switch (par->hooknum) {
	case NF_INET_PRE_ROUTING:
	case NF_INET_POST_ROUTING:
		atomic64_inc(&qtu_events.match_calls_prepost);
		iface_stat_update_from_skb(skb, par);
		/*
		 * We are done in pre/post. The skb will get processed
		 * further later.
		 */
		res = (info->match ^ info->invert);
		goto ret_res;
		break;
	/* default: Fall through and do UID related work */
	}

	sk = skb->sk;
	/*
	 * When in TCP_TIME_WAIT the sk is not a "struct sock" but
	 * "struct inet_timewait_sock" which is missing fields.
	 * So we ignore it.
	 */
	if (sk && sk->sk_state == TCP_TIME_WAIT)
		sk = NULL;
	if (sk == NULL) {
		/*
		 * A missing sk->sk_socket happens when packets are in-flight
		 * and the matching socket is already closed and gone.
		 */
		sk = qtaguid_find_sk(skb, par);
		/*
		 * If we got the socket from the find_sk(), we will need to put
		 * it back, as nf_tproxy_get_sock_v4() got it.
		 */
		got_sock = sk;
		if (sk)
			atomic64_inc(&qtu_events.match_found_sk_in_ct);
		else
			atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
	} else {
		atomic64_inc(&qtu_events.match_found_sk);
	}
	MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
		 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
	if (sk != NULL) {
		MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
			 par->hooknum, sk, sk->sk_socket,
			 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
		filp = sk->sk_socket ? sk->sk_socket->file : NULL;
		MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
			 par->hooknum, filp ? filp->f_cred->fsuid : -1);
	}

	if (sk == NULL || sk->sk_socket == NULL) {
		/*
		 * Here, the qtaguid_find_sk() using connection tracking
		 * couldn't find the owner, so for now we just count them
		 * against the system.
		 */
		/*
		 * TODO: unhack how to force just accounting.
		 * For now we only do iface stats when the uid-owner is not
		 * requested.
		 */
		if (!(info->match & XT_QTAGUID_UID))
			account_for_uid(skb, sk, 0, par);
		MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
			 par->hooknum,
			 sk ? sk->sk_socket : NULL);
		res = (info->match ^ info->invert) == 0;
		atomic64_inc(&qtu_events.match_no_sk);
		goto put_sock_ret_res;
	} else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
		res = false;
		goto put_sock_ret_res;
	}
	filp = sk->sk_socket->file;
	if (filp == NULL) {
		MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
		account_for_uid(skb, sk, 0, par);
		res = ((info->match ^ info->invert) &
		       (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
		atomic64_inc(&qtu_events.match_no_sk_file);
		goto put_sock_ret_res;
	}
	sock_uid = filp->f_cred->fsuid;
	/*
	 * TODO: unhack how to force just accounting.
	 * For now we only do iface stats when the uid-owner is not requested.
	 */
	if (!(info->match & XT_QTAGUID_UID))
		account_for_uid(skb, sk, sock_uid, par);

	/*
	 * The following two tests fail the match when:
	 *    id not in range AND no inverted condition requested
	 * or id     in range AND     inverted condition requested
	 * Thus (!a && b) || (a && !b) == a ^ b
	 */
	if (info->match & XT_QTAGUID_UID)
		if ((filp->f_cred->fsuid >= info->uid_min &&
		     filp->f_cred->fsuid <= info->uid_max) ^
		    !(info->invert & XT_QTAGUID_UID)) {
			MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
				 par->hooknum);
			res = false;
			goto put_sock_ret_res;
		}
	if (info->match & XT_QTAGUID_GID)
		if ((filp->f_cred->fsgid >= info->gid_min &&
		     filp->f_cred->fsgid <= info->gid_max) ^
		    !(info->invert & XT_QTAGUID_GID)) {
			MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
				 par->hooknum);
			res = false;
			goto put_sock_ret_res;
		}

	MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
	res = true;

put_sock_ret_res:
	if (got_sock)
		xt_socket_put_sk(sk);
ret_res:
	MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
	return res;
}
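
/*
 * Example usage (illustrative, not part of the original source): since
 * this match registers under the name "owner" (see qtaguid_mt_reg below),
 * it is exercised through the standard iptables owner-match syntax, e.g.:
 *
 *   iptables -A OUTPUT -m owner --uid-owner 10005 -j REJECT
 *
 * which sets XT_QTAGUID_UID with uid_min == uid_max == 10005. Android's
 * patched iptables additionally exposes the socket-existence test
 * (XT_QTAGUID_SOCKET handled above); the exact userspace flag spelling
 * varies by version, so treat it as an assumption.
 */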

#ifdef DDEBUG
/* This function is not in xt_qtaguid_print.c because of locks visibility */
static void prdebug_full_state(int indent_level, const char *fmt, ...)
{
	va_list args;
	char *fmt_buff;
	char *buff;

	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
		return;

	fmt_buff = kasprintf(GFP_ATOMIC,
			     "qtaguid: %s(): %s {\n", __func__, fmt);
	BUG_ON(!fmt_buff);
	va_start(args, fmt);
	buff = kvasprintf(GFP_ATOMIC,
			  fmt_buff, args);
	BUG_ON(!buff);
	pr_debug("%s", buff);
	kfree(fmt_buff);
	kfree(buff);
	va_end(args);

	spin_lock_bh(&sock_tag_list_lock);
	prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
	spin_unlock_bh(&sock_tag_list_lock);

	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);
	prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
	prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);

	spin_lock_bh(&iface_stat_list_lock);
	prdebug_iface_stat_list(indent_level, &iface_stat_list);
	spin_unlock_bh(&iface_stat_list_lock);

	pr_debug("qtaguid: %s(): }\n", __func__);
}
#else
static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
#endif

struct proc_ctrl_print_info {
	struct sock *sk; /* socket found by reading to sk_pos */
	loff_t sk_pos;
};

static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_ctrl_print_info *pcpi = m->private;
	struct sock_tag *sock_tag_entry = v;
	struct rb_node *node;

	(*pos)++;

	if (!v || v == SEQ_START_TOKEN)
		return NULL;

	node = rb_next(&sock_tag_entry->sock_node);
	if (!node) {
		pcpi->sk = NULL;
		sock_tag_entry = SEQ_START_TOKEN;
	} else {
		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
		pcpi->sk = sock_tag_entry->sk;
	}
	pcpi->sk_pos = *pos;
	return sock_tag_entry;
}

static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_ctrl_print_info *pcpi = m->private;
	struct sock_tag *sock_tag_entry;
	struct rb_node *node;

	spin_lock_bh(&sock_tag_list_lock);

	if (unlikely(module_passive))
		return NULL;

	if (*pos == 0) {
		pcpi->sk_pos = 0;
		node = rb_first(&sock_tag_tree);
		if (!node) {
			pcpi->sk = NULL;
			return SEQ_START_TOKEN;
		}
		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
		pcpi->sk = sock_tag_entry->sk;
	} else {
		sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
						NULL) ?: SEQ_START_TOKEN;
		if (*pos != pcpi->sk_pos) {
			/* seq_read skipped a next call */
			*pos = pcpi->sk_pos;
			return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
		}
	}
	return sock_tag_entry;
}

static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
{
	spin_unlock_bh(&sock_tag_list_lock);
}
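
/*
 * The seq_file core always pairs ->start() with ->stop(), even when
 * ->start() returns NULL (e.g. the module_passive case above), so the
 * sock_tag_list_lock taken in qtaguid_ctrl_proc_start() is reliably
 * released here.
 */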

/*
 * Procfs reader to get all active socket tags using style "1)" as described in
 * fs/proc/generic.c
 */
static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
{
	struct sock_tag *sock_tag_entry = v;
	uid_t uid;
	long f_count;

	CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
		 current->pid, current->tgid, current_fsuid());

	if (sock_tag_entry != SEQ_START_TOKEN) {
		uid = get_uid_from_tag(sock_tag_entry->tag);
		CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
			 "pid=%u\n",
			 sock_tag_entry->sk,
			 sock_tag_entry->tag,
			 uid,
			 sock_tag_entry->pid
			);
		f_count = atomic_long_read(
			&sock_tag_entry->socket->file->f_count);
		seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u "
			   "f_count=%lu\n",
			   sock_tag_entry->sk,
			   sock_tag_entry->tag, uid,
			   sock_tag_entry->pid, f_count);
	} else {
		seq_printf(m, "events: sockets_tagged=%llu "
			   "sockets_untagged=%llu "
			   "counter_set_changes=%llu "
			   "delete_cmds=%llu "
			   "iface_events=%llu "
			   "match_calls=%llu "
			   "match_calls_prepost=%llu "
			   "match_found_sk=%llu "
			   "match_found_sk_in_ct=%llu "
			   "match_found_no_sk_in_ct=%llu "
			   "match_no_sk=%llu "
			   "match_no_sk_file=%llu\n",
			   atomic64_read(&qtu_events.sockets_tagged),
			   atomic64_read(&qtu_events.sockets_untagged),
			   atomic64_read(&qtu_events.counter_set_changes),
			   atomic64_read(&qtu_events.delete_cmds),
			   atomic64_read(&qtu_events.iface_events),
			   atomic64_read(&qtu_events.match_calls),
			   atomic64_read(&qtu_events.match_calls_prepost),
			   atomic64_read(&qtu_events.match_found_sk),
			   atomic64_read(&qtu_events.match_found_sk_in_ct),
			   atomic64_read(&qtu_events.match_found_no_sk_in_ct),
			   atomic64_read(&qtu_events.match_no_sk),
			   atomic64_read(&qtu_events.match_no_sk_file));

		/* Count the following as part of the last item_index */
		prdebug_full_state(0, "proc ctrl");
	}

	return 0;
}
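
/*
 * Example of what a read of /proc/net/xt_qtaguid/ctrl produces (pointer
 * and counter values are illustrative only):
 *
 *   sock=ffff8800b2cc1400 tag=0x2600000000002710 (uid=10000) pid=1234 f_count=3
 *   events: sockets_tagged=5 sockets_untagged=3 counter_set_changes=2 ...
 *
 * One line per tagged socket, followed by a final line with the
 * cumulative event counters.
 */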

/*
 * Delete socket tags, and stat tags associated with a given
 * accounting tag and uid.
 */
static int ctrl_cmd_delete(const char *input)
{
	char cmd;
	uid_t uid;
	uid_t entry_uid;
	tag_t acct_tag;
	tag_t tag;
	int res, argc;
	struct iface_stat *iface_entry;
	struct rb_node *node;
	struct sock_tag *st_entry;
	struct rb_root st_to_free_tree = RB_ROOT;
	struct tag_stat *ts_entry;
	struct tag_counter_set *tcs_entry;
	struct tag_ref *tr_entry;
	struct uid_tag_data *utd_entry;

	argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
	CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
		 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
		 acct_tag, uid);
	if (argc < 2) {
		res = -EINVAL;
		goto err;
	}
	if (!valid_atag(acct_tag)) {
		pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
		res = -EINVAL;
		goto err;
	}
	if (argc < 3) {
		uid = current_fsuid();
	} else if (!can_impersonate_uid(uid)) {
		pr_info("qtaguid: ctrl_delete(%s): "
			"insufficient priv from pid=%u tgid=%u uid=%u\n",
			input, current->pid, current->tgid, current_fsuid());
		res = -EPERM;
		goto err;
	}

	tag = combine_atag_with_uid(acct_tag, uid);
	CT_DEBUG("qtaguid: ctrl_delete(%s): "
		 "looking for tag=0x%llx (uid=%u)\n",
		 input, tag, uid);

	/* Delete socket tags */
	spin_lock_bh(&sock_tag_list_lock);
	node = rb_first(&sock_tag_tree);
	while (node) {
		st_entry = rb_entry(node, struct sock_tag, sock_node);
		entry_uid = get_uid_from_tag(st_entry->tag);
		node = rb_next(node);
		if (entry_uid != uid)
			continue;

		CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
			 input, st_entry->tag, entry_uid);

		if (!acct_tag || st_entry->tag == tag) {
			rb_erase(&st_entry->sock_node, &sock_tag_tree);
			/* Can't sockfd_put() within spinlock, do it later. */
			sock_tag_tree_insert(st_entry, &st_to_free_tree);
			tr_entry = lookup_tag_ref(st_entry->tag, NULL);
			BUG_ON(tr_entry->num_sock_tags <= 0);
			tr_entry->num_sock_tags--;
			/*
			 * TODO: remove if, and start failing.
			 * This is a hack to work around the fact that in some
			 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
			 * and are trying to work around apps
			 * that didn't open the /dev/xt_qtaguid.
			 */
			if (st_entry->list.next && st_entry->list.prev)
				list_del(&st_entry->list);
		}
	}
	spin_unlock_bh(&sock_tag_list_lock);

	sock_tag_tree_erase(&st_to_free_tree);

	/* Delete tag counter-sets */
	spin_lock_bh(&tag_counter_set_list_lock);
	/* Counter sets are only on the uid tag, not full tag */
	tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
	if (tcs_entry) {
		CT_DEBUG("qtaguid: ctrl_delete(%s): "
			 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
			 input,
			 tcs_entry->tn.tag,
			 get_uid_from_tag(tcs_entry->tn.tag),
			 tcs_entry->active_set);
		rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
		kfree(tcs_entry);
	}
	spin_unlock_bh(&tag_counter_set_list_lock);

	/*
	 * If acct_tag is 0, then all entries belonging to uid are
	 * erased.
	 */
	spin_lock_bh(&iface_stat_list_lock);
	list_for_each_entry(iface_entry, &iface_stat_list, list) {
		spin_lock_bh(&iface_entry->tag_stat_list_lock);
		node = rb_first(&iface_entry->tag_stat_tree);
		while (node) {
			ts_entry = rb_entry(node, struct tag_stat, tn.node);
			entry_uid = get_uid_from_tag(ts_entry->tn.tag);
			node = rb_next(node);

			CT_DEBUG("qtaguid: ctrl_delete(%s): "
				 "ts tag=0x%llx (uid=%u)\n",
				 input, ts_entry->tn.tag, entry_uid);

			if (entry_uid != uid)
				continue;
			if (!acct_tag || ts_entry->tn.tag == tag) {
				CT_DEBUG("qtaguid: ctrl_delete(%s): "
					 "erase ts: %s 0x%llx %u\n",
					 input, iface_entry->ifname,
					 get_atag_from_tag(ts_entry->tn.tag),
					 entry_uid);
				rb_erase(&ts_entry->tn.node,
					 &iface_entry->tag_stat_tree);
				kfree(ts_entry);
			}
		}
		spin_unlock_bh(&iface_entry->tag_stat_list_lock);
	}
	spin_unlock_bh(&iface_stat_list_lock);

	/* Cleanup the uid_tag_data */
	spin_lock_bh(&uid_tag_data_tree_lock);
	node = rb_first(&uid_tag_data_tree);
	while (node) {
		utd_entry = rb_entry(node, struct uid_tag_data, node);
		entry_uid = utd_entry->uid;
		node = rb_next(node);

		CT_DEBUG("qtaguid: ctrl_delete(%s): "
			 "utd uid=%u\n",
			 input, entry_uid);

		if (entry_uid != uid)
			continue;
		/*
		 * Go over the tag_refs, and those that don't have
		 * sock_tags using them are freed.
		 */
		put_tag_ref_tree(tag, utd_entry);
		put_utd_entry(utd_entry);
	}
	spin_unlock_bh(&uid_tag_data_tree_lock);

	atomic64_inc(&qtu_events.delete_cmds);
	res = 0;

err:
	return res;
}
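
/*
 * Example (illustrative): erase every socket tag, counter set and stat
 * entry belonging to uid 10004 by passing an acct_tag of 0:
 *
 *   echo "d 0 10004" > /proc/net/xt_qtaguid/ctrl
 *
 * matching the "%c %llu %u" format parsed above; a non-zero acct_tag
 * restricts the deletion to that one accounting tag. Acting on another
 * uid requires can_impersonate_uid() to pass.
 */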

static int ctrl_cmd_counter_set(const char *input)
{
	char cmd;
	uid_t uid = 0;
	tag_t tag;
	int res, argc;
	struct tag_counter_set *tcs;
	int counter_set;

	argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
	CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
		 "set=%d uid=%u\n", input, argc, cmd,
		 counter_set, uid);
	if (argc != 3) {
		res = -EINVAL;
		goto err;
	}
	if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
		pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
			input);
		res = -EINVAL;
		goto err;
	}
	if (!can_manipulate_uids()) {
		pr_info("qtaguid: ctrl_counterset(%s): "
			"insufficient priv from pid=%u tgid=%u uid=%u\n",
			input, current->pid, current->tgid, current_fsuid());
		res = -EPERM;
		goto err;
	}

	tag = make_tag_from_uid(uid);
	spin_lock_bh(&tag_counter_set_list_lock);
	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
	if (!tcs) {
		tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
		if (!tcs) {
			spin_unlock_bh(&tag_counter_set_list_lock);
			pr_err("qtaguid: ctrl_counterset(%s): "
			       "failed to alloc counter set\n",
			       input);
			res = -ENOMEM;
			goto err;
		}
		tcs->tn.tag = tag;
		tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
		CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
			 "(uid=%u) set=%d\n",
			 input, tag, get_uid_from_tag(tag), counter_set);
	}
	tcs->active_set = counter_set;
	spin_unlock_bh(&tag_counter_set_list_lock);
	atomic64_inc(&qtu_events.counter_set_changes);
	res = 0;

err:
	return res;
}
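
/*
 * Example (illustrative): route uid 10004's subsequent traffic into
 * counter set 1:
 *
 *   echo "s 1 10004" > /proc/net/xt_qtaguid/ctrl
 *
 * The set applies per uid, not per full tag (see make_tag_from_uid()
 * above). On Android, sets 0 and 1 are conventionally used to separate
 * background from foreground traffic, but that policy lives entirely in
 * userspace.
 */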

static int ctrl_cmd_tag(const char *input)
{
	char cmd;
	int sock_fd = 0;
	uid_t uid = 0;
	tag_t acct_tag = make_atag_from_value(0);
	tag_t full_tag;
	struct socket *el_socket;
	int res, argc;
	struct sock_tag *sock_tag_entry;
	struct tag_ref *tag_ref_entry;
	struct uid_tag_data *uid_tag_data_entry;
	struct proc_qtu_data *pqd_entry;

	/* Unassigned args will get defaulted later. */
	argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
	CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
		 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
		 acct_tag, uid);
	if (argc < 2) {
		res = -EINVAL;
		goto err;
	}
	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
	if (!el_socket) {
		pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
			input, sock_fd, res, current->pid, current->tgid,
			current_fsuid());
		goto err;
	}
	CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
		 input, atomic_long_read(&el_socket->file->f_count),
		 el_socket->sk);
	if (argc < 3) {
		acct_tag = make_atag_from_value(0);
	} else if (!valid_atag(acct_tag)) {
		pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
		res = -EINVAL;
		goto err_put;
	}
	CT_DEBUG("qtaguid: ctrl_tag(%s): "
		 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
		 "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
		 input, current->pid, current->tgid, current_uid(),
		 current_euid(), current_fsuid(),
		 xt_qtaguid_ctrl_file->gid,
		 in_group_p(xt_qtaguid_ctrl_file->gid),
		 in_egroup_p(xt_qtaguid_ctrl_file->gid));
	if (argc < 4) {
		uid = current_fsuid();
	} else if (!can_impersonate_uid(uid)) {
		pr_info("qtaguid: ctrl_tag(%s): "
			"insufficient priv from pid=%u tgid=%u uid=%u\n",
			input, current->pid, current->tgid, current_fsuid());
		res = -EPERM;
		goto err_put;
	}
	full_tag = combine_atag_with_uid(acct_tag, uid);

	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
	tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
	if (IS_ERR(tag_ref_entry)) {
		res = PTR_ERR(tag_ref_entry);
		spin_unlock_bh(&sock_tag_list_lock);
		goto err_put;
	}
	tag_ref_entry->num_sock_tags++;
	if (sock_tag_entry) {
		struct tag_ref *prev_tag_ref_entry;

		CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
			 "st@%p ...->f_count=%ld\n",
			 input, el_socket->sk, sock_tag_entry,
			 atomic_long_read(&el_socket->file->f_count));
		/*
		 * This is a re-tagging, so release the sock_fd that was
		 * locked at the time of the 1st tagging.
		 * There is still the ref from this call's sockfd_lookup() so
		 * it can be done within the spinlock.
		 */
		sockfd_put(sock_tag_entry->socket);
		prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
						    &uid_tag_data_entry);
		BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
		BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
		prev_tag_ref_entry->num_sock_tags--;
		sock_tag_entry->tag = full_tag;
	} else {
		CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
			 input, el_socket->sk);
		sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
					 GFP_ATOMIC);
		if (!sock_tag_entry) {
			pr_err("qtaguid: ctrl_tag(%s): "
			       "socket tag alloc failed\n",
			       input);
			spin_unlock_bh(&sock_tag_list_lock);
			res = -ENOMEM;
			goto err_tag_unref_put;
		}
		sock_tag_entry->sk = el_socket->sk;
		sock_tag_entry->socket = el_socket;
		sock_tag_entry->pid = current->tgid;
		sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
							    uid);
		spin_lock_bh(&uid_tag_data_tree_lock);
		pqd_entry = proc_qtu_data_tree_search(
			&proc_qtu_data_tree, current->tgid);
		/*
		 * TODO: remove if, and start failing.
		 * At first, we want to catch user-space code that is not
		 * opening the /dev/xt_qtaguid.
		 */
		if (IS_ERR_OR_NULL(pqd_entry))
			pr_warn_once(
				"qtaguid: %s(): "
				"User space forgot to open /dev/xt_qtaguid? "
				"pid=%u tgid=%u uid=%u\n", __func__,
				current->pid, current->tgid,
				current_fsuid());
		else
			list_add(&sock_tag_entry->list,
				 &pqd_entry->sock_tag_list);
		spin_unlock_bh(&uid_tag_data_tree_lock);

		sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
		atomic64_inc(&qtu_events.sockets_tagged);
	}
	spin_unlock_bh(&sock_tag_list_lock);
	/* We keep the ref to the socket (file) until it is untagged */
	CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
		 input, sock_tag_entry,
		 atomic_long_read(&el_socket->file->f_count));
	return 0;

err_tag_unref_put:
	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
	tag_ref_entry->num_sock_tags--;
	free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
err_put:
	CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
		 input, atomic_long_read(&el_socket->file->f_count) - 1);
	/* Release the sock_fd that was grabbed by sockfd_lookup(). */
	sockfd_put(el_socket);
	return res;

err:
	CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
	return res;
}
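
/*
 * Example (illustrative): tag the socket behind fd 4 of the writing
 * process with accounting-tag value 1 on behalf of uid 10004:
 *
 *   echo "t 4 4294967296 10004" > /proc/net/xt_qtaguid/ctrl
 *
 * The tag argument is the full atag in decimal (kernel sscanf "%llu"
 * does not accept 0x-prefixed hex); per the helpers in
 * xt_qtaguid_internal.h the accounting value is assumed to occupy the
 * upper 32 bits (4294967296 == 1 << 32), with the uid folded into the
 * lower 32 bits by combine_atag_with_uid().
 */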

static int ctrl_cmd_untag(const char *input)
{
	char cmd;
	int sock_fd = 0;
	struct socket *el_socket;
	int res, argc;
	struct sock_tag *sock_tag_entry;
	struct tag_ref *tag_ref_entry;
	struct uid_tag_data *utd_entry;
	struct proc_qtu_data *pqd_entry;

	argc = sscanf(input, "%c %d", &cmd, &sock_fd);
	CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
		 input, argc, cmd, sock_fd);
	if (argc < 2) {
		res = -EINVAL;
		goto err;
	}
	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
	if (!el_socket) {
		pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
			input, sock_fd, res, current->pid, current->tgid,
			current_fsuid());
		goto err;
	}
	CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
		 input, atomic_long_read(&el_socket->file->f_count),
		 el_socket->sk);
	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
	if (!sock_tag_entry) {
		spin_unlock_bh(&sock_tag_list_lock);
		res = -EINVAL;
		goto err_put;
	}
	/*
	 * The socket already belongs to the current process
	 * so it can do whatever it wants to it.
	 */
	rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);

	tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
	BUG_ON(!tag_ref_entry);
	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
	spin_lock_bh(&uid_tag_data_tree_lock);
	pqd_entry = proc_qtu_data_tree_search(
		&proc_qtu_data_tree, current->tgid);
	/*
	 * TODO: remove if, and start failing.
	 * At first, we want to catch user-space code that is not
	 * opening the /dev/xt_qtaguid.
	 */
	if (IS_ERR_OR_NULL(pqd_entry))
		pr_warn_once("qtaguid: %s(): "
			     "User space forgot to open /dev/xt_qtaguid? "
			     "pid=%u tgid=%u uid=%u\n", __func__,
			     current->pid, current->tgid, current_fsuid());
	else
		list_del(&sock_tag_entry->list);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	/*
	 * We don't free tag_ref from the utd_entry here,
	 * only during a cmd_delete().
	 */
	tag_ref_entry->num_sock_tags--;
	spin_unlock_bh(&sock_tag_list_lock);
	/*
	 * Release the sock_fd that was grabbed at tag time,
	 * and once more for the sockfd_lookup() here.
	 */
	sockfd_put(sock_tag_entry->socket);
	CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
		 input, sock_tag_entry,
		 atomic_long_read(&el_socket->file->f_count) - 1);
	sockfd_put(el_socket);

	kfree(sock_tag_entry);
	atomic64_inc(&qtu_events.sockets_untagged);

	return 0;

err_put:
	CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
		 input, atomic_long_read(&el_socket->file->f_count) - 1);
	/* Release the sock_fd that was grabbed by sockfd_lookup(). */
	sockfd_put(el_socket);
	return res;

err:
	CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
	return res;
}
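
/*
 * Example (illustrative): remove the tag from the socket behind fd 4 of
 * the writing process:
 *
 *   echo "u 4" > /proc/net/xt_qtaguid/ctrl
 *
 * Untagging drops the file reference that was taken when the socket was
 * tagged, so sockets must be untagged (or their process must release
 * /dev/xt_qtaguid) before the underlying file can really go away.
 */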

static int qtaguid_ctrl_parse(const char *input, int count)
{
	char cmd;
	int res;

	CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
		 input, current->pid, current->tgid, current_fsuid());

	cmd = input[0];
	/* Collect params for commands */
	switch (cmd) {
	case 'd':
		res = ctrl_cmd_delete(input);
		break;

	case 's':
		res = ctrl_cmd_counter_set(input);
		break;

	case 't':
		res = ctrl_cmd_tag(input);
		break;

	case 'u':
		res = ctrl_cmd_untag(input);
		break;

	default:
		res = -EINVAL;
		goto err;
	}
	if (!res)
		res = count;
err:
	CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
	return res;
}

#define MAX_QTAGUID_CTRL_INPUT_LEN 255
static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
				       size_t count, loff_t *offp)
{
	char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];

	if (unlikely(module_passive))
		return count;

	if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
		return -EINVAL;

	if (copy_from_user(input_buf, buffer, count))
		return -EFAULT;

	input_buf[count] = '\0';
	return qtaguid_ctrl_parse(input_buf, count);
}
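
/*
 * Note: each write() must carry exactly one command and be shorter than
 * MAX_QTAGUID_CTRL_INPUT_LEN (255) bytes; the buffer is NUL-terminated
 * at 'count' and parsed in one shot, with no line splitting or
 * partial-write reassembly.
 */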

struct proc_print_info {
	struct iface_stat *iface_entry;
	int item_index;
	tag_t tag; /* tag found by reading to tag_pos */
	off_t tag_pos;
	int tag_item_index;
};

static void pp_stats_header(struct seq_file *m)
{
	seq_puts(m,
		 "idx iface acct_tag_hex uid_tag_int cnt_set "
		 "rx_bytes rx_packets "
		 "tx_bytes tx_packets "
		 "rx_tcp_bytes rx_tcp_packets "
		 "rx_udp_bytes rx_udp_packets "
		 "rx_other_bytes rx_other_packets "
		 "tx_tcp_bytes tx_tcp_packets "
		 "tx_udp_bytes tx_udp_packets "
		 "tx_other_bytes tx_other_packets\n");
}

static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
			 int cnt_set)
{
	int ret;
	struct data_counters *cnts;
	tag_t tag = ts_entry->tn.tag;
	uid_t stat_uid = get_uid_from_tag(tag);
	struct proc_print_info *ppi = m->private;
	/* Detailed tags are not available to everybody */
	if (get_atag_from_tag(tag) && !can_read_other_uid_stats(stat_uid)) {
		CT_DEBUG("qtaguid: stats line: "
			 "%s 0x%llx %u: insufficient priv "
			 "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
			 ppi->iface_entry->ifname,
			 get_atag_from_tag(tag), stat_uid,
			 current->pid, current->tgid, current_fsuid(),
			 xt_qtaguid_stats_file->gid);
		return 0;
	}
	ppi->item_index++;
	cnts = &ts_entry->counters;
	ret = seq_printf(m, "%d %s 0x%llx %u %u "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu\n",
		ppi->item_index,
		ppi->iface_entry->ifname,
		get_atag_from_tag(tag),
		stat_uid,
		cnt_set,
		dc_sum_bytes(cnts, cnt_set, IFS_RX),
		dc_sum_packets(cnts, cnt_set, IFS_RX),
		dc_sum_bytes(cnts, cnt_set, IFS_TX),
		dc_sum_packets(cnts, cnt_set, IFS_TX),
		cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
		cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
		cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
		cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
		cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
		cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
		cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
		cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
		cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
		cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
		cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
		cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
	return ret ?: 1;
}

static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
{
	int ret;
	int counter_set;
	for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
	     counter_set++) {
		ret = pp_stats_line(m, ts_entry, counter_set);
		if (ret < 0)
			return false;
	}
	return true;
}

static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
{
	struct iface_stat *iface_entry;

	if (!ptr)
		return false;

	list_for_each_entry(iface_entry, &iface_stat_list, list)
		if (iface_entry == ptr)
			return true;
	return false;
}

static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
{
	spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
	list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
		spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
		return;
	}
	ppi->iface_entry = NULL;
}

static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_print_info *ppi = m->private;
	struct tag_stat *ts_entry;
	struct rb_node *node;

	if (!v) {
		pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
		return NULL;
	}

	(*pos)++;

	if (!ppi->iface_entry || unlikely(module_passive))
		return NULL;

	if (v == SEQ_START_TOKEN)
		node = rb_first(&ppi->iface_entry->tag_stat_tree);
	else
		node = rb_next(&((struct tag_stat *)v)->tn.node);

	while (!node) {
		qtaguid_stats_proc_next_iface_entry(ppi);
		if (!ppi->iface_entry)
			return NULL;
		node = rb_first(&ppi->iface_entry->tag_stat_tree);
	}

	ts_entry = rb_entry(node, struct tag_stat, tn.node);
	ppi->tag = ts_entry->tn.tag;
	ppi->tag_pos = *pos;
	ppi->tag_item_index = ppi->item_index;
	return ts_entry;
}

static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_print_info *ppi = m->private;
	struct tag_stat *ts_entry = NULL;

	spin_lock_bh(&iface_stat_list_lock);

	if (*pos == 0) {
		ppi->item_index = 1;
		ppi->tag_pos = 0;
		if (list_empty(&iface_stat_list)) {
			ppi->iface_entry = NULL;
		} else {
			ppi->iface_entry = list_first_entry(&iface_stat_list,
							    struct iface_stat,
							    list);
			spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
		}
		return SEQ_START_TOKEN;
	}
	if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
		if (ppi->iface_entry) {
			pr_err("qtaguid: %s(): iface_entry %p not found\n",
			       __func__, ppi->iface_entry);
			ppi->iface_entry = NULL;
		}
		return NULL;
	}

	spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);

	if (!ppi->tag_pos) {
		/* seq_read skipped first next call */
		ts_entry = SEQ_START_TOKEN;
	} else {
		ts_entry = tag_stat_tree_search(
				&ppi->iface_entry->tag_stat_tree, ppi->tag);
		if (!ts_entry) {
			pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
				__func__, ppi->tag);
			return NULL;
		}
	}

	if (*pos == ppi->tag_pos) { /* normal resume */
		ppi->item_index = ppi->tag_item_index;
	} else {
		/* seq_read skipped a next call */
		*pos = ppi->tag_pos;
		ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
	}

	return ts_entry;
}

static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
{
	struct proc_print_info *ppi = m->private;
	if (ppi->iface_entry)
		spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
	spin_unlock_bh(&iface_stat_list_lock);
}

/*
 * Procfs reader to get all tag stats using style "1)" as described in
 * fs/proc/generic.c
 * Groups all protocols tx/rx bytes.
 */
static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
{
	struct tag_stat *ts_entry = v;

	if (v == SEQ_START_TOKEN)
		pp_stats_header(m);
	else
		pp_sets(m, ts_entry);

	return 0;
}

/*------------------------------------------*/
static int qtudev_open(struct inode *inode, struct file *file)
{
	struct uid_tag_data *utd_entry;
	struct proc_qtu_data *pqd_entry;
	struct proc_qtu_data *new_pqd_entry;
	int res;
	bool utd_entry_found;

	if (unlikely(qtu_proc_handling_passive))
		return 0;

	DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
		 current->pid, current->tgid, current_fsuid());

	spin_lock_bh(&uid_tag_data_tree_lock);

	/* Look for existing uid data, or alloc one. */
	utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
	if (IS_ERR_OR_NULL(utd_entry)) {
		res = PTR_ERR(utd_entry);
		goto err_unlock;
	}

	/* Look for existing PID based proc_data */
	pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
					      current->tgid);
	if (pqd_entry) {
		pr_err("qtaguid: qtudev_open(): %u/%u %u "
		       "%s already opened\n",
		       current->pid, current->tgid, current_fsuid(),
		       QTU_DEV_NAME);
		res = -EBUSY;
		goto err_unlock_free_utd;
	}

	new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
	if (!new_pqd_entry) {
		pr_err("qtaguid: qtudev_open(): %u/%u %u: "
		       "proc data alloc failed\n",
		       current->pid, current->tgid, current_fsuid());
		res = -ENOMEM;
		goto err_unlock_free_utd;
	}
	new_pqd_entry->pid = current->tgid;
	INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
	new_pqd_entry->parent_tag_data = utd_entry;
	utd_entry->num_pqd++;

	proc_qtu_data_tree_insert(new_pqd_entry,
				  &proc_qtu_data_tree);

	spin_unlock_bh(&uid_tag_data_tree_lock);
	DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
		 current_fsuid(), new_pqd_entry);
	file->private_data = new_pqd_entry;
	return 0;

err_unlock_free_utd:
	if (!utd_entry_found) {
		rb_erase(&utd_entry->node, &uid_tag_data_tree);
		kfree(utd_entry);
	}
err_unlock:
	spin_unlock_bh(&uid_tag_data_tree_lock);
	return res;
}

static int qtudev_release(struct inode *inode, struct file *file)
{
	struct proc_qtu_data *pqd_entry = file->private_data;
	struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
	struct sock_tag *st_entry;
	struct rb_root st_to_free_tree = RB_ROOT;
	struct list_head *entry, *next;
	struct tag_ref *tr;

	if (unlikely(qtu_proc_handling_passive))
		return 0;

	/*
	 * Do not trust the current->pid, it might just be a kworker cleaning
	 * up after a dead proc.
	 */
	DR_DEBUG("qtaguid: qtudev_release(): "
		 "pid=%u tgid=%u uid=%u "
		 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
		 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
		 pqd_entry, pqd_entry->pid, utd_entry,
		 utd_entry->num_active_tags);

	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);

	list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
		st_entry = list_entry(entry, struct sock_tag, list);
		DR_DEBUG("qtaguid: %s(): "
			 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
			 __func__,
			 st_entry, st_entry->sk,
			 current->pid, current->tgid,
			 pqd_entry->parent_tag_data->uid);

		utd_entry = uid_tag_data_tree_search(
			&uid_tag_data_tree,
			get_uid_from_tag(st_entry->tag));
		BUG_ON(IS_ERR_OR_NULL(utd_entry));
		DR_DEBUG("qtaguid: %s(): "
			 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
			 st_entry->tag, utd_entry);
		tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
					 st_entry->tag);
		BUG_ON(!tr);
		BUG_ON(tr->num_sock_tags <= 0);
		tr->num_sock_tags--;
		free_tag_ref_from_utd_entry(tr, utd_entry);

		rb_erase(&st_entry->sock_node, &sock_tag_tree);
		list_del(&st_entry->list);
		/* Can't sockfd_put() within spinlock, do it later. */
		sock_tag_tree_insert(st_entry, &st_to_free_tree);

		/*
		 * Try to free the utd_entry if no other proc_qtu_data is
		 * using it (num_pqd is 0) and it doesn't have active tags
		 * (num_active_tags is 0).
		 */
		put_utd_entry(utd_entry);
	}

	rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
	BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
	pqd_entry->parent_tag_data->num_pqd--;
	put_utd_entry(pqd_entry->parent_tag_data);
	kfree(pqd_entry);
	file->private_data = NULL;

	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);


	sock_tag_tree_erase(&st_to_free_tree);

	prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
			   current->pid, current->tgid);
	return 0;
}
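
/*
 * Lifecycle sketch (illustrative, not from the original source): a
 * process that tags sockets is expected to open /dev/xt_qtaguid once
 * and keep the fd for its lifetime:
 *
 *   int qtu_fd = open("/dev/xt_qtaguid", O_RDONLY);
 *   ... tag/untag sockets via /proc/net/xt_qtaguid/ctrl ...
 *
 * When the process exits, qtudev_release() above erases any sock_tags
 * it left behind, which is why ctrl_cmd_tag()/ctrl_cmd_untag() warn
 * when the device was never opened.
 */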

/*------------------------------------------*/
static const struct file_operations qtudev_fops = {
	.owner = THIS_MODULE,
	.open = qtudev_open,
	.release = qtudev_release,
};

static struct miscdevice qtu_device = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = QTU_DEV_NAME,
	.fops = &qtudev_fops,
	/* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
};

static const struct seq_operations proc_qtaguid_ctrl_seqops = {
	.start = qtaguid_ctrl_proc_start,
	.next = qtaguid_ctrl_proc_next,
	.stop = qtaguid_ctrl_proc_stop,
	.show = qtaguid_ctrl_proc_show,
};

static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
				sizeof(struct proc_ctrl_print_info));
}

static const struct file_operations proc_qtaguid_ctrl_fops = {
	.open = proc_qtaguid_ctrl_open,
	.read = seq_read,
	.write = qtaguid_ctrl_proc_write,
	.llseek = seq_lseek,
	.release = seq_release_private,
};

static const struct seq_operations proc_qtaguid_stats_seqops = {
	.start = qtaguid_stats_proc_start,
	.next = qtaguid_stats_proc_next,
	.stop = qtaguid_stats_proc_stop,
	.show = qtaguid_stats_proc_show,
};

static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &proc_qtaguid_stats_seqops,
				sizeof(struct proc_print_info));
}

static const struct file_operations proc_qtaguid_stats_fops = {
	.open = proc_qtaguid_stats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private,
};

/*------------------------------------------*/
static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
{
	int ret;
	*res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
	if (!*res_procdir) {
		pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
		ret = -ENOMEM;
		goto no_dir;
	}

	xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
						*res_procdir,
						&proc_qtaguid_ctrl_fops,
						NULL);
	if (!xt_qtaguid_ctrl_file) {
		pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
		       "file\n");
		ret = -ENOMEM;
		goto no_ctrl_entry;
	}

	xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
						 *res_procdir,
						 &proc_qtaguid_stats_fops,
						 NULL);
	if (!xt_qtaguid_stats_file) {
		pr_err("qtaguid: failed to create xt_qtaguid/stats "
		       "file\n");
		ret = -ENOMEM;
		goto no_stats_entry;
	}
	/*
	 * TODO: add support counter hacking
	 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
	 */
	return 0;

no_stats_entry:
	remove_proc_entry("ctrl", *res_procdir);
no_ctrl_entry:
	remove_proc_entry("xt_qtaguid", NULL);
no_dir:
	return ret;
}

static struct xt_match qtaguid_mt_reg __read_mostly = {
	/*
	 * This module masquerades as the "owner" module so that iptables
	 * tools can deal with it.
	 */
	.name = "owner",
	.revision = 1,
	.family = NFPROTO_UNSPEC,
	.match = qtaguid_mt,
	.matchsize = sizeof(struct xt_qtaguid_match_info),
	.me = THIS_MODULE,
};
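
/*
 * Because this registers as name "owner", revision 1, family
 * NFPROTO_UNSPEC, stock iptables/ip6tables resolves "-m owner" rules to
 * this module (hence the ipt_owner/ip6t_owner aliases below). This
 * assumes struct xt_qtaguid_match_info mirrors the layout of the
 * upstream xt_owner_match_info, which is what the masquerading relies
 * on.
 */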

static int __init qtaguid_mt_init(void)
{
	if (qtaguid_proc_register(&xt_qtaguid_procdir)
	    || iface_stat_init(xt_qtaguid_procdir)
	    || xt_register_match(&qtaguid_mt_reg)
	    || misc_register(&qtu_device))
		return -1;
	return 0;
}

/*
 * TODO: allow unloading of the module.
 * For now stats are permanent.
 * Kconfig forces 'y/n' and never an 'm'.
 */

module_init(qtaguid_mt_init);
MODULE_AUTHOR("jpa <jpa@google.com>");
MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_owner");
MODULE_ALIAS("ip6t_owner");
MODULE_ALIAS("ipt_qtaguid");
MODULE_ALIAS("ip6t_qtaguid");