/*
 * Kernel iptables module to track stats for packets based on user tags.
 *
 * (C) 2011 Google, Inc
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
10#ifndef __XT_QTAGUID_INTERNAL_H__
11#define __XT_QTAGUID_INTERNAL_H__
12
13#include <linux/types.h>
14#include <linux/rbtree.h>
15#include <linux/spinlock_types.h>
16#include <linux/workqueue.h>
17
/*
 * Debug area selectors. Each *_MASK is a distinct single bit so that several
 * areas can be OR-ed together into one debug mask value.
 */
/* Iface handling */
#define IDEBUG_MASK (1<<0)
/* Iptable Matching. Per packet. */
#define MDEBUG_MASK (1<<1)
/* Red-black tree handling. Per packet. */
#define RDEBUG_MASK (1<<2)
/* procfs ctrl/stats handling */
#define CDEBUG_MASK (1<<3)
/* dev and resource tracking */
#define DDEBUG_MASK (1<<4)

/* E.g. (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
#define DEFAULT_DEBUG_MASK 0
31
32/*
33 * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
34 * All undef: text size ~ 0x3030; all def: ~ 0x4404.
35 */
36#define IDEBUG
37#define MDEBUG
38#define RDEBUG
39#define CDEBUG
40#define DDEBUG
41
42#define MSK_DEBUG(mask, ...) do { \
43 if (unlikely(qtaguid_debug_mask & (mask))) \
44 pr_debug(__VA_ARGS__); \
45 } while (0)
46#ifdef IDEBUG
47#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
48#else
49#define IF_DEBUG(...) no_printk(__VA_ARGS__)
50#endif
51#ifdef MDEBUG
52#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
53#else
54#define MT_DEBUG(...) no_printk(__VA_ARGS__)
55#endif
56#ifdef RDEBUG
57#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
58#else
59#define RB_DEBUG(...) no_printk(__VA_ARGS__)
60#endif
61#ifdef CDEBUG
62#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
63#else
64#define CT_DEBUG(...) no_printk(__VA_ARGS__)
65#endif
66#ifdef DDEBUG
67#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
68#else
69#define DR_DEBUG(...) no_printk(__VA_ARGS__)
70#endif
71
72extern uint qtaguid_debug_mask;
73
/*---------------------------------------------------------------------------*/
/*
 * Tags:
 *
 * They represent what the data usage counters will be tracked against.
 * By default a tag is just based on the UID.
 * The UID is used as the base for policing, and can not be ignored.
 * So a tag will always at least represent a UID (uid_tag).
 *
 * A tag can be augmented with an "accounting tag" which is associated
 * with a UID.
 * User space can set the acct_tag portion of the tag which is then used
 * with sockets: all data belonging to that socket will be counted against the
 * tag. The policing is then based on the tag's uid_tag portion,
 * and stats are collected for the acct_tag portion separately.
 *
 * There could be
 * a:  {acct_tag=1, uid_tag=10003}
 * b:  {acct_tag=2, uid_tag=10003}
 * c:  {acct_tag=3, uid_tag=10003}
 * d:  {acct_tag=0, uid_tag=10003}
 * a, b, and c represent tags associated with specific sockets.
 * d is for the totals for that uid, including all untagged traffic.
 * Typically d is used with policing/quota rules.
 *
 * We want tag_t big enough to distinguish uid_t and acct_tag.
 * It might become a struct if needed.
 * Nothing should be using it as an int.
 */
typedef uint64_t tag_t;  /* Only used via accessors */

/* The uid_tag lives in the low 32 bits, the acct_tag in the high 32 bits. */
#define TAG_UID_MASK 0xFFFFFFFFULL
#define TAG_ACCT_MASK (~0xFFFFFFFFULL)

/* Three-way compare: returns -1, 0 or 1 when t1 is <, == or > t2. */
static inline int tag_compare(tag_t t1, tag_t t2)
{
	if (t1 < t2)
		return -1;
	if (t1 > t2)
		return 1;
	return 0;
}

/* OR @uid into @acct_tag; @acct_tag is used as given, it is not masked. */
static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
{
	return acct_tag | (tag_t)uid;
}

/* Build a tag that carries only @uid (acct_tag portion is zero). */
static inline tag_t make_tag_from_uid(uid_t uid)
{
	return (tag_t)uid;
}

/* Extract the uid_tag portion of @tag as a uid_t. */
static inline uid_t get_uid_from_tag(tag_t tag)
{
	/* Explicit narrowing: only the bits under TAG_UID_MASK survive. */
	return (uid_t)(tag & TAG_UID_MASK);
}

/* Return @tag with its acct_tag portion cleared. */
static inline tag_t get_utag_from_tag(tag_t tag)
{
	return tag & TAG_UID_MASK;
}

/* Return @tag with its uid_tag portion cleared. */
static inline tag_t get_atag_from_tag(tag_t tag)
{
	return tag & TAG_ACCT_MASK;
}

/* True when @tag has no bits set in its uid_tag portion. */
static inline bool valid_atag(tag_t tag)
{
	return (tag & TAG_UID_MASK) == 0;
}

/* Place the 32-bit @value in the acct_tag portion of a new tag. */
static inline tag_t make_atag_from_value(uint32_t value)
{
	return (tag_t)value << 32;
}
/*---------------------------------------------------------------------------*/
143
/*
 * Maximum number of socket tags that a UID is allowed to have active.
 * Multiple processes belonging to the same UID contribute towards this limit.
 * Special UIDs that can impersonate a UID also contribute (e.g. download
 * manager, ...)
 */
#define DEFAULT_MAX_SOCK_TAGS 1024
151
/*
 * For now we only track 2 sets of counters.
 * The default set is 0.
 * Userspace can activate another set for a given uid being tracked.
 */
#define IFS_MAX_COUNTER_SETS 2

/* Traffic direction; IFS_MAX_DIRECTIONS is the number of directions. */
enum ifs_tx_rx {
	IFS_TX,
	IFS_RX,
	IFS_MAX_DIRECTIONS
};

/* For now, TCP, UDP, the rest */
enum ifs_proto {
	IFS_TCP,
	IFS_UDP,
	IFS_PROTO_OTHER,
	IFS_MAX_PROTOS
};

/* One byte count paired with one packet count. */
struct byte_packet_counters {
	uint64_t bytes;
	uint64_t packets;
};

/* Counters indexed by [counter set][direction][protocol]. */
struct data_counters {
	struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
};

/*
 * Sum the byte counts of all protocols for the given counter @set and
 * @direction. @set and @direction are used as array indices without any
 * range check, so the caller must pass valid values.
 */
static inline uint64_t dc_sum_bytes(const struct data_counters *counters,
				    int set,
				    enum ifs_tx_rx direction)
{
	const struct byte_packet_counters *per_proto =
		counters->bpc[set][direction];

	return per_proto[IFS_TCP].bytes
		+ per_proto[IFS_UDP].bytes
		+ per_proto[IFS_PROTO_OTHER].bytes;
}

/*
 * Sum the packet counts of all protocols for the given counter @set and
 * @direction. @set and @direction are used as array indices without any
 * range check, so the caller must pass valid values.
 */
static inline uint64_t dc_sum_packets(const struct data_counters *counters,
				      int set,
				      enum ifs_tx_rx direction)
{
	const struct byte_packet_counters *per_proto =
		counters->bpc[set][direction];

	return per_proto[IFS_TCP].packets
		+ per_proto[IFS_UDP].packets
		+ per_proto[IFS_PROTO_OTHER].packets;
}
199
200
JP Abgrallbaf0db42011-06-20 12:41:46 -0700201/* Generic X based nodes used as a base for rb_tree ops */
202struct tag_node {
203 struct rb_node node;
204 tag_t tag;
205};
206
207struct tag_stat {
208 struct tag_node tn;
209 struct data_counters counters;
210 /*
211 * If this tag is acct_tag based, we need to count against the
212 * matching parent uid_tag.
213 */
214 struct data_counters *parent_counters;
215};
216
217struct iface_stat {
218 struct list_head list; /* in iface_stat_list */
219 char *ifname;
220 bool active;
221 /* net_dev is only valid for active iface_stat */
222 struct net_device *net_dev;
223
JP Abgrall9e0858c2012-04-27 12:57:39 -0700224 struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
JP Abgrall87f93e82013-01-28 16:50:44 -0800225 struct data_counters totals_via_skb;
JP Abgrallbaf0db42011-06-20 12:41:46 -0700226 /*
227 * We keep the last_known, because some devices reset their counters
228 * just before NETDEV_UP, while some will reset just before
229 * NETDEV_REGISTER (which is more normal).
230 * So now, if the device didn't do a NETDEV_UNREGISTER and we see
231 * its current dev stats smaller that what was previously known, we
232 * assume an UNREGISTER and just use the last_known.
233 */
234 struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
235 /* last_known is usable when last_known_valid is true */
236 bool last_known_valid;
237
238 struct proc_dir_entry *proc_ptr;
239
240 struct rb_root tag_stat_tree;
241 spinlock_t tag_stat_list_lock;
242};
243
244/* This is needed to create proc_dir_entries from atomic context. */
245struct iface_stat_work {
246 struct work_struct iface_work;
247 struct iface_stat *iface_entry;
248};
249
250/*
251 * Track tag that this socket is transferring data for, and not necessarily
252 * the uid that owns the socket.
253 * This is the tag against which tag_stat.counters will be billed.
254 * These structs need to be looked up by sock and pid.
255 */
256struct sock_tag {
257 struct rb_node sock_node;
258 struct sock *sk; /* Only used as a number, never dereferenced */
JP Abgrallbaf0db42011-06-20 12:41:46 -0700259 /* Used to associate with a given pid */
260 struct list_head list; /* in proc_qtu_data.sock_tag_list */
261 pid_t pid;
262
263 tag_t tag;
264};
265
266struct qtaguid_event_counts {
267 /* Various successful events */
268 atomic64_t sockets_tagged;
269 atomic64_t sockets_untagged;
270 atomic64_t counter_set_changes;
271 atomic64_t delete_cmds;
272 atomic64_t iface_events; /* Number of NETDEV_* events handled */
273
274 atomic64_t match_calls; /* Number of times iptables called mt */
JP Abgrall9e0858c2012-04-27 12:57:39 -0700275 /* Number of times iptables called mt from pre or post routing hooks */
276 atomic64_t match_calls_prepost;
JP Abgrallbaf0db42011-06-20 12:41:46 -0700277 /*
278 * match_found_sk_*: numbers related to the netfilter matching
279 * function finding a sock for the sk_buff.
280 * Total skbs processed is sum(match_found*).
281 */
282 atomic64_t match_found_sk; /* An sk was already in the sk_buff. */
283 /* The connection tracker had or didn't have the sk. */
284 atomic64_t match_found_sk_in_ct;
285 atomic64_t match_found_no_sk_in_ct;
286 /*
287 * No sk could be found. No apparent owner. Could happen with
288 * unsolicited traffic.
289 */
290 atomic64_t match_no_sk;
291 /*
Chenbo Fenge63ae192017-04-20 18:54:13 -0700292 * The file ptr in the sk_socket wasn't there and we couldn't get GID.
JP Abgrallbaf0db42011-06-20 12:41:46 -0700293 * This might happen for traffic while the socket is being closed.
294 */
Chenbo Fenge63ae192017-04-20 18:54:13 -0700295 atomic64_t match_no_sk_gid;
JP Abgrallbaf0db42011-06-20 12:41:46 -0700296};
297
298/* Track the set active_set for the given tag. */
299struct tag_counter_set {
300 struct tag_node tn;
301 int active_set;
302};
303
304/*----------------------------------------------*/
305/*
306 * The qtu uid data is used to track resources that are created directly or
307 * indirectly by processes (uid tracked).
308 * It is shared by the processes with the same uid.
309 * Some of the resource will be counted to prevent further rogue allocations,
310 * some will need freeing once the owner process (uid) exits.
311 */
312struct uid_tag_data {
313 struct rb_node node;
314 uid_t uid;
315
316 /*
317 * For the uid, how many accounting tags have been set.
318 */
319 int num_active_tags;
320 /* Track the number of proc_qtu_data that reference it */
321 int num_pqd;
322 struct rb_root tag_ref_tree;
323 /* No tag_node_tree_lock; use uid_tag_data_tree_lock */
324};
325
326struct tag_ref {
327 struct tag_node tn;
328
329 /*
330 * This tracks the number of active sockets that have a tag on them
331 * which matches this tag_ref.tn.tag.
332 * A tag ref can live on after the sockets are untagged.
333 * A tag ref can only be removed during a tag delete command.
334 */
335 int num_sock_tags;
336};
337
338struct proc_qtu_data {
339 struct rb_node node;
340 pid_t pid;
341
342 struct uid_tag_data *parent_tag_data;
343
344 /* Tracks the sock_tags that need freeing upon this proc's death */
345 struct list_head sock_tag_list;
346 /* No spinlock_t sock_tag_list_lock; use the global one. */
347};
348
349/*----------------------------------------------*/
350#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */