/*
 * Kernel iptables module to track stats for packets based on user tags.
 *
 * (C) 2011 Google, Inc
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
| 10 | #ifndef __XT_QTAGUID_INTERNAL_H__ |
| 11 | #define __XT_QTAGUID_INTERNAL_H__ |
| 12 | |
| 13 | #include <linux/types.h> |
| 14 | #include <linux/rbtree.h> |
| 15 | #include <linux/spinlock_types.h> |
| 16 | #include <linux/workqueue.h> |
| 17 | |
/*
 * Debug categories. Each mask is a distinct bit that MSK_DEBUG() tests
 * against qtaguid_debug_mask.
 */
/* Iface handling */
#define IDEBUG_MASK (1<<0)
/* Iptable Matching. Per packet. */
#define MDEBUG_MASK (1<<1)
/* Red-black tree handling. Per packet. */
#define RDEBUG_MASK (1<<2)
/* procfs ctrl/stats handling */
#define CDEBUG_MASK (1<<3)
/* dev and resource tracking */
#define DDEBUG_MASK (1<<4)

/*
 * Default set of categories; 0 selects none of the masks above.
 * E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK)
 */
#define DEFAULT_DEBUG_MASK 0
| 31 | |
/*
 * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
 * All undef: text size ~ 0x3030; all def: ~ 0x4404.
 */
#define IDEBUG
#define MDEBUG
#define RDEBUG
#define CDEBUG
#define DDEBUG

/*
 * Forward the format/arguments to pr_debug() only when at least one bit of
 * @mask is set in qtaguid_debug_mask.
 * The do { } while (0) wrapper makes the expansion a single statement, so it
 * is safe in an unbraced if/else.
 */
#define MSK_DEBUG(mask, ...) do {				\
		if (unlikely(qtaguid_debug_mask & (mask)))	\
			pr_debug(__VA_ARGS__);			\
	} while (0)

/*
 * Per-category wrappers.
 * When the matching *DEBUG switch above is defined the wrapper goes through
 * MSK_DEBUG() with that category's mask; otherwise it expands to
 * no_printk() with the same arguments.
 */
#ifdef IDEBUG
#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
#else
#define IF_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef MDEBUG
#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
#else
#define MT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef RDEBUG
#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
#else
#define RB_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef CDEBUG
#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
#else
#define CT_DEBUG(...) no_printk(__VA_ARGS__)
#endif
#ifdef DDEBUG
#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
#else
#define DR_DEBUG(...) no_printk(__VA_ARGS__)
#endif
| 71 | |
| 72 | extern uint qtaguid_debug_mask; |
| 73 | |
/*---------------------------------------------------------------------------*/
/*
 * Tags:
 *
 * They represent what the data usage counters will be tracked against.
 * By default a tag is just based on the UID.
 * The UID is used as the base for policing, and can not be ignored.
 * So a tag will always at least represent a UID (uid_tag).
 *
 * A tag can be augmented with an "accounting tag" which is associated
 * with a UID.
 * User space can set the acct_tag portion of the tag which is then used
 * with sockets: all data belonging to that socket will be counted against the
 * tag. The policing is then based on the tag's uid_tag portion,
 * and stats are collected for the acct_tag portion separately.
 *
 * There could be
 * a:  {acct_tag=1, uid_tag=10003}
 * b:  {acct_tag=2, uid_tag=10003}
 * c:  {acct_tag=3, uid_tag=10003}
 * d:  {acct_tag=0, uid_tag=10003}
 * a, b, and c represent tags associated with specific sockets.
 * d is for the totals for that uid, including all untagged traffic.
 * Typically d is used with policing/quota rules.
 *
 * We want tag_t big enough to distinguish uid_t and acct_tag.
 * It might become a struct if needed.
 * Nothing should be using it as an int.
 */
typedef uint64_t tag_t;  /* Only used via accessors */

/* The uid_tag lives in the low 32 bits, the acct_tag in the high 32 bits. */
#define TAG_UID_MASK 0xFFFFFFFFULL
#define TAG_ACCT_MASK (~0xFFFFFFFFULL)

/*
 * Three-way unsigned comparison of two tags.
 * Returns -1 if t1 < t2, 0 if they are equal, 1 if t1 > t2.
 */
static inline int tag_compare(tag_t t1, tag_t t2)
{
	if (t1 < t2)
		return -1;
	if (t1 == t2)
		return 0;
	return 1;
}

/*
 * Build a full tag by OR-ing @uid into @acct_tag.
 * @acct_tag is used as given; no masking or validation is done here.
 */
static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
{
	return acct_tag | uid;
}

/* Build a tag that carries only @uid (acct_tag portion is 0). */
static inline tag_t make_tag_from_uid(uid_t uid)
{
	return uid;
}

/* Extract the uid_tag portion of @tag as a uid_t. */
static inline uid_t get_uid_from_tag(tag_t tag)
{
	/* The mask keeps only the low 32 bits; make the narrowing explicit. */
	return (uid_t)(tag & TAG_UID_MASK);
}

/* Return @tag with the acct_tag portion cleared, still as a tag_t. */
static inline tag_t get_utag_from_tag(tag_t tag)
{
	return tag & TAG_UID_MASK;
}

/* Return @tag with the uid_tag portion cleared, still as a tag_t. */
static inline tag_t get_atag_from_tag(tag_t tag)
{
	return tag & TAG_ACCT_MASK;
}

/* True when @tag has no bits set in the uid_tag portion. */
static inline bool valid_atag(tag_t tag)
{
	return (tag & TAG_UID_MASK) == 0;
}

/* Place the 32-bit @value into the acct_tag (high 32 bits) portion. */
static inline tag_t make_atag_from_value(uint32_t value)
{
	return (uint64_t)value << 32;
}
/*---------------------------------------------------------------------------*/
| 143 | |
/*
 * Maximum number of socket tags that a UID is allowed to have active.
 * Multiple processes belonging to the same UID contribute towards this limit.
 * Special UIDs that can impersonate a UID also contribute (e.g. download
 * manager, ...)
 */
#define DEFAULT_MAX_SOCK_TAGS 1024
| 151 | |
/*
 * For now we only track 2 sets of counters.
 * The default set is 0.
 * Userspace can activate another set for a given uid being tracked.
 */
#define IFS_MAX_COUNTER_SETS 2

/* Traffic direction; IFS_MAX_DIRECTIONS is the number of directions. */
enum ifs_tx_rx {
	IFS_TX,
	IFS_RX,
	IFS_MAX_DIRECTIONS
};

/* For now, TCP, UDP, the rest. IFS_MAX_PROTOS is the number of buckets. */
enum ifs_proto {
	IFS_TCP,
	IFS_UDP,
	IFS_PROTO_OTHER,
	IFS_MAX_PROTOS
};

/* One bytes/packets counter pair. */
struct byte_packet_counters {
	uint64_t bytes;
	uint64_t packets;
};

/* Counters indexed by [counter set][direction][protocol]. */
struct data_counters {
	struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
};

/*
 * Sum the byte counts of all three protocol buckets (TCP, UDP, other)
 * for the given counter @set and @direction.
 * @counters is only read. @set is used as an array index without a range
 * check; the caller must keep it below IFS_MAX_COUNTER_SETS.
 */
static inline uint64_t dc_sum_bytes(const struct data_counters *counters,
				    int set,
				    enum ifs_tx_rx direction)
{
	const struct byte_packet_counters *per_proto =
		counters->bpc[set][direction];

	return per_proto[IFS_TCP].bytes
		+ per_proto[IFS_UDP].bytes
		+ per_proto[IFS_PROTO_OTHER].bytes;
}

/*
 * Sum the packet counts of all three protocol buckets (TCP, UDP, other)
 * for the given counter @set and @direction.
 * @counters is only read. @set is used as an array index without a range
 * check; the caller must keep it below IFS_MAX_COUNTER_SETS.
 */
static inline uint64_t dc_sum_packets(const struct data_counters *counters,
				      int set,
				      enum ifs_tx_rx direction)
{
	const struct byte_packet_counters *per_proto =
		counters->bpc[set][direction];

	return per_proto[IFS_TCP].packets
		+ per_proto[IFS_UDP].packets
		+ per_proto[IFS_PROTO_OTHER].packets;
}
| 199 | |
| 200 | |
JP Abgrall | baf0db4 | 2011-06-20 12:41:46 -0700 | [diff] [blame] | 201 | /* Generic X based nodes used as a base for rb_tree ops */ |
| 202 | struct tag_node { |
| 203 | struct rb_node node; |
| 204 | tag_t tag; |
| 205 | }; |
| 206 | |
| 207 | struct tag_stat { |
| 208 | struct tag_node tn; |
| 209 | struct data_counters counters; |
| 210 | /* |
| 211 | * If this tag is acct_tag based, we need to count against the |
| 212 | * matching parent uid_tag. |
| 213 | */ |
| 214 | struct data_counters *parent_counters; |
| 215 | }; |
| 216 | |
| 217 | struct iface_stat { |
| 218 | struct list_head list; /* in iface_stat_list */ |
| 219 | char *ifname; |
| 220 | bool active; |
| 221 | /* net_dev is only valid for active iface_stat */ |
| 222 | struct net_device *net_dev; |
| 223 | |
JP Abgrall | 9e0858c | 2012-04-27 12:57:39 -0700 | [diff] [blame] | 224 | struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS]; |
JP Abgrall | 87f93e8 | 2013-01-28 16:50:44 -0800 | [diff] [blame] | 225 | struct data_counters totals_via_skb; |
JP Abgrall | baf0db4 | 2011-06-20 12:41:46 -0700 | [diff] [blame] | 226 | /* |
| 227 | * We keep the last_known, because some devices reset their counters |
| 228 | * just before NETDEV_UP, while some will reset just before |
| 229 | * NETDEV_REGISTER (which is more normal). |
| 230 | * So now, if the device didn't do a NETDEV_UNREGISTER and we see |
| 231 | * its current dev stats smaller that what was previously known, we |
| 232 | * assume an UNREGISTER and just use the last_known. |
| 233 | */ |
| 234 | struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS]; |
| 235 | /* last_known is usable when last_known_valid is true */ |
| 236 | bool last_known_valid; |
| 237 | |
| 238 | struct proc_dir_entry *proc_ptr; |
| 239 | |
| 240 | struct rb_root tag_stat_tree; |
| 241 | spinlock_t tag_stat_list_lock; |
| 242 | }; |
| 243 | |
| 244 | /* This is needed to create proc_dir_entries from atomic context. */ |
| 245 | struct iface_stat_work { |
| 246 | struct work_struct iface_work; |
| 247 | struct iface_stat *iface_entry; |
| 248 | }; |
| 249 | |
| 250 | /* |
| 251 | * Track tag that this socket is transferring data for, and not necessarily |
| 252 | * the uid that owns the socket. |
| 253 | * This is the tag against which tag_stat.counters will be billed. |
| 254 | * These structs need to be looked up by sock and pid. |
| 255 | */ |
| 256 | struct sock_tag { |
| 257 | struct rb_node sock_node; |
| 258 | struct sock *sk; /* Only used as a number, never dereferenced */ |
JP Abgrall | baf0db4 | 2011-06-20 12:41:46 -0700 | [diff] [blame] | 259 | /* Used to associate with a given pid */ |
| 260 | struct list_head list; /* in proc_qtu_data.sock_tag_list */ |
| 261 | pid_t pid; |
| 262 | |
| 263 | tag_t tag; |
| 264 | }; |
| 265 | |
| 266 | struct qtaguid_event_counts { |
| 267 | /* Various successful events */ |
| 268 | atomic64_t sockets_tagged; |
| 269 | atomic64_t sockets_untagged; |
| 270 | atomic64_t counter_set_changes; |
| 271 | atomic64_t delete_cmds; |
| 272 | atomic64_t iface_events; /* Number of NETDEV_* events handled */ |
| 273 | |
| 274 | atomic64_t match_calls; /* Number of times iptables called mt */ |
JP Abgrall | 9e0858c | 2012-04-27 12:57:39 -0700 | [diff] [blame] | 275 | /* Number of times iptables called mt from pre or post routing hooks */ |
| 276 | atomic64_t match_calls_prepost; |
JP Abgrall | baf0db4 | 2011-06-20 12:41:46 -0700 | [diff] [blame] | 277 | /* |
| 278 | * match_found_sk_*: numbers related to the netfilter matching |
| 279 | * function finding a sock for the sk_buff. |
| 280 | * Total skbs processed is sum(match_found*). |
| 281 | */ |
| 282 | atomic64_t match_found_sk; /* An sk was already in the sk_buff. */ |
| 283 | /* The connection tracker had or didn't have the sk. */ |
| 284 | atomic64_t match_found_sk_in_ct; |
| 285 | atomic64_t match_found_no_sk_in_ct; |
| 286 | /* |
| 287 | * No sk could be found. No apparent owner. Could happen with |
| 288 | * unsolicited traffic. |
| 289 | */ |
| 290 | atomic64_t match_no_sk; |
| 291 | /* |
Chenbo Feng | 5d534f4 | 2017-04-20 18:54:13 -0700 | [diff] [blame] | 292 | * The file ptr in the sk_socket wasn't there and we couldn't get GID. |
JP Abgrall | baf0db4 | 2011-06-20 12:41:46 -0700 | [diff] [blame] | 293 | * This might happen for traffic while the socket is being closed. |
| 294 | */ |
Chenbo Feng | 5d534f4 | 2017-04-20 18:54:13 -0700 | [diff] [blame] | 295 | atomic64_t match_no_sk_gid; |
JP Abgrall | baf0db4 | 2011-06-20 12:41:46 -0700 | [diff] [blame] | 296 | }; |
| 297 | |
| 298 | /* Track the set active_set for the given tag. */ |
| 299 | struct tag_counter_set { |
| 300 | struct tag_node tn; |
| 301 | int active_set; |
| 302 | }; |
| 303 | |
| 304 | /*----------------------------------------------*/ |
| 305 | /* |
| 306 | * The qtu uid data is used to track resources that are created directly or |
| 307 | * indirectly by processes (uid tracked). |
| 308 | * It is shared by the processes with the same uid. |
| 309 | * Some of the resource will be counted to prevent further rogue allocations, |
| 310 | * some will need freeing once the owner process (uid) exits. |
| 311 | */ |
| 312 | struct uid_tag_data { |
| 313 | struct rb_node node; |
| 314 | uid_t uid; |
| 315 | |
| 316 | /* |
| 317 | * For the uid, how many accounting tags have been set. |
| 318 | */ |
| 319 | int num_active_tags; |
| 320 | /* Track the number of proc_qtu_data that reference it */ |
| 321 | int num_pqd; |
| 322 | struct rb_root tag_ref_tree; |
| 323 | /* No tag_node_tree_lock; use uid_tag_data_tree_lock */ |
| 324 | }; |
| 325 | |
| 326 | struct tag_ref { |
| 327 | struct tag_node tn; |
| 328 | |
| 329 | /* |
| 330 | * This tracks the number of active sockets that have a tag on them |
| 331 | * which matches this tag_ref.tn.tag. |
| 332 | * A tag ref can live on after the sockets are untagged. |
| 333 | * A tag ref can only be removed during a tag delete command. |
| 334 | */ |
| 335 | int num_sock_tags; |
| 336 | }; |
| 337 | |
| 338 | struct proc_qtu_data { |
| 339 | struct rb_node node; |
| 340 | pid_t pid; |
| 341 | |
| 342 | struct uid_tag_data *parent_tag_data; |
| 343 | |
| 344 | /* Tracks the sock_tags that need freeing upon this proc's death */ |
| 345 | struct list_head sock_tag_list; |
| 346 | /* No spinlock_t sock_tag_list_lock; use the global one. */ |
| 347 | }; |
| 348 | |
| 349 | /*----------------------------------------------*/ |
| 350 | #endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */ |