blob: b4b106eeb9eccf97c45d089f9923f6282272fab1 [file] [log] [blame]
Lorenzo Colitti9353be22014-12-03 15:18:29 +09001/*
2 * Copyright 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * ring.c - packet ring buffer functions
17 */
18
#include <arpa/inet.h>
#include <errno.h>
#include <linux/if.h>
#include <linux/if_packet.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <unistd.h>

#include "config.h"
#include "logging.h"
#include "ring.h"
#include "translate.h"
Lorenzo Colitti9353be22014-12-03 15:18:29 +090031
// tp_status flag bit (bit 7). When it is set on a received frame, ring_read passes
// TP_CSUM_UNNECESSARY to translate_packet instead of TP_CSUM_NONE.
// NOTE(review): this is defined locally rather than taken from <linux/if_packet.h>; confirm it
// matches the bit the target kernel actually sets (mainline appears to name bit 7
// TP_STATUS_CSUM_VALID).
#define TP_STATUS_CSUM_UNNECESSARY (1 << 7)
33
Lorenzo Colitti9353be22014-12-03 15:18:29 +090034int ring_create(struct tun_data *tunnel) {
Maciej Żenczykowski69c840f2019-11-18 12:00:49 -080035 // Will eventually be bound to htons(ETH_P_IPV6) protocol,
36 // but only after appropriate bpf filter is attached.
37 int packetsock = socket(AF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
Lorenzo Colitti9353be22014-12-03 15:18:29 +090038 if (packetsock < 0) {
39 logmsg(ANDROID_LOG_FATAL, "packet socket failed: %s", strerror(errno));
40 return -1;
41 }
42
43 int ver = TPACKET_V2;
junyulaic4e591a2018-11-26 22:36:10 +090044 if (setsockopt(packetsock, SOL_PACKET, PACKET_VERSION, (void *)&ver, sizeof(ver))) {
Lorenzo Colitti9353be22014-12-03 15:18:29 +090045 logmsg(ANDROID_LOG_FATAL, "setsockopt(PACKET_VERSION, %d) failed: %s", ver, strerror(errno));
46 return -1;
47 }
48
49 int on = 1;
junyulaic4e591a2018-11-26 22:36:10 +090050 if (setsockopt(packetsock, SOL_PACKET, PACKET_LOSS, (void *)&on, sizeof(on))) {
Lorenzo Colitti9353be22014-12-03 15:18:29 +090051 logmsg(ANDROID_LOG_WARN, "PACKET_LOSS failed: %s", strerror(errno));
52 }
53
54 struct packet_ring *ring = &tunnel->ring;
junyulaic4e591a2018-11-26 22:36:10 +090055 ring->numblocks = TP_NUM_BLOCKS;
Lorenzo Colitti9353be22014-12-03 15:18:29 +090056
57 int total_frames = TP_FRAMES * ring->numblocks;
58
59 struct tpacket_req req = {
junyulaic4e591a2018-11-26 22:36:10 +090060 .tp_frame_size = TP_FRAME_SIZE, // Frame size.
61 .tp_block_size = TP_BLOCK_SIZE, // Frames per block.
62 .tp_block_nr = ring->numblocks, // Number of blocks.
63 .tp_frame_nr = total_frames, // Total frames.
Lorenzo Colitti9353be22014-12-03 15:18:29 +090064 };
65
66 if (setsockopt(packetsock, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)) < 0) {
67 logmsg(ANDROID_LOG_FATAL, "PACKET_RX_RING failed: %s", strerror(errno));
68 return -1;
69 }
70
71 size_t buflen = TP_BLOCK_SIZE * ring->numblocks;
junyulaic4e591a2018-11-26 22:36:10 +090072 ring->base = mmap(NULL, buflen, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED | MAP_POPULATE,
Lorenzo Colitti9353be22014-12-03 15:18:29 +090073 packetsock, 0);
74 if (ring->base == MAP_FAILED) {
75 logmsg(ANDROID_LOG_FATAL, "mmap %lu failed: %s", buflen, strerror(errno));
76 return -1;
77 }
78
junyulaic4e591a2018-11-26 22:36:10 +090079 ring->block = 0;
80 ring->slot = 0;
Lorenzo Colitti9353be22014-12-03 15:18:29 +090081 ring->numslots = TP_BLOCK_SIZE / TP_FRAME_SIZE;
junyulaic4e591a2018-11-26 22:36:10 +090082 ring->next = (struct tpacket2_hdr *)ring->base;
Lorenzo Colitti9353be22014-12-03 15:18:29 +090083
junyulaic4e591a2018-11-26 22:36:10 +090084 logmsg(ANDROID_LOG_INFO, "Using ring buffer with %d frames (%d bytes) at %p", total_frames,
85 buflen, ring->base);
Lorenzo Colitti9353be22014-12-03 15:18:29 +090086
87 return packetsock;
88}
89
90/* function: ring_advance
91 * advances to the next position in the packet ring
92 * ring - packet ring buffer
93 */
junyulaic4e591a2018-11-26 22:36:10 +090094static struct tpacket2_hdr *ring_advance(struct packet_ring *ring) {
95 uint8_t *next = (uint8_t *)ring->next;
Lorenzo Colitti9353be22014-12-03 15:18:29 +090096
97 ring->slot++;
98 next += TP_FRAME_SIZE;
99
100 if (ring->slot == ring->numslots) {
101 ring->slot = 0;
102 ring->block++;
103
104 if (ring->block < ring->numblocks) {
105 next += TP_FRAME_GAP;
106 } else {
107 ring->block = 0;
junyulaic4e591a2018-11-26 22:36:10 +0900108 next = (uint8_t *)ring->base;
Lorenzo Colitti9353be22014-12-03 15:18:29 +0900109 }
110 }
111
junyulaic4e591a2018-11-26 22:36:10 +0900112 ring->next = (struct tpacket2_hdr *)next;
Lorenzo Colitti9353be22014-12-03 15:18:29 +0900113 return ring->next;
114}
115
116/* function: ring_read
117 * reads a packet from the ring buffer and translates it
118 * read_fd - file descriptor to read original packet from
119 * write_fd - file descriptor to write translated packet to
120 * to_ipv6 - whether the packet is to be translated to ipv6 or ipv4
121 */
122void ring_read(struct packet_ring *ring, int write_fd, int to_ipv6) {
123 struct tpacket2_hdr *tp = ring->next;
Subash Abhinov Kasiviswanathan5c5b5342015-10-26 18:45:04 -0600124 uint16_t val = TP_CSUM_NONE;
Lorenzo Colitti9353be22014-12-03 15:18:29 +0900125 if (tp->tp_status & TP_STATUS_USER) {
Subash Abhinov Kasiviswanathan5c5b5342015-10-26 18:45:04 -0600126 //We expect only GRO coalesced packets to have TP_STATUS_CSUMNOTREADY
127 //(ip_summed = CHECKSUM_PARTIAL) in this path. Note that these packets have already gone
128 //through checksum validation in GRO engine. CHECKSUM_PARTIAL is defined to be 3 while
129 //CHECKSUM_UNNECESSARY is defined to be 1.
130 //Kernel only checks for CHECKSUM_UNNECESSARY (TP_CSUM_UNNECESSARY) bit while processing a
131 //packet, so its ok to pass only this bit rather than the full ip_summed field.
132 if ((tp->tp_status & TP_STATUS_CSUMNOTREADY) || (tp->tp_status & TP_STATUS_CSUM_UNNECESSARY)) {
133 val = TP_CSUM_UNNECESSARY;
134 }
Lorenzo Colitti9353be22014-12-03 15:18:29 +0900135 uint8_t *packet = ((uint8_t *) tp) + tp->tp_net;
Subash Abhinov Kasiviswanathan5c5b5342015-10-26 18:45:04 -0600136 translate_packet(write_fd, to_ipv6, packet, tp->tp_len, val);
Lorenzo Colitti9353be22014-12-03 15:18:29 +0900137 tp->tp_status = TP_STATUS_KERNEL;
junyulaic4e591a2018-11-26 22:36:10 +0900138 tp = ring_advance(ring);
Lorenzo Colitti9353be22014-12-03 15:18:29 +0900139 }
140}