blob: 1f2fe3903cff2f7189c8836566f826510adedb17 [file] [log] [blame]
/* Copyright 2013 Google Inc. All Rights Reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

   Transformations on dictionary words.
*/
17
18#ifndef BROTLI_DEC_TRANSFORM_H_
19#define BROTLI_DEC_TRANSFORM_H_
20
21#include <stdio.h>
22#include <ctype.h>
23#include "./types.h"
24
25#if defined(__cplusplus) || defined(c_plusplus)
26extern "C" {
27#endif
28
/* Kind of modification applied to a dictionary word before the prefix and
   suffix of a Transform are attached.

   The numeric values are part of the contract: TransformDictionaryWord uses
   them arithmetically, so
     - kOmitLastN  must equal N (the number of trailing bytes dropped), and
     - kOmitFirstN must equal kOmitFirst1 + (N - 1) (N leading bytes dropped).
   Do not renumber or reorder these constants. */
enum WordTransformType {
  kIdentity = 0,
  kOmitLast1 = 1,
  kOmitLast2 = 2,
  kOmitLast3 = 3,
  kOmitLast4 = 4,
  kOmitLast5 = 5,
  kOmitLast6 = 6,
  kOmitLast7 = 7,
  kOmitLast8 = 8,
  kOmitLast9 = 9,
  kUppercaseFirst = 10,
  kUppercaseAll = 11,
  kOmitFirst1 = 12,
  kOmitFirst2 = 13,
  kOmitFirst3 = 14,
  kOmitFirst4 = 15,
  kOmitFirst5 = 16,
  kOmitFirst6 = 17,
  kOmitFirst7 = 18,
  kOmitFirst8 = 19,
  kOmitFirst9 = 20
};
52
53typedef struct {
54 const char* prefix;
55 enum WordTransformType transform;
56 const char* suffix;
57} Transform;
58
59static const Transform kTransforms[] = {
Zoltan Szabadka0829e372014-03-25 16:48:25 +010060 { "", kIdentity, "" },
61 { "", kIdentity, " " },
62 { " ", kIdentity, " " },
63 { "", kOmitFirst1, "" },
64 { "", kUppercaseFirst, " " },
65 { "", kIdentity, " the " },
66 { " ", kIdentity, "" },
67 { "s ", kIdentity, " " },
68 { "", kIdentity, " of " },
69 { "", kUppercaseFirst, "" },
70 { "", kIdentity, " and " },
71 { "", kOmitFirst2, "" },
72 { "", kOmitLast1, "" },
73 { ", ", kIdentity, " " },
74 { "", kIdentity, ", " },
75 { " ", kUppercaseFirst, " " },
76 { "", kIdentity, " in " },
77 { "", kIdentity, " to " },
78 { "e ", kIdentity, " " },
79 { "", kIdentity, "\"" },
80 { "", kIdentity, "." },
81 { "", kIdentity, "\">" },
82 { "", kIdentity, "\n" },
83 { "", kOmitLast3, "" },
84 { "", kIdentity, "]" },
85 { "", kIdentity, " for " },
86 { "", kOmitFirst3, "" },
87 { "", kOmitLast2, "" },
88 { "", kIdentity, " a " },
89 { "", kIdentity, " that " },
90 { " ", kUppercaseFirst, "" },
91 { "", kIdentity, ". " },
92 { ".", kIdentity, "" },
93 { " ", kIdentity, ", " },
94 { "", kOmitFirst4, "" },
95 { "", kIdentity, " with " },
96 { "", kIdentity, "'" },
97 { "", kIdentity, " from " },
98 { "", kIdentity, " by " },
99 { "", kOmitFirst5, "" },
100 { "", kOmitFirst6, "" },
101 { " the ", kIdentity, "" },
102 { "", kOmitLast4, "" },
103 { "", kIdentity, ". The " },
104 { "", kUppercaseAll, "" },
105 { "", kIdentity, " on " },
106 { "", kIdentity, " as " },
107 { "", kIdentity, " is " },
108 { "", kOmitLast7, "" },
109 { "", kOmitLast1, "ing " },
110 { "", kIdentity, "\n\t" },
111 { "", kIdentity, ":" },
112 { " ", kIdentity, ". " },
113 { "", kIdentity, "ed " },
114 { "", kOmitFirst9, "" },
115 { "", kOmitFirst7, "" },
116 { "", kOmitLast6, "" },
117 { "", kIdentity, "(" },
118 { "", kUppercaseFirst, ", " },
119 { "", kOmitLast8, "" },
120 { "", kIdentity, " at " },
121 { "", kIdentity, "ly " },
122 { " the ", kIdentity, " of " },
123 { "", kOmitLast5, "" },
124 { "", kOmitLast9, "" },
125 { " ", kUppercaseFirst, ", " },
126 { "", kUppercaseFirst, "\"" },
127 { ".", kIdentity, "(" },
128 { "", kUppercaseAll, " " },
129 { "", kUppercaseFirst, "\">" },
130 { "", kIdentity, "=\"" },
131 { " ", kIdentity, "." },
132 { ".com/", kIdentity, "" },
133 { " the ", kIdentity, " of the " },
134 { "", kUppercaseFirst, "'" },
135 { "", kIdentity, ". This " },
136 { "", kIdentity, "," },
137 { ".", kIdentity, " " },
138 { "", kUppercaseFirst, "(" },
139 { "", kUppercaseFirst, "." },
140 { "", kIdentity, " not " },
141 { " ", kIdentity, "=\"" },
142 { "", kIdentity, "er " },
143 { " ", kUppercaseAll, " " },
144 { "", kIdentity, "al " },
145 { " ", kUppercaseAll, "" },
146 { "", kIdentity, "='" },
147 { "", kUppercaseAll, "\"" },
148 { "", kUppercaseFirst, ". " },
149 { " ", kIdentity, "(" },
150 { "", kIdentity, "ful " },
151 { " ", kUppercaseFirst, ". " },
152 { "", kIdentity, "ive " },
153 { "", kIdentity, "less " },
154 { "", kUppercaseAll, "'" },
155 { "", kIdentity, "est " },
156 { " ", kUppercaseFirst, "." },
157 { "", kUppercaseAll, "\">" },
158 { " ", kIdentity, "='" },
159 { "", kUppercaseFirst, "," },
160 { "", kIdentity, "ize " },
161 { "", kUppercaseAll, "." },
162 { "\xc2\xa0", kIdentity, "" },
163 { " ", kIdentity, "," },
164 { "", kUppercaseFirst, "=\"" },
165 { "", kUppercaseAll, "=\"" },
166 { "", kIdentity, "ous " },
167 { "", kUppercaseAll, ", " },
168 { "", kUppercaseFirst, "='" },
169 { " ", kUppercaseFirst, "," },
170 { " ", kUppercaseAll, "=\"" },
171 { " ", kUppercaseAll, ", " },
172 { "", kUppercaseAll, "," },
173 { "", kUppercaseAll, "(" },
174 { "", kUppercaseAll, ". " },
175 { " ", kUppercaseAll, "." },
176 { "", kUppercaseAll, "='" },
177 { " ", kUppercaseAll, ". " },
178 { " ", kUppercaseFirst, "=\"" },
179 { " ", kUppercaseAll, "='" },
180 { " ", kUppercaseFirst, "='" },
Zoltan Szabadka2733d6c2014-02-17 14:25:36 +0100181};
182
183static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]);
184
/* Uppercases, in place, the character that starts at p[0], using a
   deliberately simplified model of UTF-8:
     - lead byte < 0xc0: treated as a 1-byte character; ASCII 'a'..'z' is
       mapped to 'A'..'Z' by flipping bit 0x20, anything else is untouched;
     - lead byte < 0xe0: treated as a 2-byte character; p[1] is XORed with 32;
     - otherwise:        treated as a 3-byte character; p[2] is XORed with 5.

   `len` is the number of bytes available at p. A byte is only modified if it
   lies inside [p, p + len), so a multi-byte sequence truncated by the end of
   the word never causes a write past the word.

   Returns the number of bytes the character is considered to occupy
   (1, 2 or 3; this may exceed `len` for a truncated sequence), or 0 without
   touching memory when len < 1. */
static int ToUpperCase(uint8_t *p, int len) {
  if (len < 1) {
    return 0;
  }
  if (p[0] < 0xc0) {
    if (p[0] >= 'a' && p[0] <= 'z') {
      p[0] ^= 32;
    }
    return 1;
  }
  /* An overly simplified uppercasing model for utf-8. */
  if (p[0] < 0xe0) {
    if (len > 1) {
      p[1] ^= 32;
    }
    return 2;
  }
  /* An arbitrary transform for three byte characters. */
  if (len > 2) {
    p[2] ^= 5;
  }
  return 3;
}
201
202static BROTLI_INLINE int TransformDictionaryWord(
203 uint8_t* dst, const uint8_t* word, int len, int transform) {
204 const char* prefix = kTransforms[transform].prefix;
205 const char* suffix = kTransforms[transform].suffix;
206 const int t = kTransforms[transform].transform;
Zoltan Szabadka0829e372014-03-25 16:48:25 +0100207 int skip = t < kOmitFirst1 ? 0 : t - (kOmitFirst1 - 1);
Zoltan Szabadka2733d6c2014-02-17 14:25:36 +0100208 int idx = 0;
209 int i = 0;
210 uint8_t* uppercase;
Zoltan Szabadka0829e372014-03-25 16:48:25 +0100211 if (skip > len) {
212 skip = len;
213 }
Zoltan Szabadka2733d6c2014-02-17 14:25:36 +0100214 while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
Zoltan Szabadka0829e372014-03-25 16:48:25 +0100215 word += skip;
216 len -= skip;
217 if (t <= kOmitLast9) {
Zoltan Szabadka2733d6c2014-02-17 14:25:36 +0100218 len -= t;
219 }
220 while (i < len) { dst[idx++] = word[i++]; }
221 uppercase = &dst[idx - len];
222 if (t == kUppercaseFirst) {
223 ToUpperCase(uppercase, len);
224 } else if (t == kUppercaseAll) {
225 while (len > 0) {
226 int step = ToUpperCase(uppercase, len);
227 uppercase += step;
228 len -= step;
229 }
230 }
231 while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }
232 return idx;
233}
234
235#if defined(__cplusplus) || defined(c_plusplus)
236} /* extern "C" */
237#endif
238
239#endif /* BROTLI_DEC_TRANSFORM_H_ */