Blame - feature-processor.h - platform/external/libtextclassifier

blob: 553bd1e01a6f0526e1587c87bf5fed5b4d1b7f10 [file] [log] [blame]

Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	1	/*
				2	* Copyright (C) 2017 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	// Feature processing for FFModel (feed-forward SmartSelection model).
				18
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	19	#ifndef LIBTEXTCLASSIFIER_FEATURE_PROCESSOR_H_
				20	#define LIBTEXTCLASSIFIER_FEATURE_PROCESSOR_H_
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	21
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	22	#include <map>
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	23	#include <memory>
Lukas Zilka	e5ea2ab	2017-10-11 10:50:05 +0200	[diff] [blame]	24	#include <set>
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	25	#include <string>
				26	#include <vector>
				27
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	28	#include "cached-features.h"
				29	#include "model_generated.h"
				30	#include "token-feature-extractor.h"
				31	#include "tokenizer.h"
				32	#include "types.h"
				33	#include "util/base/integral_types.h"
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	34	#include "util/base/logging.h"
Matt Sharifi	f95c3bd	2017-04-25 18:41:11 +0200	[diff] [blame]	35	#include "util/utf8/unicodetext.h"
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	36	#include "util/utf8/unilib.h"
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	37
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	38	namespace libtextclassifier2 {
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	39
				40	constexpr int kInvalidLabel = -1;
				41
				42	namespace internal {
				43
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	44	TokenFeatureExtractorOptions BuildTokenFeatureExtractorOptions(
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	45	const FeatureProcessorOptions* options);
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	46
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	47	// Splits tokens that contain the selection boundary inside them.
				48	// E.g. "foo{bar}@google.com" -> "foo", "bar", "@google.com"
				49	void SplitTokensOnSelectionBoundaries(CodepointSpan selection,
				50	std::vector<Token>* tokens);
				51
Matt Sharifi	be876dc	2017-03-17 17:02:43 +0100	[diff] [blame]	52	// Returns the index of token that corresponds to the codepoint span.
				53	int CenterTokenFromClick(CodepointSpan span, const std::vector<Token>& tokens);
				54
				55	// Returns the index of token that corresponds to the middle of the codepoint
				56	// span.
				57	int CenterTokenFromMiddleOfSelection(
				58	CodepointSpan span, const std::vector<Token>& selectable_tokens);
				59
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	60	// Strips the tokens from the tokens vector that are not used for feature
				61	// extraction because they are out of scope, or pads them so that there are
				62	// enough tokens in the required context_size for all inferences with a click
				63	// in relative_click_span.
				64	void StripOrPadTokens(TokenSpan relative_click_span, int context_size,
				65	std::vector<Token>* tokens, int* click_pos);
				66
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	67	// If unilib is not nullptr, just returns unilib. Otherwise, if unilib is
				68	// nullptr, will create UniLib, assign ownership to owned_unilib, and return it.
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	69	const UniLib* MaybeCreateUnilib(const UniLib* unilib,
				70	std::unique_ptr<UniLib>* owned_unilib);
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	71
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	72	} // namespace internal
				73
Lukas Zilka	40c18de	2017-04-10 17:22:22 +0200	[diff] [blame]	74	// Converts a codepoint span to a token span in the given list of tokens.
Lukas Zilka	726b4d2	2017-12-13 16:37:03 +0100	[diff] [blame]	75	// If snap_boundaries_to_containing_tokens is set to true, it is enough for a
				76	// token to overlap with the codepoint range to be considered part of it.
				77	// Otherwise it must be fully included in the range.
				78	TokenSpan CodepointSpanToTokenSpan(
				79	const std::vector<Token>& selectable_tokens, CodepointSpan codepoint_span,
				80	bool snap_boundaries_to_containing_tokens = false);
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	81
Lukas Zilka	40c18de	2017-04-10 17:22:22 +0200	[diff] [blame]	82	// Converts a token span to a codepoint span in the given list of tokens.
				83	CodepointSpan TokenSpanToCodepointSpan(
				84	const std::vector<Token>& selectable_tokens, TokenSpan token_span);
				85
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	86	// Takes care of preparing features for the span prediction model.
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	87	class FeatureProcessor {
				88	public:
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	89	// A cache mapping codepoint spans to embedded tokens features. An instance
				90	// can be provided to multiple calls to ExtractFeatures() operating on the
				91	// same context (the same codepoint spans corresponding to the same tokens),
				92	// as an optimization. Note that the tokenizations do not have to be
				93	// identical.
				94	typedef std::map<CodepointSpan, std::vector<float>> EmbeddingCache;
				95
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	96	// If unilib is nullptr, will create and own an instance of a UniLib,
				97	// otherwise will use what's passed in.
				98	explicit FeatureProcessor(const FeatureProcessorOptions* options,
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	99	const UniLib* unilib = nullptr)
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	100	: owned_unilib_(nullptr),
				101	unilib_(internal::MaybeCreateUnilib(unilib, &owned_unilib_)),
				102	feature_extractor_(internal::BuildTokenFeatureExtractorOptions(options),
				103	*unilib_),
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	104	options_(options),
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	105	tokenizer_(
				106	options->tokenization_codepoint_config() != nullptr
				107	? Tokenizer({options->tokenization_codepoint_config()->begin(),
				108	options->tokenization_codepoint_config()->end()},
				109	options->tokenize_on_script_change())
				110	: Tokenizer({}, /*split_on_script_change=*/false)) {
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	111	MakeLabelMaps();
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	112	if (options->supported_codepoint_ranges() != nullptr) {
				113	PrepareCodepointRanges({options->supported_codepoint_ranges()->begin(),
				114	options->supported_codepoint_ranges()->end()},
				115	&supported_codepoint_ranges_);
				116	}
				117	if (options->internal_tokenizer_codepoint_ranges() != nullptr) {
				118	PrepareCodepointRanges(
				119	{options->internal_tokenizer_codepoint_ranges()->begin(),
				120	options->internal_tokenizer_codepoint_ranges()->end()},
				121	&internal_tokenizer_codepoint_ranges_);
				122	}
Lukas Zilka	e5ea2ab	2017-10-11 10:50:05 +0200	[diff] [blame]	123	PrepareIgnoredSpanBoundaryCodepoints();
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	124	}
				125
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	126	// Tokenizes the input string using the selected tokenization method.
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	127	std::vector<Token> Tokenize(const std::string& text) const;
				128
				129	// Same as above but takes UnicodeText.
				130	std::vector<Token> Tokenize(const UnicodeText& text_unicode) const;
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	131
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	132	// Converts a label into a token span.
				133	bool LabelToTokenSpan(int label, TokenSpan* token_span) const;
				134
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	135	// Gets the total number of selection labels.
				136	int GetSelectionLabelCount() const { return label_to_selection_.size(); }
				137
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	138	// Gets the string value for given collection label.
				139	std::string LabelToCollection(int label) const;
				140
				141	// Gets the total number of collections of the model.
				142	int NumCollections() const { return collection_to_label_.size(); }
				143
				144	// Gets the name of the default collection.
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	145	std::string GetDefaultCollection() const;
				146
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	147	const FeatureProcessorOptions* GetOptions() const { return options_; }
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	148
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	149	// Retokenizes the context and input span, and finds the click position.
				150	// Depending on the options, might modify tokens (split them or remove them).
				151	void RetokenizeAndFindClick(const std::string& context,
				152	CodepointSpan input_span,
				153	bool only_use_line_with_click,
				154	std::vector<Token>* tokens, int* click_pos) const;
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	155
				156	// Same as above but takes UnicodeText.
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	157	void RetokenizeAndFindClick(const UnicodeText& context_unicode,
				158	CodepointSpan input_span,
				159	bool only_use_line_with_click,
				160	std::vector<Token>* tokens, int* click_pos) const;
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	161
				162	// Extracts features as a CachedFeatures object that can be used for repeated
				163	// inference over token spans in the given context.
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	164	bool ExtractFeatures(const std::vector<Token>& tokens, TokenSpan token_span,
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	165	CodepointSpan selection_span_for_feature,
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	166	const EmbeddingExecutor* embedding_executor,
				167	EmbeddingCache* embedding_cache, int feature_vector_size,
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	168	std::unique_ptr<CachedFeatures>* cached_features) const;
				169
				170	// Fills selection_label_spans with CodepointSpans that correspond to the
				171	// selection labels. The CodepointSpans are based on the codepoint ranges of
				172	// given tokens.
				173	bool SelectionLabelSpans(
				174	VectorSpan<Token> tokens,
				175	std::vector<CodepointSpan>* selection_label_spans) const;
				176
				177	int DenseFeaturesCount() const {
				178	return feature_extractor_.DenseFeaturesCount();
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	179	}
				180
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	181	int EmbeddingSize() const { return options_->embedding_size(); }
				182
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	183	// Splits context to several segments.
Lukas Zilka	726b4d2	2017-12-13 16:37:03 +0100	[diff] [blame]	184	std::vector<UnicodeTextRange> SplitContext(
				185	const UnicodeText& context_unicode) const;
				186
Lukas Zilka	e5ea2ab	2017-10-11 10:50:05 +0200	[diff] [blame]	187	// Strips boundary codepoints from the span in context and returns the new
				188	// start and end indices. If the span consists entirely of boundary
				189	// codepoints, the first index of span is returned for both indices.
				190	CodepointSpan StripBoundaryCodepoints(const std::string& context,
				191	CodepointSpan span) const;
				192
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	193	// Same as above but takes UnicodeText.
				194	CodepointSpan StripBoundaryCodepoints(const UnicodeText& context_unicode,
				195	CodepointSpan span) const;
				196
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	197	protected:
Lukas Zilka	26e8c2e	2017-04-06 15:54:24 +0200	[diff] [blame]	198	// Represents a codepoint range [start, end).
				199	struct CodepointRange {
				200	int32 start;
				201	int32 end;
				202
				203	CodepointRange(int32 arg_start, int32 arg_end)
				204	: start(arg_start), end(arg_end) {}
				205	};
				206
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	207	// Returns the class id corresponding to the given string collection
				208	// identifier. There is a catch-all class id that the function returns for
				209	// unknown collections.
				210	int CollectionToLabel(const std::string& collection) const;
				211
				212	// Prepares mapping from collection names to labels.
				213	void MakeLabelMaps();
				214
				215	// Gets the number of spannable tokens for the model.
				216	//
				217	// Spannable tokens are those tokens of context, which the model predicts
				218	// selection spans over (i.e., there is 1:1 correspondence between the output
				219	// classes of the model and each of the spannable tokens).
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	220	int GetNumContextTokens() const { return options_->context_size() * 2 + 1; }
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	221
				222	// Converts a label into a span of codepoint indices corresponding to it
				223	// given output_tokens.
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	224	bool LabelToSpan(int label, const VectorSpan<Token>& output_tokens,
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	225	CodepointSpan* span) const;
				226
				227	// Converts a span to the corresponding label given output_tokens.
				228	bool SpanToLabel(const std::pair<CodepointIndex, CodepointIndex>& span,
				229	const std::vector<Token>& output_tokens, int* label) const;
				230
				231	// Converts a token span to the corresponding label.
				232	int TokenSpanToLabel(const std::pair<TokenIndex, TokenIndex>& span) const;
				233
Matt Sharifi	f95c3bd	2017-04-25 18:41:11 +0200	[diff] [blame]	234	void PrepareCodepointRanges(
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	235	const std::vector<const FeatureProcessorOptions_::CodepointRange*>&
Matt Sharifi	f95c3bd	2017-04-25 18:41:11 +0200	[diff] [blame]	236	codepoint_ranges,
				237	std::vector<CodepointRange>* prepared_codepoint_ranges);
Lukas Zilka	26e8c2e	2017-04-06 15:54:24 +0200	[diff] [blame]	238
				239	// Returns the ratio of supported codepoints to total number of codepoints in
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	240	// the given token span.
				241	float SupportedCodepointsRatio(const TokenSpan& token_span,
Lukas Zilka	26e8c2e	2017-04-06 15:54:24 +0200	[diff] [blame]	242	const std::vector<Token>& tokens) const;
				243
Matt Sharifi	f95c3bd	2017-04-25 18:41:11 +0200	[diff] [blame]	244	// Returns true if given codepoint is covered by the given sorted vector of
				245	// codepoint ranges.
				246	bool IsCodepointInRanges(
				247	int codepoint, const std::vector<CodepointRange>& codepoint_ranges) const;
Lukas Zilka	26e8c2e	2017-04-06 15:54:24 +0200	[diff] [blame]	248
Lukas Zilka	e5ea2ab	2017-10-11 10:50:05 +0200	[diff] [blame]	249	void PrepareIgnoredSpanBoundaryCodepoints();
				250
				251	// Counts the number of span boundary codepoints. If count_from_beginning is
				252	// True, the counting will start at the span_start iterator (inclusive) and at
				253	// maximum end at span_end (exclusive). If count_from_beginning is False, the
				254	// counting will start from span_end (exclusive) and end at span_start
				255	// (inclusive).
				256	int CountIgnoredSpanBoundaryCodepoints(
				257	const UnicodeText::const_iterator& span_start,
				258	const UnicodeText::const_iterator& span_end,
				259	bool count_from_beginning) const;
				260
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	261	// Finds the center token index in tokens vector, using the method defined
				262	// in options_.
				263	int FindCenterToken(CodepointSpan span,
				264	const std::vector<Token>& tokens) const;
				265
Lukas Zilka	40c18de	2017-04-10 17:22:22 +0200	[diff] [blame]	266	// Tokenizes the input text using ICU tokenizer.
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	267	bool ICUTokenize(const UnicodeText& context_unicode,
Lukas Zilka	40c18de	2017-04-10 17:22:22 +0200	[diff] [blame]	268	std::vector<Token>* result) const;
				269
Matt Sharifi	f95c3bd	2017-04-25 18:41:11 +0200	[diff] [blame]	270	// Takes the result of ICU tokenization and retokenizes stretches of tokens
				271	// made of a specific subset of characters using the internal tokenizer.
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	272	void InternalRetokenize(const UnicodeText& unicode_text,
Matt Sharifi	f95c3bd	2017-04-25 18:41:11 +0200	[diff] [blame]	273	std::vector<Token>* tokens) const;
				274
				275	// Tokenizes a substring of the unicode string, appending the resulting tokens
				276	// to the output vector. The resulting tokens have bounds relative to the full
				277	// string. Does nothing if the start of the span is negative.
				278	void TokenizeSubstring(const UnicodeText& unicode_text, CodepointSpan span,
				279	std::vector<Token>* result) const;
				280
Lukas Zilka	726b4d2	2017-12-13 16:37:03 +0100	[diff] [blame]	281	// Removes all tokens from tokens that are not on a line (defined by calling
				282	// SplitContext on the context) to which span points.
				283	void StripTokensFromOtherLines(const std::string& context, CodepointSpan span,
				284	std::vector<Token>* tokens) const;
				285
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	286	// Same as above but takes UnicodeText.
				287	void StripTokensFromOtherLines(const UnicodeText& context_unicode,
				288	CodepointSpan span,
				289	std::vector<Token>* tokens) const;
				290
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	291	// Extracts the features of a token and appends them to the output vector.
				292	// Uses the embedding cache to avoid re-extracting and re-embedding the
				293	// sparse features for the same token.
				294	bool AppendTokenFeaturesWithCache(const Token& token,
				295	CodepointSpan selection_span_for_feature,
				296	const EmbeddingExecutor* embedding_executor,
				297	EmbeddingCache* embedding_cache,
				298	std::vector<float>* output_features) const;
				299
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	300	private:
				301	std::unique_ptr<UniLib> owned_unilib_;
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	302	const UniLib* unilib_;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	303
				304	protected:
Lukas Zilka	6bb39a8	2017-04-07 19:55:11 +0200	[diff] [blame]	305	const TokenFeatureExtractor feature_extractor_;
				306
Matt Sharifi	f95c3bd	2017-04-25 18:41:11 +0200	[diff] [blame]	307	// Codepoint ranges that define what codepoints are supported by the model.
				308	// NOTE: Must be sorted.
				309	std::vector<CodepointRange> supported_codepoint_ranges_;
				310
				311	// Codepoint ranges that define which tokens (consisting of which codepoints)
				312	// should be re-tokenized with the internal tokenizer in the mixed
				313	// tokenization mode.
				314	// NOTE: Must be sorted.
				315	std::vector<CodepointRange> internal_tokenizer_codepoint_ranges_;
				316
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	317	private:
Lukas Zilka	e5ea2ab	2017-10-11 10:50:05 +0200	[diff] [blame]	318	// Set of codepoints that will be stripped from beginning and end of
				319	// predicted spans.
				320	std::set<int32> ignored_span_boundary_codepoints_;
				321
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	322	const FeatureProcessorOptions* const options_;
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	323
				324	// Mapping between token selection spans and labels ids.
				325	std::map<TokenSpan, int> selection_to_label_;
				326	std::vector<TokenSpan> label_to_selection_;
				327
				328	// Mapping between collections and labels.
				329	std::map<std::string, int> collection_to_label_;
				330
				331	Tokenizer tokenizer_;
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	332	};
				333
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	334	} // namespace libtextclassifier2
Matt Sharifi	bda09f1	2017-03-10 12:29:15 +0100	[diff] [blame]	335
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	336	#endif // LIBTEXTCLASSIFIER_FEATURE_PROCESSOR_H_