blob: daf4fd0877d69e73b0ed6e027b1462eb98f2f63f [file] [log] [blame]
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ICING_SCHEMA_SECTION_H_
#define ICING_SCHEMA_SECTION_H_
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
namespace icing {
namespace lib {
using SectionId = int8_t;
// 4 bits for 16 values. NOTE: Increasing this value means that SectionIdMask
// must increase from an int16_t to an int32_t
inline constexpr int kSectionIdBits = 4;
inline constexpr SectionId kInvalidSectionId = (1 << kSectionIdBits);
inline constexpr SectionId kMaxSectionId = kInvalidSectionId - 1;
inline constexpr SectionId kMinSectionId = 0;
constexpr bool IsSectionIdValid(SectionId section_id) {
return section_id >= kMinSectionId && section_id <= kMaxSectionId;
}
using SectionIdMask = int16_t;
inline constexpr SectionIdMask kSectionIdMaskAll = ~SectionIdMask{0};
inline constexpr SectionIdMask kSectionIdMaskNone = SectionIdMask{0};
static_assert(
kMaxSectionId < 8 * sizeof(SectionIdMask),
"SectionIdMask is not large enough to represent all section values!");
// TODO(samzheng): add more metadata when needed, e.g. tokenizer type,
struct SectionMetadata {
// Dot-joined property names, representing the location of section inside an
// document. E.g. "property1.property2"
std::string path;
// A unique id of property within a type config
SectionId id;
// How content in this section should be tokenized. It is invalid for a
// section to have tokenizer == 'NONE'.
IndexingConfig::TokenizerType::Code tokenizer;
// How tokens in this section should be matched.
//
// TermMatchType::UNKNOWN:
// Terms will not match anything
//
// TermMatchType::PREFIX:
// Terms will be stored as a prefix match, "fool" matches "foo" and "fool"
//
// TermMatchType::EXACT_ONLY:
// Terms will be only stored as an exact match, "fool" only matches "fool"
TermMatchType::Code term_match_type = TermMatchType::UNKNOWN;
SectionMetadata(SectionId id_in, TermMatchType::Code term_match_type_in,
IndexingConfig::TokenizerType::Code tokenizer,
std::string&& path_in)
: path(std::move(path_in)),
id(id_in),
tokenizer(tokenizer),
term_match_type(term_match_type_in) {}
};
// Section is an icing internal concept similar to document property but with
// extra metadata. The content can be a value or the combination of repeated
// values of a property.
struct Section {
SectionMetadata metadata;
std::vector<std::string> content;
Section(SectionMetadata&& metadata_in, std::vector<std::string>&& content_in)
: metadata(std::move(metadata_in)), content(std::move(content_in)) {}
};
} // namespace lib
} // namespace icing
#endif // ICING_SCHEMA_SECTION_H_