blob: 8dbe043689413ce659a8e151b88e23ff0ae65dab [file] [log] [blame]
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_LITE_H_
#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_LITE_H_
#include <cstdint>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/lite/lite-index.h"
#include "icing/index/term-id-codec.h"
#include "icing/schema/section.h"
namespace icing {
namespace lib {
class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
public:
explicit DocHitInfoIteratorTermLite(const TermIdCodec* term_id_codec,
LiteIndex* lite_index,
const std::string& term,
SectionIdMask section_restrict_mask)
: term_(term),
lite_index_(lite_index),
cached_hits_idx_(-1),
term_id_codec_(term_id_codec),
num_advance_calls_(0),
section_restrict_mask_(section_restrict_mask) {}
libtextclassifier3::Status Advance() override;
int32_t GetNumBlocksInspected() const override {
// TODO(b/137862424): Implement this once the main index is added.
return 0;
}
int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
void PopulateMatchedTermsStats(
std::vector<TermMatchInfo>* matched_terms_stats,
SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
SectionIdMask section_mask =
doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
SectionIdMask section_mask_copy = section_mask;
std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
Hit::kNoTermFrequency};
while (section_mask_copy) {
SectionId section_id = __builtin_ctz(section_mask_copy);
section_term_frequencies.at(section_id) =
doc_hit_info_.hit_term_frequency(section_id);
section_mask_copy &= ~(1u << section_id);
}
TermMatchInfo term_stats(term_, section_mask,
std::move(section_term_frequencies));
for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) {
if (cur_term_stats.term == term_stats.term) {
// Same docId and same term, we don't need to add the term and the term
// frequency should always be the same
return;
}
}
matched_terms_stats->push_back(std::move(term_stats));
}
protected:
// Add DocHitInfos corresponding to term_ to cached_hits_.
virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
const std::string term_;
LiteIndex* const lite_index_;
// Stores hits retrieved from the index. This may only be a subset of the hits
// that are present in the index. Current value pointed to by the Iterator is
// tracked by cached_hits_idx_.
std::vector<DocHitInfo> cached_hits_;
int cached_hits_idx_;
const TermIdCodec* term_id_codec_;
int num_advance_calls_;
// Mask indicating which sections hits should be considered for.
// Ex. 0000 0000 0000 0010 means that only hits from section 1 are desired.
const SectionIdMask section_restrict_mask_;
};
class DocHitInfoIteratorTermLiteExact : public DocHitInfoIteratorTermLite {
public:
explicit DocHitInfoIteratorTermLiteExact(const TermIdCodec* term_id_codec,
LiteIndex* lite_index,
const std::string& term,
SectionIdMask section_id_mask)
: DocHitInfoIteratorTermLite(term_id_codec, lite_index, term,
section_id_mask) {}
std::string ToString() const override;
protected:
libtextclassifier3::Status RetrieveMoreHits() override;
};
class DocHitInfoIteratorTermLitePrefix : public DocHitInfoIteratorTermLite {
public:
explicit DocHitInfoIteratorTermLitePrefix(const TermIdCodec* term_id_codec,
LiteIndex* lite_index,
const std::string& term,
SectionIdMask section_id_mask)
: DocHitInfoIteratorTermLite(term_id_codec, lite_index, term,
section_id_mask) {}
std::string ToString() const override;
protected:
libtextclassifier3::Status RetrieveMoreHits() override;
private:
// After retrieving DocHitInfos from the index, a DocHitInfo for docid 1 and
// "foo" and a DocHitInfo for docid 1 and "fool". These DocHitInfos should be
// merged.
void SortAndDedupeDocumentIds();
};
} // namespace lib
} // namespace icing
#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_LITE_H_