//===-- Clustering.h --------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Utilities to compute benchmark result clusters.
///
//===----------------------------------------------------------------------===//

15#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
16#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
17
18#include "BenchmarkResult.h"
19#include "llvm/Support/Error.h"
20#include <vector>
21
22namespace exegesis {
23
24class InstructionBenchmarkClustering {
25public:
26 // Clusters `Points` using DBSCAN with the given parameters. See the cc file
27 // for more explanations on the algorithm.
28 static llvm::Expected<InstructionBenchmarkClustering>
29 create(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
30 double Epsilon);
31
32 class ClusterId {
33 public:
34 static ClusterId noise() { return ClusterId(kNoise); }
35 static ClusterId error() { return ClusterId(kError); }
Clement Courbet72287212018-06-04 11:11:55 +000036 static ClusterId makeValid(size_t Id) { return ClusterId(Id); }
Clement Courbet96715412018-05-07 09:09:48 +000037 ClusterId() : Id_(kUndef) {}
38 bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
Clement Courbet72287212018-06-04 11:11:55 +000039 bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
Clement Courbet96715412018-05-07 09:09:48 +000040
Clement Courbet17d3c252018-05-22 13:31:29 +000041 bool isValid() const { return Id_ <= kMaxValid; }
Clement Courbet96715412018-05-07 09:09:48 +000042 bool isUndef() const { return Id_ == kUndef; }
43 bool isNoise() const { return Id_ == kNoise; }
44 bool isError() const { return Id_ == kError; }
45
46 // Precondition: isValid().
47 size_t getId() const {
48 assert(isValid());
Clement Courbet17d3c252018-05-22 13:31:29 +000049 return Id_;
Clement Courbet96715412018-05-07 09:09:48 +000050 }
51
52 private:
Clement Courbet17d3c252018-05-22 13:31:29 +000053 explicit ClusterId(size_t Id) : Id_(Id) {}
Clement Courbet72287212018-06-04 11:11:55 +000054 static constexpr const size_t kMaxValid =
55 std::numeric_limits<size_t>::max() - 4;
Clement Courbet17d3c252018-05-22 13:31:29 +000056 static constexpr const size_t kNoise = kMaxValid + 1;
57 static constexpr const size_t kError = kMaxValid + 2;
58 static constexpr const size_t kUndef = kMaxValid + 3;
59 size_t Id_;
Clement Courbet96715412018-05-07 09:09:48 +000060 };
61
62 struct Cluster {
63 Cluster() = delete;
64 explicit Cluster(const ClusterId &Id) : Id(Id) {}
65
66 const ClusterId Id;
67 // Indices of benchmarks within the cluster.
68 std::vector<int> PointIndices;
69 };
70
71 ClusterId getClusterIdForPoint(size_t P) const {
72 return ClusterIdForPoint_[P];
73 }
74
Clement Courbet37f0ca02018-05-15 12:08:00 +000075 const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }
76
Clement Courbet96715412018-05-07 09:09:48 +000077 const Cluster &getCluster(ClusterId Id) const {
78 assert(!Id.isUndef() && "unlabeled cluster");
79 if (Id.isNoise()) {
80 return NoiseCluster_;
81 }
82 if (Id.isError()) {
83 return ErrorCluster_;
84 }
85 return Clusters_[Id.getId()];
86 }
87
88 const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
89
Clement Courbet72287212018-06-04 11:11:55 +000090 // Returns true if the given point is within a distance Epsilon of each other.
91 bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
92 const std::vector<BenchmarkMeasure> &Q) const;
93
Clement Courbet96715412018-05-07 09:09:48 +000094private:
Clement Courbet37f0ca02018-05-15 12:08:00 +000095 InstructionBenchmarkClustering(
Clement Courbet72287212018-06-04 11:11:55 +000096 const std::vector<InstructionBenchmark> &Points, double EpsilonSquared);
Clement Courbet37f0ca02018-05-15 12:08:00 +000097 llvm::Error validateAndSetup();
Clement Courbet72287212018-06-04 11:11:55 +000098 void dbScan(size_t MinPts);
99 std::vector<size_t> rangeQuery(size_t Q) const;
100
Clement Courbet37f0ca02018-05-15 12:08:00 +0000101 const std::vector<InstructionBenchmark> &Points_;
Clement Courbet72287212018-06-04 11:11:55 +0000102 const double EpsilonSquared_;
Clement Courbet96715412018-05-07 09:09:48 +0000103 int NumDimensions_ = 0;
104 // ClusterForPoint_[P] is the cluster id for Points[P].
105 std::vector<ClusterId> ClusterIdForPoint_;
106 std::vector<Cluster> Clusters_;
107 Cluster NoiseCluster_;
108 Cluster ErrorCluster_;
109};
110
111} // namespace exegesis
112
113#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H