blob: 297651c85e9b5332abd885669d44104a6d0fe0a6 [file] [log] [blame]
Clement Courbet96715412018-05-07 09:09:48 +00001//===-- Clustering.h --------------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// Utilities to compute benchmark result clusters.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
16#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
17
18#include "BenchmarkResult.h"
19#include "llvm/Support/Error.h"
20#include <vector>
21
Fangrui Song32401af2018-10-22 17:10:47 +000022namespace llvm {
Clement Courbet96715412018-05-07 09:09:48 +000023namespace exegesis {
24
25class InstructionBenchmarkClustering {
26public:
27 // Clusters `Points` using DBSCAN with the given parameters. See the cc file
28 // for more explanations on the algorithm.
29 static llvm::Expected<InstructionBenchmarkClustering>
30 create(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
31 double Epsilon);
32
33 class ClusterId {
34 public:
35 static ClusterId noise() { return ClusterId(kNoise); }
36 static ClusterId error() { return ClusterId(kError); }
Clement Courbet72287212018-06-04 11:11:55 +000037 static ClusterId makeValid(size_t Id) { return ClusterId(Id); }
Clement Courbet96715412018-05-07 09:09:48 +000038 ClusterId() : Id_(kUndef) {}
39 bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
Clement Courbet72287212018-06-04 11:11:55 +000040 bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
Clement Courbet96715412018-05-07 09:09:48 +000041
Clement Courbet17d3c252018-05-22 13:31:29 +000042 bool isValid() const { return Id_ <= kMaxValid; }
Clement Courbet96715412018-05-07 09:09:48 +000043 bool isUndef() const { return Id_ == kUndef; }
44 bool isNoise() const { return Id_ == kNoise; }
45 bool isError() const { return Id_ == kError; }
46
47 // Precondition: isValid().
48 size_t getId() const {
49 assert(isValid());
Clement Courbet17d3c252018-05-22 13:31:29 +000050 return Id_;
Clement Courbet96715412018-05-07 09:09:48 +000051 }
52
53 private:
Clement Courbet17d3c252018-05-22 13:31:29 +000054 explicit ClusterId(size_t Id) : Id_(Id) {}
Clement Courbet72287212018-06-04 11:11:55 +000055 static constexpr const size_t kMaxValid =
56 std::numeric_limits<size_t>::max() - 4;
Clement Courbet17d3c252018-05-22 13:31:29 +000057 static constexpr const size_t kNoise = kMaxValid + 1;
58 static constexpr const size_t kError = kMaxValid + 2;
59 static constexpr const size_t kUndef = kMaxValid + 3;
60 size_t Id_;
Clement Courbet96715412018-05-07 09:09:48 +000061 };
62
63 struct Cluster {
64 Cluster() = delete;
65 explicit Cluster(const ClusterId &Id) : Id(Id) {}
66
67 const ClusterId Id;
68 // Indices of benchmarks within the cluster.
69 std::vector<int> PointIndices;
70 };
71
72 ClusterId getClusterIdForPoint(size_t P) const {
73 return ClusterIdForPoint_[P];
74 }
75
Clement Courbet37f0ca02018-05-15 12:08:00 +000076 const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }
77
Clement Courbet96715412018-05-07 09:09:48 +000078 const Cluster &getCluster(ClusterId Id) const {
79 assert(!Id.isUndef() && "unlabeled cluster");
80 if (Id.isNoise()) {
81 return NoiseCluster_;
82 }
83 if (Id.isError()) {
84 return ErrorCluster_;
85 }
86 return Clusters_[Id.getId()];
87 }
88
89 const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
90
Clement Courbet72287212018-06-04 11:11:55 +000091 // Returns true if the given point is within a distance Epsilon of each other.
92 bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
Roman Lebedev8e315b62018-11-19 13:28:36 +000093 const std::vector<BenchmarkMeasure> &Q) const {
94 double DistanceSquared = 0.0;
95 for (size_t I = 0, E = P.size(); I < E; ++I) {
96 const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
97 DistanceSquared += Diff * Diff;
98 }
99 return DistanceSquared <= EpsilonSquared_;
100 }
Clement Courbet72287212018-06-04 11:11:55 +0000101
Clement Courbet96715412018-05-07 09:09:48 +0000102private:
Clement Courbet37f0ca02018-05-15 12:08:00 +0000103 InstructionBenchmarkClustering(
Clement Courbet72287212018-06-04 11:11:55 +0000104 const std::vector<InstructionBenchmark> &Points, double EpsilonSquared);
Clement Courbet37f0ca02018-05-15 12:08:00 +0000105 llvm::Error validateAndSetup();
Clement Courbet72287212018-06-04 11:11:55 +0000106 void dbScan(size_t MinPts);
Roman Lebedev666d8552018-11-19 13:28:31 +0000107 void rangeQuery(size_t Q, llvm::SmallVectorImpl<size_t> &Scratchpad) const;
Clement Courbet72287212018-06-04 11:11:55 +0000108
Clement Courbet37f0ca02018-05-15 12:08:00 +0000109 const std::vector<InstructionBenchmark> &Points_;
Clement Courbet72287212018-06-04 11:11:55 +0000110 const double EpsilonSquared_;
Clement Courbet96715412018-05-07 09:09:48 +0000111 int NumDimensions_ = 0;
112 // ClusterForPoint_[P] is the cluster id for Points[P].
113 std::vector<ClusterId> ClusterIdForPoint_;
114 std::vector<Cluster> Clusters_;
115 Cluster NoiseCluster_;
116 Cluster ErrorCluster_;
117};
118
119} // namespace exegesis
Fangrui Song32401af2018-10-22 17:10:47 +0000120} // namespace llvm
Clement Courbet96715412018-05-07 09:09:48 +0000121
122#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H