blob: ad4cab395c3abc788ad3cea6806d9c1fa4989fc8 [file] [log] [blame]
Clement Courbet96715412018-05-07 09:09:48 +00001//===-- Clustering.h --------------------------------------------*- C++ -*-===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Clement Courbet96715412018-05-07 09:09:48 +00006//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Utilities to compute benchmark result clusters.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
15#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
16
17#include "BenchmarkResult.h"
Roman Lebedev69716392019-02-20 09:14:04 +000018#include "llvm/ADT/Optional.h"
Clement Courbet96715412018-05-07 09:09:48 +000019#include "llvm/Support/Error.h"
Roman Lebedev69716392019-02-20 09:14:04 +000020#include <limits>
Clement Courbet96715412018-05-07 09:09:48 +000021#include <vector>
22
Fangrui Song32401af2018-10-22 17:10:47 +000023namespace llvm {
Clement Courbet96715412018-05-07 09:09:48 +000024namespace exegesis {
25
26class InstructionBenchmarkClustering {
27public:
28 // Clusters `Points` using DBSCAN with the given parameters. See the cc file
29 // for more explanations on the algorithm.
30 static llvm::Expected<InstructionBenchmarkClustering>
31 create(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
Roman Lebedev542e5d72019-02-25 09:36:12 +000032 double AnalysisClusteringEpsilon,
33 llvm::Optional<unsigned> NumOpcodes = llvm::None);
Clement Courbet96715412018-05-07 09:09:48 +000034
35 class ClusterId {
36 public:
37 static ClusterId noise() { return ClusterId(kNoise); }
38 static ClusterId error() { return ClusterId(kError); }
Clement Courbet72287212018-06-04 11:11:55 +000039 static ClusterId makeValid(size_t Id) { return ClusterId(Id); }
Roman Lebedev69716392019-02-20 09:14:04 +000040 static ClusterId makeValidUnstable(size_t Id) {
41 return ClusterId(Id, /*IsUnstable=*/true);
42 }
43
44 ClusterId() : Id_(kUndef), IsUnstable_(false) {}
45
46 // Compare id's, ignoring the 'unstability' bit.
Clement Courbet96715412018-05-07 09:09:48 +000047 bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
Clement Courbet72287212018-06-04 11:11:55 +000048 bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
Clement Courbet96715412018-05-07 09:09:48 +000049
Clement Courbet17d3c252018-05-22 13:31:29 +000050 bool isValid() const { return Id_ <= kMaxValid; }
Roman Lebedev69716392019-02-20 09:14:04 +000051 bool isUnstable() const { return IsUnstable_; }
Clement Courbet96715412018-05-07 09:09:48 +000052 bool isNoise() const { return Id_ == kNoise; }
53 bool isError() const { return Id_ == kError; }
Roman Lebedev69716392019-02-20 09:14:04 +000054 bool isUndef() const { return Id_ == kUndef; }
Clement Courbet96715412018-05-07 09:09:48 +000055
56 // Precondition: isValid().
57 size_t getId() const {
58 assert(isValid());
Clement Courbet17d3c252018-05-22 13:31:29 +000059 return Id_;
Clement Courbet96715412018-05-07 09:09:48 +000060 }
61
62 private:
Roman Lebedev69716392019-02-20 09:14:04 +000063 ClusterId(size_t Id, bool IsUnstable = false)
64 : Id_(Id), IsUnstable_(IsUnstable) {}
65
Clement Courbet72287212018-06-04 11:11:55 +000066 static constexpr const size_t kMaxValid =
Roman Lebedev69716392019-02-20 09:14:04 +000067 (std::numeric_limits<size_t>::max() >> 1) - 4;
Clement Courbet17d3c252018-05-22 13:31:29 +000068 static constexpr const size_t kNoise = kMaxValid + 1;
69 static constexpr const size_t kError = kMaxValid + 2;
70 static constexpr const size_t kUndef = kMaxValid + 3;
Roman Lebedev69716392019-02-20 09:14:04 +000071
72 size_t Id_ : (std::numeric_limits<size_t>::digits - 1);
73 size_t IsUnstable_ : 1;
Clement Courbet96715412018-05-07 09:09:48 +000074 };
Roman Lebedev69716392019-02-20 09:14:04 +000075 static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field.");
Clement Courbet96715412018-05-07 09:09:48 +000076
77 struct Cluster {
78 Cluster() = delete;
79 explicit Cluster(const ClusterId &Id) : Id(Id) {}
80
81 const ClusterId Id;
82 // Indices of benchmarks within the cluster.
83 std::vector<int> PointIndices;
84 };
85
86 ClusterId getClusterIdForPoint(size_t P) const {
87 return ClusterIdForPoint_[P];
88 }
89
Clement Courbet37f0ca02018-05-15 12:08:00 +000090 const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }
91
Clement Courbet96715412018-05-07 09:09:48 +000092 const Cluster &getCluster(ClusterId Id) const {
93 assert(!Id.isUndef() && "unlabeled cluster");
94 if (Id.isNoise()) {
95 return NoiseCluster_;
96 }
97 if (Id.isError()) {
98 return ErrorCluster_;
99 }
100 return Clusters_[Id.getId()];
101 }
102
103 const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
104
Clement Courbet72287212018-06-04 11:11:55 +0000105 // Returns true if the given point is within a distance Epsilon of each other.
106 bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
Roman Lebedev542e5d72019-02-25 09:36:12 +0000107 const std::vector<BenchmarkMeasure> &Q,
108 const double EpsilonSquared_) const {
Roman Lebedev8e315b62018-11-19 13:28:36 +0000109 double DistanceSquared = 0.0;
110 for (size_t I = 0, E = P.size(); I < E; ++I) {
111 const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
112 DistanceSquared += Diff * Diff;
113 }
114 return DistanceSquared <= EpsilonSquared_;
115 }
Clement Courbet72287212018-06-04 11:11:55 +0000116
Clement Courbet96715412018-05-07 09:09:48 +0000117private:
Clement Courbet37f0ca02018-05-15 12:08:00 +0000118 InstructionBenchmarkClustering(
Roman Lebedev542e5d72019-02-25 09:36:12 +0000119 const std::vector<InstructionBenchmark> &Points,
120 double AnalysisClusteringEpsilonSquared);
Roman Lebedev69716392019-02-20 09:14:04 +0000121
Clement Courbet37f0ca02018-05-15 12:08:00 +0000122 llvm::Error validateAndSetup();
Clement Courbet72287212018-06-04 11:11:55 +0000123 void dbScan(size_t MinPts);
Roman Lebedev69716392019-02-20 09:14:04 +0000124 void stabilize(unsigned NumOpcodes);
Roman Lebedev71fdb572018-11-19 13:28:41 +0000125 void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;
Clement Courbet72287212018-06-04 11:11:55 +0000126
Clement Courbet37f0ca02018-05-15 12:08:00 +0000127 const std::vector<InstructionBenchmark> &Points_;
Roman Lebedev542e5d72019-02-25 09:36:12 +0000128 const double AnalysisClusteringEpsilonSquared_;
Clement Courbet96715412018-05-07 09:09:48 +0000129 int NumDimensions_ = 0;
130 // ClusterForPoint_[P] is the cluster id for Points[P].
131 std::vector<ClusterId> ClusterIdForPoint_;
132 std::vector<Cluster> Clusters_;
133 Cluster NoiseCluster_;
134 Cluster ErrorCluster_;
135};
136
137} // namespace exegesis
Fangrui Song32401af2018-10-22 17:10:47 +0000138} // namespace llvm
Clement Courbet96715412018-05-07 09:09:48 +0000139
140#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H