Scott Anderson | b0114cb | 2012-04-09 14:08:22 -0700 | [diff] [blame] | 1 | // Copyright 2006 Google Inc. All Rights Reserved. |
| 2 | |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | // sat.h : sat stress test object interface and data structures |
| 16 | |
| 17 | #ifndef STRESSAPPTEST_SAT_H_ |
| 18 | #define STRESSAPPTEST_SAT_H_ |
| 19 | |
| 20 | #include <signal.h> |
| 21 | |
| 22 | #include <map> |
| 23 | #include <string> |
| 24 | #include <vector> |
| 25 | |
| 26 | // This file must work with autoconf on its public version, |
| 27 | // so these includes are correct. |
| 28 | #include "finelock_queue.h" |
| 29 | #include "queue.h" |
| 30 | #include "sattypes.h" |
| 31 | #include "worker.h" |
| 32 | #include "os.h" |
| 33 | |
| 34 | // SAT stress test class. |
| 35 | class Sat { |
| 36 | public: |
| 37 | // Enum for page queue implementation switch. |
| 38 | enum PageQueueType { SAT_ONELOCK, SAT_FINELOCK }; |
| 39 | |
| 40 | Sat(); |
| 41 | virtual ~Sat(); |
| 42 | |
| 43 | // Read configuration from arguments. Called first. |
| 44 | bool ParseArgs(int argc, char **argv); |
| 45 | virtual bool CheckGoogleSpecificArgs(int argc, char **argv, int *i); |
| 46 | // Initialize data structures, subclasses, and resources, |
| 47 | // based on command line args. |
| 48 | // Called after ParseArgs(). |
| 49 | bool Initialize(); |
| 50 | |
| 51 | // Execute the test. Initialize() and ParseArgs() must be called first. |
| 52 | // This must be called from a single-threaded program. |
| 53 | bool Run(); |
| 54 | |
| 55 | // Pretty print result summary. |
| 56 | // Called after Run(). |
| 57 | // Return value is success or failure of the SAT run, *not* of this function! |
| 58 | bool PrintResults(); |
| 59 | |
| 60 | // Pretty print version info. |
| 61 | bool PrintVersion(); |
| 62 | |
| 63 | // Pretty print help. |
| 64 | virtual void PrintHelp(); |
| 65 | |
| 66 | // Clean up allocations and resources. |
| 67 | // Called last. |
| 68 | bool Cleanup(); |
| 69 | |
| 70 | // Abort Run(). Only for use by Run()-installed signal handlers. |
| 71 | void Break() { user_break_ = true; } |
| 72 | |
| 73 | // Fetch and return empty and full pages into the empty and full pools. |
| 74 | bool GetValid(struct page_entry *pe); |
| 75 | bool PutValid(struct page_entry *pe); |
| 76 | bool GetEmpty(struct page_entry *pe); |
| 77 | bool PutEmpty(struct page_entry *pe); |
| 78 | |
| 79 | bool GetValid(struct page_entry *pe, int32 tag); |
| 80 | bool GetEmpty(struct page_entry *pe, int32 tag); |
| 81 | |
| 82 | // Accessor functions. |
| 83 | int verbosity() const { return verbosity_; } |
| 84 | int logfile() const { return logfile_; } |
| 85 | int page_length() const { return page_length_; } |
| 86 | int disk_pages() const { return disk_pages_; } |
| 87 | int strict() const { return strict_; } |
| 88 | int tag_mode() const { return tag_mode_; } |
| 89 | int status() const { return statuscount_; } |
| 90 | void bad_status() { statuscount_++; } |
| 91 | int errors() const { return errorcount_; } |
| 92 | int warm() const { return warm_; } |
| 93 | bool stop_on_error() const { return stop_on_error_; } |
| 94 | int32 region_mask() const { return region_mask_; } |
| 95 | // Semi-accessor to find the "nth" region to avoid replicated bit searching.. |
| 96 | int32 region_find(int32 num) const { |
| 97 | for (int i = 0; i < 32; i++) { |
| 98 | if ((1 << i) & region_mask_) { |
| 99 | if (num == 0) |
| 100 | return i; |
| 101 | num--; |
| 102 | } |
| 103 | } |
| 104 | return 0; |
| 105 | } |
| 106 | |
| 107 | // Causes false errors for unittesting. |
| 108 | // Setting to "true" causes errors to be injected. |
| 109 | void set_error_injection(bool errors) { error_injection_ = errors; } |
| 110 | bool error_injection() const { return error_injection_; } |
| 111 | |
| 112 | protected: |
| 113 | // Opens log file for writing. Returns 0 on failure. |
| 114 | bool InitializeLogfile(); |
| 115 | // Checks for supported environment. Returns 0 on failure. |
| 116 | bool CheckEnvironment(); |
| 117 | // Allocates size_ bytes of test memory. |
| 118 | bool AllocateMemory(); |
| 119 | // Initializes datapattern reference structures. |
| 120 | bool InitializePatterns(); |
| 121 | // Initializes test memory with datapatterns. |
| 122 | bool InitializePages(); |
| 123 | |
| 124 | // Start up worker threads. |
| 125 | virtual void InitializeThreads(); |
| 126 | // Spawn worker threads. |
| 127 | void SpawnThreads(); |
| 128 | // Reap worker threads. |
| 129 | void JoinThreads(); |
| 130 | // Run bandwidth and error analysis. |
| 131 | virtual void RunAnalysis(); |
| 132 | // Delete worker threads. |
| 133 | void DeleteThreads(); |
| 134 | |
| 135 | // Return the number of cpus in the system. |
| 136 | int CpuCount(); |
| 137 | |
| 138 | // Collect error counts from threads. |
| 139 | int64 GetTotalErrorCount(); |
| 140 | |
| 141 | // Command line arguments. |
| 142 | string cmdline_; |
| 143 | |
| 144 | // Memory and test configuration. |
| 145 | int runtime_seconds_; // Seconds to run. |
| 146 | int page_length_; // Length of each memory block. |
| 147 | int64 pages_; // Number of memory blocks. |
| 148 | int64 size_; // Size of memory tested, in bytes. |
| 149 | int64 size_mb_; // Size of memory tested, in MB. |
| 150 | int64 min_hugepages_mbytes_; // Minimum hugepages size. |
| 151 | int64 freepages_; // How many invalid pages we need. |
| 152 | int disk_pages_; // Number of pages per temp file. |
| 153 | uint64 paddr_base_; // Physical address base. |
| 154 | |
| 155 | // Control flags. |
| 156 | volatile sig_atomic_t user_break_; // User has signalled early exit. Used as |
| 157 | // a boolean. |
| 158 | int verbosity_; // How much to print. |
| 159 | int strict_; // Check results per transaction. |
| 160 | int warm_; // FPU warms CPU while coying. |
| 161 | int address_mode_; // 32 or 64 bit binary. |
| 162 | bool stop_on_error_; // Exit immendiately on any error. |
| 163 | bool findfiles_; // Autodetect tempfile locations. |
| 164 | |
| 165 | bool error_injection_; // Simulate errors, for unittests. |
| 166 | bool crazy_error_injection_; // Simulate lots of errors. |
| 167 | uint64 max_errorcount_; // Number of errors before forced exit. |
| 168 | int run_on_anything_; // Ignore unknown machine ereor. |
| 169 | int use_logfile_; // Log to a file. |
| 170 | char logfilename_[255]; // Name of file to log to. |
| 171 | int logfile_; // File handle to log to. |
| 172 | |
| 173 | // Disk thread options. |
| 174 | int read_block_size_; // Size of block to read from disk. |
| 175 | int write_block_size_; // Size of block to write to disk. |
| 176 | int64 segment_size_; // Size of segment to split disk into. |
| 177 | int cache_size_; // Size of disk cache. |
| 178 | int blocks_per_segment_; // Number of blocks to test per segment. |
| 179 | int read_threshold_; // Maximum time (in us) a read should take |
| 180 | // before warning of a slow read. |
| 181 | int write_threshold_; // Maximum time (in us) a write should |
| 182 | // take before warning of a slow write. |
| 183 | int non_destructive_; // Whether to use non-destructive mode for |
| 184 | // the disk test. |
| 185 | |
| 186 | // Generic Options. |
| 187 | int monitor_mode_; // Switch for monitor-only mode SAT. |
| 188 | // This switch trumps most of the other |
| 189 | // argument, as SAT will only run error |
| 190 | // polling threads. |
| 191 | int tag_mode_; // Do tagging of memory and strict |
| 192 | // checking for misplaced cachelines. |
| 193 | |
| 194 | bool do_page_map_; // Should we print a list of used pages? |
| 195 | unsigned char *page_bitmap_; // Store bitmap of physical pages seen. |
| 196 | uint64 page_bitmap_size_; // Length of physical memory represented. |
| 197 | |
| 198 | // Cpu Cache Coherency Options. |
| 199 | bool cc_test_; // Flag to decide whether to start the |
| 200 | // cache coherency threads. |
| 201 | int cc_cacheline_count_; // Number of cache line size structures. |
| 202 | int cc_inc_count_; // Number of times to increment the shared |
| 203 | // cache lines structure members. |
| 204 | |
| 205 | // Thread control. |
| 206 | int file_threads_; // Threads of file IO. |
| 207 | int net_threads_; // Threads of network IO. |
| 208 | int listen_threads_; // Threads for network IO to connect. |
| 209 | int memory_threads_; // Threads of memcpy. |
| 210 | int invert_threads_; // Threads of invert. |
| 211 | int fill_threads_; // Threads of memset. |
| 212 | int check_threads_; // Threads of strcmp. |
| 213 | int cpu_stress_threads_; // Threads of CPU stress workload. |
| 214 | int disk_threads_; // Threads of disk test. |
| 215 | int random_threads_; // Number of random disk threads. |
| 216 | int total_threads_; // Total threads used. |
| 217 | bool error_poll_; // Poll for system errors. |
| 218 | |
| 219 | // Resources. |
| 220 | cc_cacheline_data *cc_cacheline_data_; // The cache line sized datastructure |
| 221 | // used by the ccache threads |
| 222 | // (in worker.h). |
| 223 | vector<string> filename_; // Filenames for file IO. |
| 224 | vector<string> ipaddrs_; // Addresses for network IO. |
| 225 | vector<string> diskfilename_; // Filename for disk IO device. |
| 226 | // Block table for IO device. |
| 227 | vector<DiskBlockTable*> blocktables_; |
| 228 | |
| 229 | int32 region_mask_; // Bitmask of available NUMA regions. |
| 230 | int32 region_count_; // Count of available NUMA regions. |
| 231 | int32 region_[32]; // Pagecount per region. |
| 232 | int region_mode_; // What to do with NUMA hints? |
| 233 | static const int kLocalNuma = 1; // Target local memory. |
| 234 | static const int kRemoteNuma = 2; // Target remote memory. |
| 235 | |
| 236 | // Results. |
| 237 | int64 errorcount_; // Total hardware incidents seen. |
| 238 | int statuscount_; // Total test errors seen. |
| 239 | |
| 240 | // Thread type constants and types |
| 241 | enum ThreadType { |
| 242 | kMemoryType = 0, |
| 243 | kFileIOType = 1, |
| 244 | kNetIOType = 2, |
| 245 | kNetSlaveType = 3, |
| 246 | kCheckType = 4, |
| 247 | kInvertType = 5, |
| 248 | kDiskType = 6, |
| 249 | kRandomDiskType = 7, |
| 250 | kCPUType = 8, |
| 251 | kErrorType = 9, |
| 252 | kCCType = 10 |
| 253 | }; |
| 254 | |
| 255 | // Helper functions. |
| 256 | virtual void AcquireWorkerLock(); |
| 257 | virtual void ReleaseWorkerLock(); |
| 258 | pthread_mutex_t worker_lock_; // Lock access to the worker thread structure. |
| 259 | typedef vector<WorkerThread*> WorkerVector; |
| 260 | typedef map<int, WorkerVector*> WorkerMap; |
| 261 | // Contains all worker threads. |
| 262 | WorkerMap workers_map_; |
| 263 | // Delay between power spikes. |
| 264 | time_t pause_delay_; |
| 265 | // The duration of each pause (for power spikes). |
| 266 | time_t pause_duration_; |
| 267 | // For the workers we pause and resume to create power spikes. |
| 268 | WorkerStatus power_spike_status_; |
| 269 | // For the workers we never pause. |
| 270 | WorkerStatus continuous_status_; |
| 271 | |
| 272 | class OsLayer *os_; // Os abstraction: put hacks here. |
| 273 | class PatternList *patternlist_; // Access to global data patterns. |
| 274 | |
| 275 | // RunAnalysis methods |
| 276 | void AnalysisAllStats(); // Summary of all runs. |
| 277 | void MemoryStats(); |
| 278 | void FileStats(); |
| 279 | void NetStats(); |
| 280 | void CheckStats(); |
| 281 | void InvertStats(); |
| 282 | void DiskStats(); |
| 283 | |
| 284 | void QueueStats(); |
| 285 | |
| 286 | // Physical page use reporting. |
| 287 | void AddrMapInit(); |
| 288 | void AddrMapUpdate(struct page_entry *pe); |
| 289 | void AddrMapPrint(); |
| 290 | |
| 291 | // additional memory data from google-specific tests. |
| 292 | virtual void GoogleMemoryStats(float *memcopy_data, |
| 293 | float *memcopy_bandwidth); |
| 294 | |
| 295 | virtual void GoogleOsOptions(std::map<std::string, std::string> *options); |
| 296 | |
| 297 | // Page queues, only one of (valid_+empty_) or (finelock_q_) will be used |
| 298 | // at a time. A commandline switch controls which queue implementation will |
| 299 | // be used. |
| 300 | class PageEntryQueue *valid_; // Page queue structure, valid pages. |
| 301 | class PageEntryQueue *empty_; // Page queue structure, free pages. |
| 302 | class FineLockPEQueue *finelock_q_; // Page queue with fine-grain locks |
| 303 | Sat::PageQueueType pe_q_implementation_; // Queue implementation switch |
| 304 | |
| 305 | DISALLOW_COPY_AND_ASSIGN(Sat); |
| 306 | }; |
| 307 | |
| 308 | Sat *SatFactory(); |
| 309 | |
| 310 | #endif // STRESSAPPTEST_SAT_H_ |