Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 1 | # export-to-postgresql.py: export perf data to a postgresql database |
| 2 | # Copyright (c) 2014, Intel Corporation. |
| 3 | # |
| 4 | # This program is free software; you can redistribute it and/or modify it |
| 5 | # under the terms and conditions of the GNU General Public License, |
| 6 | # version 2, as published by the Free Software Foundation. |
| 7 | # |
| 8 | # This program is distributed in the hope it will be useful, but WITHOUT |
| 9 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 10 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| 11 | # more details. |
| 12 | |
| 13 | import os |
| 14 | import sys |
| 15 | import struct |
| 16 | import datetime |
| 17 | |
| 18 | from PySide.QtSql import * |
| 19 | |
| 20 | # Need to access PostgreSQL C library directly to use COPY FROM STDIN |
| 21 | from ctypes import * |
| 22 | libpq = CDLL("libpq.so.5") |
| 23 | PQconnectdb = libpq.PQconnectdb |
| 24 | PQconnectdb.restype = c_void_p |
| 25 | PQfinish = libpq.PQfinish |
| 26 | PQstatus = libpq.PQstatus |
| 27 | PQexec = libpq.PQexec |
| 28 | PQexec.restype = c_void_p |
| 29 | PQresultStatus = libpq.PQresultStatus |
| 30 | PQputCopyData = libpq.PQputCopyData |
| 31 | PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ] |
| 32 | PQputCopyEnd = libpq.PQputCopyEnd |
| 33 | PQputCopyEnd.argtypes = [ c_void_p, c_void_p ] |
| 34 | |
| 35 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ |
| 36 | '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') |
| 37 | |
| 38 | # These perf imports are not used at present |
| 39 | #from perf_trace_context import * |
| 40 | #from Core import * |
| 41 | |
| 42 | perf_db_export_mode = True |
| 43 | |
| 44 | def usage(): |
| 45 | print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>]" |
| 46 | print >> sys.stderr, "where: columns 'all' or 'branches'" |
| 47 | raise Exception("Too few arguments") |
| 48 | |
| 49 | if (len(sys.argv) < 2): |
| 50 | usage() |
| 51 | |
| 52 | dbname = sys.argv[1] |
| 53 | |
| 54 | if (len(sys.argv) >= 3): |
| 55 | columns = sys.argv[2] |
| 56 | else: |
| 57 | columns = "all" |
| 58 | |
| 59 | if columns not in ("all", "branches"): |
| 60 | usage() |
| 61 | |
| 62 | branches = (columns == "branches") |
| 63 | |
| 64 | output_dir_name = os.getcwd() + "/" + dbname + "-perf-data" |
| 65 | os.mkdir(output_dir_name) |
| 66 | |
| 67 | def do_query(q, s): |
| 68 | if (q.exec_(s)): |
| 69 | return |
| 70 | raise Exception("Query failed: " + q.lastError().text()) |
| 71 | |
| 72 | print datetime.datetime.today(), "Creating database..." |
| 73 | |
| 74 | db = QSqlDatabase.addDatabase('QPSQL') |
| 75 | query = QSqlQuery(db) |
| 76 | db.setDatabaseName('postgres') |
| 77 | db.open() |
| 78 | try: |
| 79 | do_query(query, 'CREATE DATABASE ' + dbname) |
| 80 | except: |
| 81 | os.rmdir(output_dir_name) |
| 82 | raise |
| 83 | query.finish() |
| 84 | query.clear() |
| 85 | db.close() |
| 86 | |
| 87 | db.setDatabaseName(dbname) |
| 88 | db.open() |
| 89 | |
| 90 | query = QSqlQuery(db) |
| 91 | do_query(query, 'SET client_min_messages TO WARNING') |
| 92 | |
| 93 | do_query(query, 'CREATE TABLE selected_events (' |
| 94 | 'id bigint NOT NULL,' |
| 95 | 'name varchar(80))') |
| 96 | do_query(query, 'CREATE TABLE machines (' |
| 97 | 'id bigint NOT NULL,' |
| 98 | 'pid integer,' |
| 99 | 'root_dir varchar(4096))') |
| 100 | do_query(query, 'CREATE TABLE threads (' |
| 101 | 'id bigint NOT NULL,' |
| 102 | 'machine_id bigint,' |
| 103 | 'process_id bigint,' |
| 104 | 'pid integer,' |
| 105 | 'tid integer)') |
| 106 | do_query(query, 'CREATE TABLE comms (' |
| 107 | 'id bigint NOT NULL,' |
| 108 | 'comm varchar(16))') |
| 109 | do_query(query, 'CREATE TABLE comm_threads (' |
| 110 | 'id bigint NOT NULL,' |
| 111 | 'comm_id bigint,' |
| 112 | 'thread_id bigint)') |
| 113 | do_query(query, 'CREATE TABLE dsos (' |
| 114 | 'id bigint NOT NULL,' |
| 115 | 'machine_id bigint,' |
| 116 | 'short_name varchar(256),' |
| 117 | 'long_name varchar(4096),' |
| 118 | 'build_id varchar(64))') |
| 119 | do_query(query, 'CREATE TABLE symbols (' |
| 120 | 'id bigint NOT NULL,' |
| 121 | 'dso_id bigint,' |
| 122 | 'sym_start bigint,' |
| 123 | 'sym_end bigint,' |
| 124 | 'binding integer,' |
| 125 | 'name varchar(2048))') |
Adrian Hunter | c29414f | 2014-10-30 16:09:44 +0200 | [diff] [blame^] | 126 | do_query(query, 'CREATE TABLE branch_types (' |
| 127 | 'id integer NOT NULL,' |
| 128 | 'name varchar(80))') |
| 129 | |
Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 130 | if branches: |
| 131 | do_query(query, 'CREATE TABLE samples (' |
| 132 | 'id bigint NOT NULL,' |
| 133 | 'evsel_id bigint,' |
| 134 | 'machine_id bigint,' |
| 135 | 'thread_id bigint,' |
| 136 | 'comm_id bigint,' |
| 137 | 'dso_id bigint,' |
| 138 | 'symbol_id bigint,' |
| 139 | 'sym_offset bigint,' |
| 140 | 'ip bigint,' |
| 141 | 'time bigint,' |
| 142 | 'cpu integer,' |
| 143 | 'to_dso_id bigint,' |
| 144 | 'to_symbol_id bigint,' |
| 145 | 'to_sym_offset bigint,' |
Adrian Hunter | c29414f | 2014-10-30 16:09:44 +0200 | [diff] [blame^] | 146 | 'to_ip bigint,' |
| 147 | 'branch_type integer,' |
| 148 | 'in_tx boolean)') |
Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 149 | else: |
| 150 | do_query(query, 'CREATE TABLE samples (' |
| 151 | 'id bigint NOT NULL,' |
| 152 | 'evsel_id bigint,' |
| 153 | 'machine_id bigint,' |
| 154 | 'thread_id bigint,' |
| 155 | 'comm_id bigint,' |
| 156 | 'dso_id bigint,' |
| 157 | 'symbol_id bigint,' |
| 158 | 'sym_offset bigint,' |
| 159 | 'ip bigint,' |
| 160 | 'time bigint,' |
| 161 | 'cpu integer,' |
| 162 | 'to_dso_id bigint,' |
| 163 | 'to_symbol_id bigint,' |
| 164 | 'to_sym_offset bigint,' |
| 165 | 'to_ip bigint,' |
| 166 | 'period bigint,' |
| 167 | 'weight bigint,' |
| 168 | 'transaction bigint,' |
Adrian Hunter | c29414f | 2014-10-30 16:09:44 +0200 | [diff] [blame^] | 169 | 'data_src bigint,' |
| 170 | 'branch_type integer,' |
| 171 | 'in_tx boolean)') |
Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 172 | |
| 173 | do_query(query, 'CREATE VIEW samples_view AS ' |
| 174 | 'SELECT ' |
| 175 | 'id,' |
| 176 | 'time,' |
| 177 | 'cpu,' |
| 178 | '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' |
| 179 | '(SELECT tid FROM threads WHERE id = thread_id) AS tid,' |
| 180 | '(SELECT comm FROM comms WHERE id = comm_id) AS command,' |
| 181 | '(SELECT name FROM selected_events WHERE id = evsel_id) AS event,' |
| 182 | 'to_hex(ip) AS ip_hex,' |
| 183 | '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,' |
| 184 | 'sym_offset,' |
| 185 | '(SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name,' |
| 186 | 'to_hex(to_ip) AS to_ip_hex,' |
| 187 | '(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,' |
| 188 | 'to_sym_offset,' |
Adrian Hunter | c29414f | 2014-10-30 16:09:44 +0200 | [diff] [blame^] | 189 | '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' |
| 190 | '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,' |
| 191 | 'in_tx' |
Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 192 | ' FROM samples') |
| 193 | |
| 194 | |
| 195 | file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0) |
| 196 | file_trailer = "\377\377" |
| 197 | |
| 198 | def open_output_file(file_name): |
| 199 | path_name = output_dir_name + "/" + file_name |
| 200 | file = open(path_name, "w+") |
| 201 | file.write(file_header) |
| 202 | return file |
| 203 | |
| 204 | def close_output_file(file): |
| 205 | file.write(file_trailer) |
| 206 | file.close() |
| 207 | |
| 208 | def copy_output_file_direct(file, table_name): |
| 209 | close_output_file(file) |
| 210 | sql = "COPY " + table_name + " FROM '" + file.name + "' (FORMAT 'binary')" |
| 211 | do_query(query, sql) |
| 212 | |
| 213 | # Use COPY FROM STDIN because security may prevent postgres from accessing the files directly |
| 214 | def copy_output_file(file, table_name): |
| 215 | conn = PQconnectdb("dbname = " + dbname) |
| 216 | if (PQstatus(conn)): |
| 217 | raise Exception("COPY FROM STDIN PQconnectdb failed") |
| 218 | file.write(file_trailer) |
| 219 | file.seek(0) |
| 220 | sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')" |
| 221 | res = PQexec(conn, sql) |
| 222 | if (PQresultStatus(res) != 4): |
| 223 | raise Exception("COPY FROM STDIN PQexec failed") |
| 224 | data = file.read(65536) |
| 225 | while (len(data)): |
| 226 | ret = PQputCopyData(conn, data, len(data)) |
| 227 | if (ret != 1): |
| 228 | raise Exception("COPY FROM STDIN PQputCopyData failed, error " + str(ret)) |
| 229 | data = file.read(65536) |
| 230 | ret = PQputCopyEnd(conn, None) |
| 231 | if (ret != 1): |
| 232 | raise Exception("COPY FROM STDIN PQputCopyEnd failed, error " + str(ret)) |
| 233 | PQfinish(conn) |
| 234 | |
| 235 | def remove_output_file(file): |
| 236 | name = file.name |
| 237 | file.close() |
| 238 | os.unlink(name) |
| 239 | |
| 240 | evsel_file = open_output_file("evsel_table.bin") |
| 241 | machine_file = open_output_file("machine_table.bin") |
| 242 | thread_file = open_output_file("thread_table.bin") |
| 243 | comm_file = open_output_file("comm_table.bin") |
| 244 | comm_thread_file = open_output_file("comm_thread_table.bin") |
| 245 | dso_file = open_output_file("dso_table.bin") |
| 246 | symbol_file = open_output_file("symbol_table.bin") |
Adrian Hunter | c29414f | 2014-10-30 16:09:44 +0200 | [diff] [blame^] | 247 | branch_type_file = open_output_file("branch_type_table.bin") |
Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 248 | sample_file = open_output_file("sample_table.bin") |
| 249 | |
| 250 | def trace_begin(): |
| 251 | print datetime.datetime.today(), "Writing to intermediate files..." |
| 252 | # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs |
| 253 | evsel_table(0, "unknown") |
| 254 | machine_table(0, 0, "unknown") |
| 255 | thread_table(0, 0, 0, -1, -1) |
| 256 | comm_table(0, "unknown") |
| 257 | dso_table(0, 0, "unknown", "unknown", "") |
| 258 | symbol_table(0, 0, 0, 0, 0, "unknown") |
| 259 | |
| 260 | unhandled_count = 0 |
| 261 | |
| 262 | def trace_end(): |
| 263 | print datetime.datetime.today(), "Copying to database..." |
| 264 | copy_output_file(evsel_file, "selected_events") |
| 265 | copy_output_file(machine_file, "machines") |
| 266 | copy_output_file(thread_file, "threads") |
| 267 | copy_output_file(comm_file, "comms") |
| 268 | copy_output_file(comm_thread_file, "comm_threads") |
| 269 | copy_output_file(dso_file, "dsos") |
| 270 | copy_output_file(symbol_file, "symbols") |
Adrian Hunter | c29414f | 2014-10-30 16:09:44 +0200 | [diff] [blame^] | 271 | copy_output_file(branch_type_file, "branch_types") |
Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 272 | copy_output_file(sample_file, "samples") |
| 273 | |
| 274 | print datetime.datetime.today(), "Removing intermediate files..." |
| 275 | remove_output_file(evsel_file) |
| 276 | remove_output_file(machine_file) |
| 277 | remove_output_file(thread_file) |
| 278 | remove_output_file(comm_file) |
| 279 | remove_output_file(comm_thread_file) |
| 280 | remove_output_file(dso_file) |
| 281 | remove_output_file(symbol_file) |
Adrian Hunter | c29414f | 2014-10-30 16:09:44 +0200 | [diff] [blame^] | 282 | remove_output_file(branch_type_file) |
Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 283 | remove_output_file(sample_file) |
| 284 | os.rmdir(output_dir_name) |
| 285 | print datetime.datetime.today(), "Adding primary keys" |
| 286 | do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)') |
| 287 | do_query(query, 'ALTER TABLE machines ADD PRIMARY KEY (id)') |
| 288 | do_query(query, 'ALTER TABLE threads ADD PRIMARY KEY (id)') |
| 289 | do_query(query, 'ALTER TABLE comms ADD PRIMARY KEY (id)') |
| 290 | do_query(query, 'ALTER TABLE comm_threads ADD PRIMARY KEY (id)') |
| 291 | do_query(query, 'ALTER TABLE dsos ADD PRIMARY KEY (id)') |
| 292 | do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)') |
Adrian Hunter | c29414f | 2014-10-30 16:09:44 +0200 | [diff] [blame^] | 293 | do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)') |
Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 294 | do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)') |
| 295 | |
| 296 | print datetime.datetime.today(), "Adding foreign keys" |
| 297 | do_query(query, 'ALTER TABLE threads ' |
| 298 | 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),' |
| 299 | 'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)') |
| 300 | do_query(query, 'ALTER TABLE comm_threads ' |
| 301 | 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),' |
| 302 | 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id)') |
| 303 | do_query(query, 'ALTER TABLE dsos ' |
| 304 | 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id)') |
| 305 | do_query(query, 'ALTER TABLE symbols ' |
| 306 | 'ADD CONSTRAINT dsofk FOREIGN KEY (dso_id) REFERENCES dsos (id)') |
| 307 | do_query(query, 'ALTER TABLE samples ' |
| 308 | 'ADD CONSTRAINT evselfk FOREIGN KEY (evsel_id) REFERENCES selected_events (id),' |
| 309 | 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),' |
| 310 | 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),' |
| 311 | 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),' |
| 312 | 'ADD CONSTRAINT dsofk FOREIGN KEY (dso_id) REFERENCES dsos (id),' |
| 313 | 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),' |
| 314 | 'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),' |
| 315 | 'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)') |
| 316 | |
| 317 | if (unhandled_count): |
| 318 | print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events" |
| 319 | print datetime.datetime.today(), "Done" |
| 320 | |
| 321 | def trace_unhandled(event_name, context, event_fields_dict): |
| 322 | global unhandled_count |
| 323 | unhandled_count += 1 |
| 324 | |
| 325 | def sched__sched_switch(*x): |
| 326 | pass |
| 327 | |
| 328 | def evsel_table(evsel_id, evsel_name, *x): |
| 329 | n = len(evsel_name) |
| 330 | fmt = "!hiqi" + str(n) + "s" |
| 331 | value = struct.pack(fmt, 2, 8, evsel_id, n, evsel_name) |
| 332 | evsel_file.write(value) |
| 333 | |
| 334 | def machine_table(machine_id, pid, root_dir, *x): |
| 335 | n = len(root_dir) |
| 336 | fmt = "!hiqiii" + str(n) + "s" |
| 337 | value = struct.pack(fmt, 3, 8, machine_id, 4, pid, n, root_dir) |
| 338 | machine_file.write(value) |
| 339 | |
| 340 | def thread_table(thread_id, machine_id, process_id, pid, tid, *x): |
| 341 | value = struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid) |
| 342 | thread_file.write(value) |
| 343 | |
| 344 | def comm_table(comm_id, comm_str, *x): |
| 345 | n = len(comm_str) |
| 346 | fmt = "!hiqi" + str(n) + "s" |
| 347 | value = struct.pack(fmt, 2, 8, comm_id, n, comm_str) |
| 348 | comm_file.write(value) |
| 349 | |
| 350 | def comm_thread_table(comm_thread_id, comm_id, thread_id, *x): |
| 351 | fmt = "!hiqiqiq" |
| 352 | value = struct.pack(fmt, 3, 8, comm_thread_id, 8, comm_id, 8, thread_id) |
| 353 | comm_thread_file.write(value) |
| 354 | |
| 355 | def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x): |
| 356 | n1 = len(short_name) |
| 357 | n2 = len(long_name) |
| 358 | n3 = len(build_id) |
| 359 | fmt = "!hiqiqi" + str(n1) + "si" + str(n2) + "si" + str(n3) + "s" |
| 360 | value = struct.pack(fmt, 5, 8, dso_id, 8, machine_id, n1, short_name, n2, long_name, n3, build_id) |
| 361 | dso_file.write(value) |
| 362 | |
| 363 | def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x): |
| 364 | n = len(symbol_name) |
| 365 | fmt = "!hiqiqiqiqiii" + str(n) + "s" |
| 366 | value = struct.pack(fmt, 6, 8, symbol_id, 8, dso_id, 8, sym_start, 8, sym_end, 4, binding, n, symbol_name) |
| 367 | symbol_file.write(value) |
| 368 | |
Adrian Hunter | c29414f | 2014-10-30 16:09:44 +0200 | [diff] [blame^] | 369 | def branch_type_table(branch_type, name, *x): |
| 370 | n = len(name) |
| 371 | fmt = "!hiii" + str(n) + "s" |
| 372 | value = struct.pack(fmt, 2, 4, branch_type, n, name) |
| 373 | branch_type_file.write(value) |
| 374 | |
| 375 | def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x): |
Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 376 | if branches: |
Adrian Hunter | c29414f | 2014-10-30 16:09:44 +0200 | [diff] [blame^] | 377 | value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx) |
Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 378 | else: |
Adrian Hunter | c29414f | 2014-10-30 16:09:44 +0200 | [diff] [blame^] | 379 | value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx) |
Adrian Hunter | 2987e32 | 2014-10-23 13:45:15 +0300 | [diff] [blame] | 380 | sample_file.write(value) |