#!/usr/bin/env python

import argparse
import email.mime.multipart
import email.mime.text
import logging
import os.path
import pickle
import re
import smtplib
import subprocess
import sys
from datetime import datetime, timedelta
from phabricator import Phabricator

# Setting up a virtualenv to run this script can be done by running the
# following commands:
# $ virtualenv venv
# $ . ./venv/bin/activate
# $ pip install Phabricator

GIT_REPO_METADATA = (("llvm", "https://llvm.org/git/llvm.git"), )

# The below PhabXXX classes represent objects as modelled by Phabricator.
# The classes can be serialized to disk, to try and make sure that we don't
# needlessly have to re-fetch lots of data from Phabricator, as that would
# make this script unusably slow.
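#
# A minimal usage sketch (illustrative) of the caching round-trip implemented
# below; the review id 12345 is hypothetical:
#   cache = ReviewsCache()
#   cache.populate_cache_from_disk()  # loads PhabReviews.pickle, if present
#   review = cache.get(12345)
#   cache.write_cache_to_disk()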


class PhabObject:
    OBJECT_KIND = None

    def __init__(self, id):
        self.id = id


class PhabObjectCache:
    def __init__(self, PhabObjectClass):
        self.PhabObjectClass = PhabObjectClass
        self.most_recent_info = None
        self.oldest_info = None
        self.id2PhabObjects = {}

    def get_name(self):
        return self.PhabObjectClass.OBJECT_KIND + "sCache"

    def get(self, id):
        if id not in self.id2PhabObjects:
            self.id2PhabObjects[id] = self.PhabObjectClass(id)
        return self.id2PhabObjects[id]

    def get_ids_in_cache(self):
        return self.id2PhabObjects.keys()

    def get_objects(self):
        return self.id2PhabObjects.values()

    DEFAULT_DIRECTORY = "PhabObjectCache"

    def _get_pickle_name(self, directory):
        file_name = "Phab" + self.PhabObjectClass.OBJECT_KIND + "s.pickle"
        return os.path.join(directory, file_name)

    def populate_cache_from_disk(self, directory=DEFAULT_DIRECTORY):
        """
        FIXME: consider if serializing to JSON would bring interoperability
        advantages over serializing to pickle.
        """
        try:
            f = open(self._get_pickle_name(directory), "rb")
        except IOError as err:
            print("Could not find cache. Error message: {0}. Continuing..."
                  .format(err))
        else:
            with f:
                try:
                    d = pickle.load(f)
                    self.__dict__.update(d)
                except EOFError as err:
                    print("Cache seems to be corrupt. " +
                          "Not using cache. Error message: {0}".format(err))

    def write_cache_to_disk(self, directory=DEFAULT_DIRECTORY):
        if not os.path.exists(directory):
            os.makedirs(directory)
        with open(self._get_pickle_name(directory), "wb") as f:
            pickle.dump(self.__dict__, f)
        print("wrote cache to disk, most_recent_info= {0}".format(
            datetime.fromtimestamp(self.most_recent_info)
            if self.most_recent_info is not None else None))


class PhabReview(PhabObject):
    OBJECT_KIND = "Review"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, title, dateCreated, dateModified, author):
        self.title = title
        self.dateCreated = dateCreated
        self.dateModified = dateModified
        self.author = author

    def setPhabDiffs(self, phabDiffs):
        self.phabDiffs = phabDiffs


class PhabUser(PhabObject):
    OBJECT_KIND = "User"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, phid, realName):
        self.phid = phid
        self.realName = realName


class PhabHunk:
    def __init__(self, rest_api_hunk):
        self.oldOffset = int(rest_api_hunk["oldOffset"])
        self.oldLength = int(rest_api_hunk["oldLength"])
        # self.actual_lines_changed_offset will contain the offsets of the
        # lines that were changed in this hunk.
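        # For example (illustrative): with oldOffset=10 and 3 context lines,
        # a corpus whose only "-" line is old line 12 produces the single
        # range (10, 16); the start is clamped at oldOffset.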
        self.actual_lines_changed_offset = []
        offset = self.oldOffset
        inHunk = False
        hunkStart = -1
        contextLines = 3
        for line in rest_api_hunk["corpus"].split("\n"):
            if line.startswith("+"):
                # line is a new line that got introduced in this patch.
                # Do not record it as a changed line.
                if inHunk is False:
                    inHunk = True
                    hunkStart = max(self.oldOffset, offset - contextLines)
                continue
            if line.startswith("-"):
                # line was changed or removed from the older version of the
                # code. Record it as a changed line.
                if inHunk is False:
                    inHunk = True
                    hunkStart = max(self.oldOffset, offset - contextLines)
                offset += 1
                continue
            # line is a context line.
            if inHunk is True:
                inHunk = False
                hunkEnd = offset + contextLines
                self.actual_lines_changed_offset.append((hunkStart, hunkEnd))
            offset += 1
        if inHunk is True:
            hunkEnd = offset + contextLines
            self.actual_lines_changed_offset.append((hunkStart, hunkEnd))

        # The above algorithm could result in adjacent or overlapping ranges
        # being recorded into self.actual_lines_changed_offset.
        # Merge the adjacent and overlapping ranges in there:
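        # For example (illustrative):
        #   [(10, 16), (14, 20), (30, 35)] becomes [(10, 20), (30, 35)].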
        t = []
        lastRange = None
        for start, end in self.actual_lines_changed_offset + \
                [(sys.maxsize, sys.maxsize)]:
            if lastRange is None:
                lastRange = (start, end)
            else:
                if lastRange[1] >= start:
                    lastRange = (lastRange[0], end)
                else:
                    t.append(lastRange)
                    lastRange = (start, end)
        self.actual_lines_changed_offset = t


class PhabChange:
    def __init__(self, rest_api_change):
        self.oldPath = rest_api_change["oldPath"]
        self.hunks = [PhabHunk(h) for h in rest_api_change["hunks"]]


class PhabDiff(PhabObject):
    OBJECT_KIND = "Diff"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, rest_api_results):
        self.revisionID = rest_api_results["revisionID"]
        self.dateModified = int(rest_api_results["dateModified"])
        self.dateCreated = int(rest_api_results["dateCreated"])
        self.changes = [PhabChange(c) for c in rest_api_results["changes"]]


class ReviewsCache(PhabObjectCache):
    def __init__(self):
        PhabObjectCache.__init__(self, PhabReview)


class UsersCache(PhabObjectCache):
    def __init__(self):
        PhabObjectCache.__init__(self, PhabUser)


reviews_cache = ReviewsCache()
users_cache = UsersCache()


def init_phab_connection():
    phab = Phabricator()
    phab.update_interfaces()
    return phab


def update_cached_info(phab, cache, phab_query, order, record_results,
                       max_nr_entries_per_fetch, max_nr_days_to_cache):
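    """
    Fetch info from Phabricator page by page, following the "after" cursor,
    until either the cache already covers the requested window
    (max_nr_days_to_cache before the most recent record) or all records have
    been fetched. The cache is written back to disk as pages arrive.
    """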
    q = phab
    LIMIT = max_nr_entries_per_fetch
    for query_step in phab_query:
        q = getattr(q, query_step)
    results = q(order=order, limit=LIMIT)
    most_recent_info, oldest_info = record_results(cache, results, phab)
    oldest_info_to_fetch = datetime.fromtimestamp(most_recent_info) - \
        timedelta(days=max_nr_days_to_cache)
    most_recent_info_overall = most_recent_info
    cache.write_cache_to_disk()
    after = results["cursor"]["after"]
    print("after: {0!r}".format(after))
    print("most_recent_info: {0}".format(
        datetime.fromtimestamp(most_recent_info)))
    while (after is not None
           and datetime.fromtimestamp(oldest_info) > oldest_info_to_fetch):
        need_more_older_data = \
            (cache.oldest_info is None or
             datetime.fromtimestamp(cache.oldest_info) > oldest_info_to_fetch)
        print(("need_more_older_data={0} cache.oldest_info={1} " +
               "oldest_info_to_fetch={2}").format(
                   need_more_older_data,
                   datetime.fromtimestamp(cache.oldest_info)
                   if cache.oldest_info is not None else None,
                   oldest_info_to_fetch))
        need_more_newer_data = \
            (cache.most_recent_info is None or
             cache.most_recent_info < most_recent_info)
        print(("need_more_newer_data={0} cache.most_recent_info={1} " +
               "most_recent_info={2}")
              .format(need_more_newer_data, cache.most_recent_info,
                      most_recent_info))
        if not need_more_older_data and not need_more_newer_data:
            break
        results = q(order=order, after=after, limit=LIMIT)
        most_recent_info, oldest_info = record_results(cache, results, phab)
        after = results["cursor"]["after"]
        print("after: {0!r}".format(after))
        print("most_recent_info: {0}".format(
            datetime.fromtimestamp(most_recent_info)))
        cache.write_cache_to_disk()
    cache.most_recent_info = most_recent_info_overall
    if after is None:
        # We fetched all records, so mark the cache as containing all info
        # since the start of time.
        oldest_info = 0
    cache.oldest_info = oldest_info
    cache.write_cache_to_disk()


def record_reviews(cache, reviews, phab):
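    """
    Record the reviews from one page of query results into the cache,
    (re)fetching the diffs for any review that is new or was modified since
    it was last cached. Returns the most recent and oldest dateModified
    values seen on this page.
    """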
    most_recent_info = None
    oldest_info = None
    for reviewInfo in reviews["data"]:
        if reviewInfo["type"] != "DREV":
            continue
        id = reviewInfo["id"]
        # phid = reviewInfo["phid"]
        dateModified = int(reviewInfo["fields"]["dateModified"])
        dateCreated = int(reviewInfo["fields"]["dateCreated"])
        title = reviewInfo["fields"]["title"]
        author = reviewInfo["fields"]["authorPHID"]
        phabReview = cache.get(id)
        if "dateModified" not in phabReview.__dict__ or \
           dateModified > phabReview.dateModified:
            diff_results = phab.differential.querydiffs(revisionIDs=[id])
            diff_ids = sorted(diff_results.keys())
            phabDiffs = []
            for diff_id in diff_ids:
                diffInfo = diff_results[diff_id]
                d = PhabDiff(diff_id)
                d.update(diffInfo)
                phabDiffs.append(d)
            phabReview.update(title, dateCreated, dateModified, author)
            phabReview.setPhabDiffs(phabDiffs)
            print("Updated D{0} modified on {1} ({2} diffs)".format(
                id, datetime.fromtimestamp(dateModified), len(phabDiffs)))

        if most_recent_info is None:
            most_recent_info = dateModified
        elif most_recent_info < dateModified:
            most_recent_info = dateModified

        if oldest_info is None:
            oldest_info = dateModified
        elif oldest_info > dateModified:
            oldest_info = dateModified
    return most_recent_info, oldest_info


def record_users(cache, users, phab):
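    """
    Record the users from one page of query results into the cache. Returns
    the most recent and oldest dateModified values seen on this page.
    """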
    most_recent_info = None
    oldest_info = None
    for info in users["data"]:
        if info["type"] != "USER":
            continue
        id = info["id"]
        phid = info["phid"]
        dateModified = int(info["fields"]["dateModified"])
        # dateCreated = int(info["fields"]["dateCreated"])
        realName = info["fields"]["realName"]
        phabUser = cache.get(id)
        phabUser.update(phid, realName)
        if most_recent_info is None:
            most_recent_info = dateModified
        elif most_recent_info < dateModified:
            most_recent_info = dateModified
        if oldest_info is None:
            oldest_info = dateModified
        elif oldest_info > dateModified:
            oldest_info = dateModified
    return most_recent_info, oldest_info


PHABCACHESINFO = ((reviews_cache, ("differential", "revision", "search"),
                   "updated", record_reviews, 5, 7),
                  (users_cache, ("user", "search"), "newest", record_users,
                   100, 1000))
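# Each entry above is (cache, phab_query, order, record_results,
# max_nr_entries_per_fetch, max_nr_days_to_cache): e.g. reviews are fetched
# through phab.differential.revision.search, ordered by "updated", 5 entries
# per fetch, keeping 7 days of history.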


def load_cache():
    for cache, phab_query, order, record_results, _, _ in PHABCACHESINFO:
        cache.populate_cache_from_disk()
        print("Loaded {0}: {1} entries".format(
            cache.get_name(), len(cache.get_ids_in_cache())))
        print("Loaded {0} most recent info: {1}".format(
            cache.get_name(),
            datetime.fromtimestamp(cache.most_recent_info)
            if cache.most_recent_info is not None else None))


def update_cache(phab):
    load_cache()
    for cache, phab_query, order, record_results, max_nr_entries_per_fetch, \
            max_nr_days_to_cache in PHABCACHESINFO:
        update_cached_info(phab, cache, phab_query, order, record_results,
                           max_nr_entries_per_fetch, max_nr_days_to_cache)
        ids_in_cache = cache.get_ids_in_cache()
        print("{0} objects in {1}".format(len(ids_in_cache), cache.get_name()))
        cache.write_cache_to_disk()


def get_most_recent_reviews(days):
    newest_reviews = sorted(
        reviews_cache.get_objects(), key=lambda r: -r.dateModified)
    if len(newest_reviews) == 0:
        return newest_reviews
    most_recent_review_time = \
        datetime.fromtimestamp(newest_reviews[0].dateModified)
    cut_off_date = most_recent_review_time - timedelta(days=days)
    result = []
    for review in newest_reviews:
        if datetime.fromtimestamp(review.dateModified) < cut_off_date:
            return result
        result.append(review)
    return result


# All of the above code is about fetching data from Phabricator and caching it
# on local disk. The below code contains the actual "business logic" for this
# script.

_userphid2realname = None


def get_real_name_from_author(user_phid):
    global _userphid2realname
    if _userphid2realname is None:
        _userphid2realname = {}
        for user in users_cache.get_objects():
            _userphid2realname[user.phid] = user.realName
    return _userphid2realname.get(user_phid, "unknown")


def print_most_recent_reviews(phab, days, filter_reviewers):
    msgs = []

    def add_msg(msg):
        msgs.append(msg)
        print(msg)

    newest_reviews = get_most_recent_reviews(days)
    add_msg("The following reviews look interesting to review. " +
            "The report below has 2 sections. The first section is " +
            "organized per review; the second section is organized " +
            "per potential reviewer.\n")
    oldest_review = newest_reviews[-1] if len(newest_reviews) > 0 else None
    oldest_datetime = \
        datetime.fromtimestamp(oldest_review.dateModified) \
        if oldest_review else None
    add_msg(("The report below is based on analyzing the reviews that got " +
             "touched in the past {0} days (since {1}). " +
             "The script found {2} such reviews.\n").format(
                 days, oldest_datetime, len(newest_reviews)))
    reviewer2reviews_and_scores = {}
    for i, review in enumerate(newest_reviews):
        matched_reviewers = find_reviewers_for_review(review)
        matched_reviewers = filter_reviewers(matched_reviewers)
        if len(matched_reviewers) == 0:
            continue
        add_msg(("{0:>3}. https://reviews.llvm.org/D{1} by {2}\n     {3}\n" +
                 "     Last updated on {4}").format(
                     i, review.id,
                     get_real_name_from_author(review.author), review.title,
                     datetime.fromtimestamp(review.dateModified)))
        for reviewer, scores in matched_reviewers:
            add_msg("    potential reviewer {0}, score {1}".format(
                reviewer,
                "(" + "/".join(["{0:.1f}%".format(s) for s in scores]) + ")"))
            if reviewer not in reviewer2reviews_and_scores:
                reviewer2reviews_and_scores[reviewer] = []
            reviewer2reviews_and_scores[reviewer].append((review, scores))

    # Print out a summary per reviewer.
    for reviewer in sorted(reviewer2reviews_and_scores.keys()):
        reviews_and_scores = reviewer2reviews_and_scores[reviewer]
        reviews_and_scores.sort(key=lambda rs: rs[1], reverse=True)
        add_msg("\n\nSUMMARY FOR {0} (found {1} reviews):".format(
            reviewer, len(reviews_and_scores)))
        for review, scores in reviews_and_scores:
            add_msg("[{0}] https://reviews.llvm.org/D{1} '{2}' by {3}".format(
                "/".join(["{0:.1f}%".format(s) for s in scores]), review.id,
                review.title, get_real_name_from_author(review.author)))
    return "\n".join(msgs)


def get_git_cmd_output(cmd):
    output = None
    try:
        logging.debug(cmd)
        output = subprocess.check_output(
            cmd, shell=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        logging.debug(str(e))
    if output is None:
        return None
    return output.decode("utf-8", errors='ignore')


reAuthorMail = re.compile("^author-mail <([^>]*)>.*$")
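# `git blame --line-porcelain` prints one "author-mail <...>" header line per
# blamed line, e.g. (illustrative): "author-mail <jane@example.com>". The
# regex above extracts the email address from those lines.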


def parse_blame_output_line_porcelain(blame_output):
    email2nr_occurrences = {}
    if blame_output is None:
        return email2nr_occurrences
    for line in blame_output.split('\n'):
        m = reAuthorMail.match(line)
        if m:
            author_email_address = m.group(1)
            if author_email_address not in email2nr_occurrences:
                email2nr_occurrences[author_email_address] = 1
            else:
                email2nr_occurrences[author_email_address] += 1
    return email2nr_occurrences


def find_reviewers_for_diff_heuristic(diff):
    # Heuristic 1: assume good reviewers are the ones that touched the same
    # lines before as this patch is touching.
    # Heuristic 2: assume good reviewers are the ones that touched the same
    # files before as this patch is touching.
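    # For example (illustrative): if the blamed context lines total 60 across
    # all authors and reviewer X authored 30 of them, and X also touched 2 of
    # the 4 files changed by the diff, X ends up with scores (50.0, 50.0),
    # reported as "50.0%/50.0%".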
    reviewers2nr_lines_touched = {}
    reviewers2nr_files_touched = {}
    # Assume last revision before diff was modified is the revision the diff
    # applies to.
    git_repo = "git_repos/llvm"
    cmd = 'git -C {0} rev-list -n 1 --before="{1}" master'.format(
        git_repo,
        datetime.fromtimestamp(
            diff.dateModified).strftime("%Y-%m-%d %H:%M:%S"))
    base_revision = get_git_cmd_output(cmd).strip()
    logging.debug("Base revision={0}".format(base_revision))
    for change in diff.changes:
        path = change.oldPath
        # Compute heuristic 1: look at context of patch lines.
        for hunk in change.hunks:
            for start_line, end_line in hunk.actual_lines_changed_offset:
                # Collect git blame results for authors in those ranges.
                cmd = ("git -C {0} blame --encoding=utf-8 --date iso -f -e " +
                       "-w --line-porcelain -L {1},{2} {3} -- {4}").format(
                           git_repo, start_line, end_line, base_revision, path)
                blame_output = get_git_cmd_output(cmd)
                for reviewer, nr_occurrences in \
                        parse_blame_output_line_porcelain(blame_output).items():
                    if reviewer not in reviewers2nr_lines_touched:
                        reviewers2nr_lines_touched[reviewer] = 0
                    reviewers2nr_lines_touched[reviewer] += nr_occurrences
        # Compute heuristic 2: don't look at context, just at files touched.
        # Collect git blame results for authors of the whole file.
        cmd = ("git -C {0} blame --encoding=utf-8 --date iso -f -e -w " +
               "--line-porcelain {1} -- {2}").format(git_repo, base_revision,
                                                     path)
        blame_output = get_git_cmd_output(cmd)
        for reviewer, nr_occurrences in parse_blame_output_line_porcelain(
                blame_output).items():
            if reviewer not in reviewers2nr_files_touched:
                reviewers2nr_files_touched[reviewer] = 0
            reviewers2nr_files_touched[reviewer] += 1

    # Compute "match scores".
    total_nr_lines = sum(reviewers2nr_lines_touched.values())
    total_nr_files = len(diff.changes)
    reviewers_matchscores = \
        [(reviewer,
          (reviewers2nr_lines_touched.get(reviewer, 0)*100.0/total_nr_lines
           if total_nr_lines != 0 else 0,
           reviewers2nr_files_touched[reviewer]*100.0/total_nr_files
           if total_nr_files != 0 else 0))
         for reviewer, nr_files
         in reviewers2nr_files_touched.items()]
    reviewers_matchscores.sort(key=lambda i: i[1], reverse=True)
    return reviewers_matchscores


def find_reviewers_for_review(review):
    # Process the newest diff first.
    diffs = sorted(
        review.phabDiffs, key=lambda d: d.dateModified, reverse=True)
    if len(diffs) == 0:
        return []  # not None: callers len() and iterate over the result.
    diff = diffs[0]
    matched_reviewers = find_reviewers_for_diff_heuristic(diff)
    # Show progress, as this is a slow operation:
    sys.stdout.write('.')
    sys.stdout.flush()
    logging.debug("matched_reviewers: {0}".format(matched_reviewers))
    return matched_reviewers


def update_git_repos():
    git_repos_directory = "git_repos"
    for name, url in GIT_REPO_METADATA:
        dirname = os.path.join(git_repos_directory, name)
        if not os.path.exists(dirname):
            cmd = "git clone {0} {1}".format(url, dirname)
            output = get_git_cmd_output(cmd)
        cmd = "git -C {0} pull --rebase".format(dirname)
        output = get_git_cmd_output(cmd)


def send_emails(email_addresses, msg):
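    # NOTE: SMTP() followed by connect() with no arguments connects to a mail
    # server on localhost:25; adapt this to your mail setup.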
    s = smtplib.SMTP()
    s.connect()
    for email_address in email_addresses:
        email_msg = email.mime.multipart.MIMEMultipart()
        email_msg['From'] = ''
        email_msg['To'] = email_address
        email_msg['Subject'] = 'LLVM patches you may be able to review.'
        email_msg.attach(email.mime.text.MIMEText(msg, 'plain'))
        # python 3.x: s.send_message(email_msg)
        s.sendmail(email_msg['From'], email_msg['To'],
                   email_msg.as_string())
    s.quit()


def filter_reviewers_to_report_for(people_to_look_for):
    # The below is just an example filter, to only report potential reviews
    # to do for the people that will receive the report email.
    return lambda potential_reviewers: [r for r in potential_reviewers
                                        if r[0] in people_to_look_for]


def main():
    parser = argparse.ArgumentParser(
        description='Match open reviews to potential reviewers.')
    parser.add_argument(
        '--no-update-cache',
        dest='update_cache',
        action='store_false',
        default=True,
        help='Do not update cached Phabricator objects')
    parser.add_argument(
        'email_addresses',
        nargs='*',
        help="The email addresses (as known by LLVM git) of " +
        "the people to look for reviews for.")
    parser.add_argument('--verbose', '-v', action='count', default=0)

    args = parser.parse_args()

    if args.verbose >= 1:
        logging.basicConfig(level=logging.DEBUG)

    people_to_look_for = args.email_addresses

    phab = init_phab_connection()

    if args.update_cache:
        update_cache(phab)

    load_cache()
    update_git_repos()
    msg = print_most_recent_reviews(
        phab,
        days=1,
        filter_reviewers=filter_reviewers_to_report_for(people_to_look_for))
    send_emails(people_to_look_for, msg)


if __name__ == "__main__":
    main()