# Copyright 2021 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import pathlib
from enum import IntEnum
from multiprocessing import Pool

import numpy as np
import pandas as pd

BRANCH_ARTIFACTS_DIR = (
    pathlib.Path(__file__).parent.resolve()
    / "googleapiclient"
    / "discovery_cache"
    / "documents"
)
MAIN_ARTIFACTS_DIR = (
    pathlib.Path(__file__).parent.resolve()
    / ".."
    / "main"
    / "googleapiclient"
    / "discovery_cache"
    / "documents"
)

MULTIPROCESSING_NUM_PER_BATCH = 5
MULTIPROCESSING_NUM_AGENTS = 10


class ChangeType(IntEnum):
    UNKNOWN = 0
    DELETED = 1
    ADDED = 2
    CHANGED = 3


class DirectoryDoesNotExist(ValueError):
    """Raised when the specified directory does not exist."""

    pass


class ChangeSummary:
    """Represents the change summary between 2 directories containing
    artifacts.
    """

    def __init__(self, new_artifacts_dir, current_artifacts_dir, temp_dir, file_list):
        """Initializes an instance of a ChangeSummary.

        Args:
            new_artifacts_dir (str): The relative path to the directory with the
                new discovery artifacts.
            current_artifacts_dir (str): The relative path to the directory with
                the current discovery artifacts.
            temp_dir (str): The relative path to the directory used for
                temporary storage where intermediate files will be stored.
            file_list (list): A list of strings containing files to analyze.
        """

        self._file_list = file_list
        self._new_artifacts_dir = pathlib.Path(new_artifacts_dir)
        self._current_artifacts_dir = pathlib.Path(current_artifacts_dir)
        self._temp_dir = pathlib.Path(temp_dir)

        # Sanity checks to ensure directories exist
        self._raise_if_directory_not_found(self._new_artifacts_dir)
        self._raise_if_directory_not_found(self._current_artifacts_dir)
        self._raise_if_directory_not_found(self._temp_dir)

    def _raise_if_directory_not_found(self, directory):
        """Raises if the given `directory` does not exist.

        Args:
            directory (str): The relative path to the `directory`.
        """

        if not pathlib.Path(directory).exists():
            raise DirectoryDoesNotExist(
                "Directory does not exist: {0}".format(directory)
            )

    def _load_json_to_dataframe(self, file_path):
        """Returns a pandas dataframe from the json file provided.

        Args:
            file_path (str): The relative path to the discovery artifact to
                parse.
        """

        # Create an empty dataframe, as we will need to return it if the file
        # doesn't exist.
        dataframe_doc = pd.DataFrame()

        if pathlib.Path(file_path).is_file():
            with open(file_path, "r") as f:
                # Load the json file into a pandas dataframe as a flat table.
                dataframe_doc = pd.json_normalize(json.load(f))
        return dataframe_doc

    def _get_discovery_differences(self, filename):
        """Returns a pandas dataframe which contains the differences between
        the current and new discovery artifact directories, corresponding to
        the file name provided.

        Args:
            filename (str): The name of the discovery artifact to parse.
        """
        # The paths of the 2 discovery artifacts to compare.
        current_artifact_path = self._current_artifacts_dir / filename
        new_artifact_path = self._new_artifacts_dir / filename

        # Use a helper function to load the discovery artifacts into pandas
        # dataframes.
        current_doc = self._load_json_to_dataframe(current_artifact_path)
        new_doc = self._load_json_to_dataframe(new_artifact_path)

        # Concatenate the 2 dataframes, transpose them, and create
        # a new dataframe called combined_docs with columns
        # `Key`, `CurrentValue`, `NewValue`.
        combined_docs = (
            pd.concat([current_doc, new_doc], keys=["CurrentValue", "NewValue"])
            # Drop the index column
            .reset_index(drop=True, level=1)
            # Transpose the dataframe. The resulting columns should be
            # ["Key", "CurrentValue", "NewValue"]
            .rename_axis(["Key"], axis=1).transpose()
            # Drop the index column
            .reset_index()
        )
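        # As an illustration (a hypothetical discovery document, not real
        # output), combined_docs now looks roughly like:
        #
        #   Key                     CurrentValue    NewValue
        #   schemas.Foo.type        "object"        "object"
        #   schemas.Foo.properties  {...}           {...}
        #
        # i.e. one row per flattened json key, with the value from each
        # artifact side by side.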

        # When discovery documents are added, the column `CurrentValue` will
        # not exist. In that case, we'll just populate with `np.nan`.
        if "CurrentValue" not in combined_docs.columns:
            combined_docs["CurrentValue"] = np.nan

        # When discovery documents are deleted, the column `NewValue` will
        # not exist. In that case, we'll just populate with `np.nan`.
        if "NewValue" not in combined_docs.columns:
            combined_docs["NewValue"] = np.nan

        # Split the Key into 2 columns, `Parent` and `Child`, in order
        # to group keys with the same parent together and summarize the
        # changes by parent.
        parent_child_df = combined_docs["Key"].str.rsplit(".", n=1, expand=True)
        # Rename the columns and join them with the combined_docs dataframe.
        # If we only have a `Parent` column, it means that the Key doesn't have
        # any children.
        if len(parent_child_df.columns) == 1:
            parent_child_df.columns = ["Parent"]
        else:
            parent_child_df.columns = ["Parent", "Child"]
        combined_docs = combined_docs.join(parent_child_df)
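        # For example (an illustrative key, not taken from a specific
        # artifact): a Key of "schemas.Foo.type" splits into
        # Parent "schemas.Foo" and Child "type".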

        # Create a new column `Added` to identify rows which have new keys.
        combined_docs["Added"] = np.where(
            combined_docs["CurrentValue"].isnull(), True, False
        )

        # Create a new column `Deleted` to identify rows which have deleted keys.
        combined_docs["Deleted"] = np.where(
            combined_docs["NewValue"].isnull(), True, False
        )

        # Aggregate the keys added by grouping keys with the same parents
        # together to summarize the changes by parent rather than by key.
        parent_added_agg = (
            combined_docs.groupby("Parent")
            .Added.value_counts(normalize=True)
            .reset_index(name="Proportion")
        )

        # Add a column NumLevels to indicate the number of levels in the tree,
        # which will allow us to sort the parents in hierarchical order.
        parent_added_agg["NumLevels"] = (
            parent_added_agg["Parent"].str.split(".").apply(lambda x: len(x))
        )

        # Aggregate the keys deleted by grouping keys with the same parents
        # together to summarize the changes by parent rather than by key.
        parent_deleted_agg = (
            combined_docs.groupby("Parent")
            .Deleted.value_counts(normalize=True)
            .reset_index(name="Proportion")
        )

        # Add a column NumLevels to indicate the number of levels in the tree,
        # which will allow us to sort the parents in hierarchical order.
        parent_deleted_agg["NumLevels"] = (
            parent_deleted_agg["Parent"].str.split(".").apply(lambda x: len(x))
        )
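        # For example (an illustrative parent key): "schemas.Foo.properties"
        # splits into ["schemas", "Foo", "properties"], so its NumLevels is 3.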

        # Create a list of all parents that have been added, in hierarchical
        # order. When `Proportion` is 1, it means that the parent is new, as
        # all of its child keys have been added.
        all_added = (
            parent_added_agg[
                (parent_added_agg["Proportion"] == 1)
                & (parent_added_agg["Added"] == True)
            ][["Parent", "NumLevels"]]
            .sort_values("NumLevels", ascending=True)
            .Parent.to_list()
        )

        # Create a list of all parents that have been deleted, in hierarchical
        # order. When `Proportion` is 1, it means that the parent has been
        # deleted, as all of its child keys have been deleted.
        all_deleted = (
            parent_deleted_agg[
                (parent_deleted_agg["Proportion"] == 1)
                & (parent_deleted_agg["Deleted"] == True)
            ][["Parent", "NumLevels"]]
            .sort_values("NumLevels", ascending=True)
            .Parent.to_list()
        )

        # Go through the list of parents that have been added. If we find any
        # keys whose parent starts with a parent in this list, it means that
        # the entire parent is new. We don't need verbose information about
        # the children, so we replace their parent with the new parent.
        for i in range(0, len(all_added)):
            word = all_added[i]
            combined_docs.Parent = np.where(
                combined_docs["Parent"].str.startswith(word), word, combined_docs.Parent
            )

        # Go through the list of parents that have been deleted. If we find any
        # keys whose parent starts with a parent in this list, it means that
        # the entire parent has been deleted. We don't need verbose information
        # about the children, so we replace their parent with the deleted parent.
        for i in range(0, len(all_deleted)):
            word = all_deleted[i]
            combined_docs.Parent = np.where(
                combined_docs["Parent"].str.startswith(word), word, combined_docs.Parent
            )
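
        # For example (illustrative names): if "schemas.Foo" is in all_added,
        # a row whose Parent is "schemas.Foo.properties.bar" is rolled up to
        # Parent "schemas.Foo", so the whole new schema is reported once
        # rather than once per child key.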

        # Create a new dataframe with only the keys which have changed.
        docs_diff = combined_docs[
            combined_docs["CurrentValue"] != combined_docs["NewValue"]
        ].copy(deep=False)

        # Get the API and Version from the file name but exclude the extension.
        api_version_string = filename.split(".")[:-1]
        # Create columns `Name` and `Version` using the version string.
        docs_diff["Name"] = api_version_string[0]
        docs_diff["Version"] = ".".join(api_version_string[1:])
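        # For example, a filename such as "drive.v3.json" yields Name "drive"
        # and Version "v3", while "admin.directory_v1.json" yields Name "admin"
        # and Version "directory_v1".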

        # These conditions are used as arguments in the `np.where` calls
        # below.
        deleted_condition = docs_diff["NewValue"].isnull()
        added_condition = docs_diff["CurrentValue"].isnull()

        # Create a new `ChangeType` column. The `np.where()` function acts like
        # a ternary operator: when `deleted_condition` is `True`, the
        # `ChangeType` will be `ChangeType.DELETED`. If `added_condition` is
        # `True`, the `ChangeType` will be `ChangeType.ADDED`; otherwise the
        # `ChangeType` will be `ChangeType.CHANGED`.
        docs_diff["ChangeType"] = np.where(
            deleted_condition,
            ChangeType.DELETED,
            np.where(added_condition, ChangeType.ADDED, ChangeType.CHANGED),
        )

        # Filter out keys which rarely affect functionality. For example:
        # {"description", "documentation", "enum", "etag", "revision", "title",
        # "url", "rootUrl"}
        docs_diff = docs_diff[
            ~docs_diff["Key"].str.contains(
                "|".join(self._get_keys_to_ignore()), case=False
            )
        ]

        # Group keys with the same parent together and create a new column
        # called `Count` which indicates the number of keys that have been
        # grouped together. The reason for the Count column is that when keys
        # have the same parent, we group them together to improve readability.
        docs_diff_with_count = (
            docs_diff.groupby(
                ["Parent", "Added", "Deleted", "Name", "Version", "ChangeType"]
            )
            .size()
            .reset_index(name="Count")
        )

        # Add the Count column to docs_diff.
        docs_diff = docs_diff.merge(docs_diff_with_count)

        # When the count is greater than 1, update the key with the name of the
        # parent, since we are consolidating keys with the same parent.
        docs_diff.loc[docs_diff["Count"] > 1, "Key"] = docs_diff["Parent"]

        return docs_diff[
            ["Key", "Added", "Deleted", "Name", "Version", "ChangeType", "Count"]
        ].drop_duplicates()

    def _build_summary_message(self, api_name, is_feature):
        """Returns a string containing the summary for a given api. The string
        returned will be in the format `fix(<api_name>): update the api`
        when `is_feature=False` and `feat(<api_name>): update the api`
        when `is_feature=True`.

        Args:
            api_name (str): The name of the api to include in the summary.
            is_feature (bool): If True, use the prefix `feat`; otherwise use
                `fix`.
        """

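        # For example (an illustrative api name, not tied to a specific
        # artifact): _build_summary_message("drive", False) returns
        # "fix(drive): update the api", while is_feature=True yields
        # "feat(drive): update the api".
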
        # Build the conventional commit string based on the arguments provided.
        commit_type = "feat" if is_feature else "fix"
        return "{0}({1}): update the api".format(commit_type, api_name)

    def _get_keys_to_ignore(self):
        """Returns a list of strings with keys to ignore because they rarely
        affect functionality.

        Args: None
        """
        keys_to_ignore = [
            "description",
            "documentation",
            "enum",
            "etag",
            "revision",
            "title",
            "url",
            "rootUrl",
        ]
        return keys_to_ignore

    def _get_stable_versions(self, versions):
        """Returns a pandas series (`pd.Series()`) of boolean values,
        corresponding to the given series, indicating whether the version is
        considered stable or not.

        Args:
            versions (object): A pandas series containing version
                information for all discovery artifacts.
        """
        # Use a regex on the version to find versions with the pattern
        # <v>.<0-9>.<0-9>.<0-9>. Any API version that matches this pattern will
        # be labeled as stable. In other words, v1, v1.4 and v1.4.5 are stable,
        # but v1b1, v1alpha and v1beta1 are not.
        return versions.str.extract(r"(v\d?\.?\d?\.?\d+$)").notnull()
    def _get_summary_and_write_to_disk(self, dataframe, directory):
        """Writes summary information to file about changes made to discovery
        artifacts, based on the provided dataframe, and returns that dataframe.
        The file `allapis.dataframe` is saved to the given directory.

        Args:
            dataframe (object): A pandas dataframe containing summary change
                information for all discovery artifacts.
            directory (str): Path where the summary file should be saved.
        """

        dataframe["IsStable"] = self._get_stable_versions(dataframe["Version"])

        # Create a filter for features, which contains only rows which have keys
        # that have been deleted or added; it will be used as an argument in
        # the `np.where()` call below.
        filter_features = (dataframe["ChangeType"] == ChangeType.DELETED) | (
            dataframe["ChangeType"] == ChangeType.ADDED
        )

        # Create a new column `IsFeature` to indicate which rows should be
        # considered as features.
        dataframe["IsFeature"] = np.where(filter_features, True, np.nan)

        # Create a new column `IsFeatureAggregate` which will be used to
        # summarize the api changes. We can either have a feature or a fix for
        # a given api, but not both.
        dataframe["IsFeatureAggregate"] = dataframe.groupby("Name").IsFeature.transform(
            lambda x: x.any()
        )
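        # For example (illustrative api names): if any row for api "drive" has
        # an added or deleted key, every "drive" row gets
        # IsFeatureAggregate=True, so the whole api is summarized as a feature
        # rather than a fix.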

        # Create a new column `Summary`, which will contain a string with the
        # conventional commit message.
        dataframe["Summary"] = np.vectorize(self._build_summary_message)(
            dataframe["Name"], dataframe["IsFeatureAggregate"]
        )

        # Write the final dataframe to disk, as it will be used in the
        # buildprbody.py script.
        dataframe.to_csv(directory / "allapis.dataframe")
        return dataframe

    def _write_verbose_changes_to_disk(self, dataframe, directory, summary_df):
        """Writes verbose information to file about changes made to discovery
        artifacts based on the provided dataframe. A separate file is saved
        for each api in the given directory. The extension of the files will
        be `.verbose`.

        Args:
            dataframe (object): A pandas dataframe containing verbose change
                information for all discovery artifacts.
            directory (str): Path where the verbose files should be saved.
            summary_df (object): A dataframe containing a summary of the changes.
        """
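        # Each verbose file written below looks roughly like the sketch that
        # follows (an illustrative api/version and key, not real output):
        #
        #   fix(drive): update the api
        #
        #   #### drive:v3
        #
        #   The following keys were added:
        #   - schemas.Foo (Total Keys: 12)
        #
        # followed by similar sections for deleted and changed keys.
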
        # List of strings which will contain verbose change information for
        # each api.
        verbose_changes = []

        # Sort the dataframe to minimize file operations below.
        dataframe.sort_values(
            by=["Name", "Version", "ChangeType"], ascending=True, inplace=True
        )

        # Select only the relevant columns. We need to create verbose output
        # by api name, version and change type, so we group by these
        # columns.

        change_type_groups = dataframe[
            ["Name", "Version", "ChangeType", "Key", "Count"]
        ].groupby(["Name", "Version", "ChangeType"])

        lastApi = ""
        lastVersion = ""
        lastType = ChangeType.UNKNOWN

        f = None
        for name, group in change_type_groups:
            currentApi = name[0]
            currentVersion = name[1]
            currentType = name[2]

            # We need to handle opening and closing files when processing an
            # api which is different from the previous one.
            if lastApi != currentApi:
                # If we are processing a new api, close the file used for
                # processing the previous api.
                if f is not None:
                    f.writelines(verbose_changes)
                    f.close()
                    f = None
                # Clear the list of strings with information from the previous
                # api and reset the last version.
                verbose_changes = []
                lastVersion = ""
                # Create a file which contains verbose changes for the current
                # api being processed.
                filename = "{0}.verbose".format(currentApi)
                f = open(pathlib.Path(directory / filename), "a")
                lastApi = currentApi

                # Create a filter with only the rows for the current api.
                current_api_filter = summary_df["Name"] == currentApi

                # Get the string in the `Summary` column for the current api and
                # append it to `verbose_changes`. The `Summary` column contains
                # the conventional commit message. Use pandas.Series.iloc[0] to
                # retrieve only the first element, since all the values in the
                # Summary column are the same for a given api.
                verbose_changes.append(summary_df[current_api_filter].Summary.iloc[0])

            # If the version has changed, we need to append a new heading
            # in the verbose summary which contains the api and version.
            if lastVersion != currentVersion:
                # Append a header string with the api and version.
                verbose_changes.append(
                    "\n\n#### {0}:{1}\n\n".format(currentApi, currentVersion)
                )

                lastVersion = currentVersion
                lastType = ChangeType.UNKNOWN

            # Whenever the change type is different, we need to create a new
            # heading for the group of keys with the same change type.
            if currentType != lastType:
                if currentType == ChangeType.DELETED:
                    verbose_changes.append("\nThe following keys were deleted:\n")
                elif currentType == ChangeType.ADDED:
                    verbose_changes.append("\nThe following keys were added:\n")
                else:
                    verbose_changes.append("\nThe following keys were changed:\n")

                lastType = currentType

            # Append the keys, and corresponding counts, in the same change
            # type group.
            verbose_changes.extend(
                [
                    "- {0} (Total Keys: {1})\n".format(row["Key"], row["Count"])
                    for index, row in group[["Key", "Count"]].iterrows()
                ]
            )

        # Make sure to close the last file and write the changes.
        if f is not None:
            f.writelines(verbose_changes)
            f.close()
            f = None

    def detect_discovery_changes(self):
        """Writes a summary of the changes to the discovery artifacts to disk
        at the path specified in `temp_dir`.

        Args: None
        """
        result = pd.DataFrame()
        # Process files in parallel to improve performance; the third argument
        # to `pool.map` is the chunksize handed to each worker.
        with Pool(processes=MULTIPROCESSING_NUM_AGENTS) as pool:
            result = result.append(
                pool.map(
                    self._get_discovery_differences,
                    self._file_list,
                    MULTIPROCESSING_NUM_PER_BATCH,
                )
            )

        if len(result):
            # Sort the resulting dataframe by `Name`, `Version`, `ChangeType`
            # and `Key`.
            sort_columns = ["Name", "Version", "ChangeType", "Key"]
            result.sort_values(by=sort_columns, ascending=True, inplace=True)

            # Create a folder which will be used by the `createcommits.sh` and
            # `buildprbody.py` scripts.
            pathlib.Path(self._temp_dir).mkdir(exist_ok=True)

            # Create a summary which contains a conventional commit message
            # for each api and write it to disk.
            summary_df = self._get_summary_and_write_to_disk(result, self._temp_dir)

            # Create verbose change information for each api which contains
            # a list of changes by key and write it to disk.
            self._write_verbose_changes_to_disk(result, self._temp_dir, summary_df)
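

# Illustrative usage (a sketch; the temp directory name and the file list
# below are assumptions, since this class is normally driven by the repo's
# release scripts):
#
#   change_summary = ChangeSummary(
#       BRANCH_ARTIFACTS_DIR, MAIN_ARTIFACTS_DIR, "temp", ["drive.v3.json"]
#   )
#   change_summary.detect_discovery_changes()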