Adam Nemet | 6ab2d48 | 2017-03-02 17:00:59 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python2.7 |
| 2 | |
| 3 | from __future__ import print_function |
| 4 | |
| 5 | desc = '''Generate the difference of two YAML files into a new YAML file (works on |
| 6 | pair of directories too). A new attribute 'Added' is set to True or False |
| 7 | depending whether the entry is added or removed from the first input to the |
| 8 | next. |
| 9 | |
| 10 | The tools requires PyYAML.''' |
| 11 | |
| 12 | import yaml |
| 13 | # Try to use the C parser. |
| 14 | try: |
| 15 | from yaml import CLoader as Loader |
| 16 | except ImportError: |
| 17 | from yaml import Loader |
| 18 | |
| 19 | import optrecord |
| 20 | import argparse |
| 21 | from collections import defaultdict |
| 22 | from multiprocessing import cpu_count, Pool |
| 23 | import os, os.path |
Adam Nemet | b1d026f | 2017-03-06 19:15:22 +0000 | [diff] [blame] | 24 | import fnmatch |
Adam Nemet | 6ab2d48 | 2017-03-02 17:00:59 +0000 | [diff] [blame] | 25 | |
def find_files(dir_or_file):
    """Return the list of optimization-record files named by dir_or_file.

    If dir_or_file is an existing regular file it is returned as the sole
    element of the list, whatever its name.  Otherwise it is walked as a
    directory tree and every file whose base name matches "*.opt.yaml" is
    collected, in the order os.walk reports them.

    A path that is neither a file nor a directory produces an empty list,
    because os.walk ignores listing errors by default.
    """
    if os.path.isfile(dir_or_file):
        return [dir_or_file]

    matches = []
    for root, _subdirs, names in os.walk(dir_or_file):
        # fnmatch.filter applies the same matching rules as fnmatch.fnmatch
        # to each name, preserving the order of `names`.
        matches.extend(
            os.path.join(root, name)
            for name in fnmatch.filter(names, "*.opt.yaml"))
    return matches
| 36 | |
if __name__ == '__main__':
    # Command line: two inputs (each a YAML file or a directory to search),
    # an optional job count and an optional output path.
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('yaml_dir_or_file_1')
    parser.add_argument('yaml_dir_or_file_2')
    parser.add_argument(
        '--jobs',
        '-j',
        default=cpu_count(),
        type=int,
        help='Max job count (defaults to %(default)s, the current CPU count)')
    parser.add_argument('--output', '-o', default='diff.opt.yaml')
    args = parser.parse_args()

    # With exactly one job use the builtin map and avoid creating a process
    # pool; otherwise fan the work out through Pool.map.
    pool = None
    if args.jobs == 1:
        pmap = map
    else:
        pool = Pool(processes=args.jobs)
        pmap = pool.map

    files1 = find_files(args.yaml_dir_or_file_1)
    files2 = find_files(args.yaml_dir_or_file_2)

    # Only the first element of each gather_results() triple is used here.
    all_remarks1, _, _ = optrecord.gather_results(pmap, files1)
    all_remarks2, _, _ = optrecord.gather_results(pmap, files2)

    # The pool is not needed past this point; shut the workers down cleanly
    # instead of leaving them to be reaped at interpreter exit.
    if pool is not None:
        pool.close()
        pool.join()

    # Build each set once.  Set membership presumably relies on the
    # hash/equality that optrecord defines for remarks -- verify there.
    remarks1 = set(all_remarks1.values())
    remarks2 = set(all_remarks2.values())

    added = remarks2 - remarks1
    removed = remarks1 - remarks2

    # Tag every differing remark with the direction of the change.
    for r in added:
        r.Added = True
    for r in removed:
        r.Added = False

    # open() replaces the Python-2-only file() builtin, and the with-block
    # guarantees the output is flushed and closed even if dumping fails.
    with open(args.output, 'w') as stream:
        yaml.dump_all(added | removed, stream)