#!/usr/bin/env python2.7
2
3from __future__ import print_function
4
# Description shown by argparse in the --help output of this tool.
desc = '''Generate the difference of two YAML files into a new YAML file (works on
pair of directories too). A new attribute 'Added' is set to True or False
depending whether the entry is added or removed from the first input to the
next.

The tools requires PyYAML.'''
11
12import yaml
13# Try to use the C parser.
14try:
15 from yaml import CLoader as Loader
16except ImportError:
17 from yaml import Loader
18
19import optrecord
20import argparse
21from collections import defaultdict
22from multiprocessing import cpu_count, Pool
23import os, os.path
24
def find_files(dir_or_file):
    """Return the paths of all files reachable from *dir_or_file*.

    If *dir_or_file* is a regular file, the result is a one-element list
    holding that path unchanged.  Otherwise the path is walked recursively
    with os.walk and every file name found is joined onto the directory
    it was found in.  A path that cannot be walked (for example one that
    does not exist) produces an empty list.
    """
    if os.path.isfile(dir_or_file):
        return [dir_or_file]

    # Names are chosen so that the builtins all/dir/file are not shadowed.
    return [
        os.path.join(root, name)
        for root, _, names in os.walk(dir_or_file)
        for name in names
    ]
34
if __name__ == '__main__':
    # Command line: two inputs (each a YAML file or a directory of them),
    # an optional worker count and the path of the diff file to write.
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('yaml_dir_or_file_1')
    parser.add_argument('yaml_dir_or_file_2')
    parser.add_argument(
        '--jobs',
        '-j',
        default=cpu_count(),
        type=int,
        help='Max job count (defaults to current CPU count)')
    parser.add_argument('--output', '-o', default='diff.opt.yaml')
    args = parser.parse_args()

    # With a single job, skip the process pool and map in-process.
    if args.jobs == 1:
        pmap = map
    else:
        pool = Pool(processes=args.jobs)
        pmap = pool.map

    files1 = find_files(args.yaml_dir_or_file_1)
    files2 = find_files(args.yaml_dir_or_file_2)

    # gather_results returns three values; only the first one, a mapping
    # whose values are the remark objects, is needed here.
    all_remarks1, _, _ = optrecord.gather_results(pmap, files1)
    all_remarks2, _, _ = optrecord.gather_results(pmap, files2)

    # Build each set of remarks once and reuse it for both differences.
    remarks1 = set(all_remarks1.values())
    remarks2 = set(all_remarks2.values())
    added = remarks2 - remarks1
    removed = remarks1 - remarks2

    # Tag every remark with the side of the diff it belongs to.
    for r in added:
        r.Added = True
    for r in removed:
        r.Added = False

    # open() replaces the Python 2-only file() builtin, and the with-block
    # closes (and therefore flushes) the output even if dumping raises.
    with open(args.output, 'w') as stream:
        yaml.dump_all(added | removed, stream)