Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 1 | #!/usr/bin/python2.4 |
| 2 | # -*- coding: utf-8 -*- |
| 3 | # |
Craig Citro | 751b7fb | 2014-09-23 11:20:38 -0700 | [diff] [blame] | 4 | # Copyright 2014 Google Inc. All Rights Reserved. |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 5 | # |
| 6 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | # you may not use this file except in compliance with the License. |
| 8 | # You may obtain a copy of the License at |
| 9 | # |
| 10 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | # |
| 12 | # Unless required by applicable law or agreed to in writing, software |
| 13 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | # See the License for the specific language governing permissions and |
| 16 | # limitations under the License. |
| 17 | |
| 18 | """Simple command-line sample for the Google Prediction API |
| 19 | |
Joe Gregorio | 65826f9 | 2011-06-03 11:20:29 -0400 | [diff] [blame] | 20 | Command-line application that trains on your input data. This sample does |
| 21 | the same thing as the Hello Prediction! example. You might want to run |
| 22 | the setup.sh script to load the sample data to Google Storage. |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 23 | |
| 24 | Usage: |
Antoine Picard | 7ba3c3f | 2014-05-09 14:55:58 -0700 | [diff] [blame] | 25 | $ python prediction.py "bucket/object" "model_id" "project_id" |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 26 | |
| 27 | You can also get help on all the command-line flags the program understands |
| 28 | by running: |
| 29 | |
| 30 | $ python prediction.py --help |
| 31 | |
| 32 | To get detailed log output run: |
| 33 | |
| 34 | $ python prediction.py --logging_level=DEBUG |
| 35 | """ |
| 36 | |
Joe Gregorio | 968a958 | 2012-03-07 14:52:52 -0500 | [diff] [blame] | 37 | __author__ = ('jcgregorio@google.com (Joe Gregorio), ' |
| 38 | 'marccohen@google.com (Marc Cohen)') |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 39 | |
Joe Gregorio | e839115 | 2013-06-28 01:30:57 -0400 | [diff] [blame] | 40 | import argparse |
Joe Gregorio | 968a958 | 2012-03-07 14:52:52 -0500 | [diff] [blame] | 41 | import os |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 42 | import pprint |
| 43 | import sys |
Joe Gregorio | 968a958 | 2012-03-07 14:52:52 -0500 | [diff] [blame] | 44 | import time |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 45 | |
Antoine Picard | 7ba3c3f | 2014-05-09 14:55:58 -0700 | [diff] [blame] | 46 | from apiclient import discovery |
| 47 | from apiclient import sample_tools |
Joe Gregorio | e839115 | 2013-06-28 01:30:57 -0400 | [diff] [blame] | 48 | from oauth2client import client |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 49 | |
Joe Gregorio | 968a958 | 2012-03-07 14:52:52 -0500 | [diff] [blame] | 50 | |
# Time to wait (in seconds) between successive checks of training status.
SLEEP_TIME = 10


# Declare command-line flags.
#
# This parser is passed to sample_tools.init() through parents=[...] in
# main(), so it is created with add_help=False: argparse parent parsers must
# not define their own -h/--help option or it would clash with the child's.
argparser = argparse.ArgumentParser(add_help=False)
argparser.add_argument(
    'object_name',
    help='Full Google Storage path of csv data (ex bucket/object)')
argparser.add_argument(
    'model_id',
    help='Model Id of your choosing to name trained model')
# The help text here used to be a copy of the model_id help.
argparser.add_argument(
    'project_id',
    help='Project Id of your Google Cloud Project')
Joe Gregorio | e839115 | 2013-06-28 01:30:57 -0400 | [diff] [blame] | 63 | |
| 64 | |
def print_header(line):
    """Print line framed by rows of '=' that are as wide as line itself.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated.

    Args:
      line: Text of the header; its length determines the rule width.
    """
    rule = '=' * len(line)
    # Single-argument print(...) calls behave the same on Python 2 (where the
    # parentheses just group the expression) and on Python 3.
    print('\n' + rule)
    print(line)
    print(rule)
| 72 | |
| 73 | |
def main(argv):
    """Run the Prediction API sample from start to finish.

    The steps are, in order: list up to ten existing models, submit a
    training request for the given data, poll until training finishes,
    fetch the model analysis, request predictions for two sample strings,
    and finally delete the model.

    Args:
      argv: Command-line arguments as found in sys.argv; they are parsed by
        sample_tools.init() using the module-level argparser as a parent.

    Raises:
      Exception: If the reported training status is neither 'RUNNING' nor
        'DONE'.
    """
    # If you previously ran this app with an earlier version of the API
    # or if you change the list of scopes below, revoke your app's permission
    # here: https://accounts.google.com/IssuedAuthSubTokens
    # Then re-run the app to re-authorize it.
    service, flags = sample_tools.init(
        argv, 'prediction', 'v1.6', __doc__, __file__, parents=[argparser],
        scope=(
            'https://www.googleapis.com/auth/prediction',
            'https://www.googleapis.com/auth/devstorage.read_only'))

    try:
        # Get access to the Prediction API.
        papi = service.trainedmodels()

        # List models.
        print_header('Fetching list of first ten models')
        result = papi.list(maxResults=10, project=flags.project_id).execute()
        print('List results:')
        pprint.pprint(result)

        # Start training request on a data set.
        print_header('Submitting model training request')
        body = {'id': flags.model_id, 'storageDataLocation': flags.object_name}
        start = papi.insert(body=body, project=flags.project_id).execute()
        print('Training results:')
        pprint.pprint(start)

        # Wait for the training to complete.
        print_header('Waiting for training to complete')
        while True:
            status = papi.get(
                id=flags.model_id, project=flags.project_id).execute()
            state = status['trainingStatus']
            print('Training state: ' + state)
            if state == 'DONE':
                break
            if state != 'RUNNING':
                raise Exception('Training Error: ' + state)
            time.sleep(SLEEP_TIME)

        # Job has completed.  This report used to sit inside the loop after
        # branches that all break/continue/raise, so it could never execute.
        print('Training completed:')
        pprint.pprint(status)

        # Describe model.
        print_header('Fetching model description')
        result = papi.analyze(
            id=flags.model_id, project=flags.project_id).execute()
        print('Analyze results:')
        pprint.pprint(result)

        # Make some predictions using the newly trained model.
        print_header('Making some predictions')
        for sample_text in ['mucho bueno', 'bonjour, mon cher ami']:
            body = {'input': {'csvInstance': [sample_text]}}
            result = papi.predict(
                body=body, id=flags.model_id,
                project=flags.project_id).execute()
            print('Prediction results for "%s"...' % sample_text)
            pprint.pprint(result)

        # Delete model.  The response is not used, so it is not kept.
        print_header('Deleting model')
        papi.delete(id=flags.model_id, project=flags.project_id).execute()
        print('Model deleted.')

    except client.AccessTokenRefreshError:
        print('The credentials have been revoked or expired, please re-run '
              'the application to re-authorize.')
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 144 | |
Joe Gregorio | e839115 | 2013-06-28 01:30:57 -0400 | [diff] [blame] | 145 | |
# Run the sample only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main(sys.argv)