Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 1 | #!/usr/bin/python2.4 |
| 2 | # -*- coding: utf-8 -*- |
| 3 | # |
| 4 | # Copyright (C) 2010 Google Inc. |
| 5 | # |
| 6 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | # you may not use this file except in compliance with the License. |
| 8 | # You may obtain a copy of the License at |
| 9 | # |
| 10 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | # |
| 12 | # Unless required by applicable law or agreed to in writing, software |
| 13 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | # See the License for the specific language governing permissions and |
| 16 | # limitations under the License. |
| 17 | |
| 18 | """Simple command-line sample for the Google Prediction API |
| 19 | |
Joe Gregorio | 65826f9 | 2011-06-03 11:20:29 -0400 | [diff] [blame] | 20 | Command-line application that trains on your input data. This sample does |
| 21 | the same thing as the Hello Prediction! example. You might want to run |
| 22 | the setup.sh script to load the sample data to Google Storage. |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 23 | |
| 24 | Usage: |
Joe Gregorio | 968a958 | 2012-03-07 14:52:52 -0500 | [diff] [blame^] | 25 | $ python prediction.py --object_name="bucket/object" --id="model_id" |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 26 | |
| 27 | You can also get help on all the command-line flags the program understands |
| 28 | by running: |
| 29 | |
| 30 | $ python prediction.py --help |
| 31 | |
| 32 | To get detailed log output run: |
| 33 | |
| 34 | $ python prediction.py --logging_level=DEBUG |
| 35 | """ |
| 36 | |
Joe Gregorio | 968a958 | 2012-03-07 14:52:52 -0500 | [diff] [blame^] | 37 | __author__ = ('jcgregorio@google.com (Joe Gregorio), ' |
| 38 | 'marccohen@google.com (Marc Cohen)') |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 39 | |
Robert Kaplow | 49cd5f8 | 2011-08-02 13:50:04 -0400 | [diff] [blame] | 40 | import apiclient.errors |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 41 | import gflags |
| 42 | import httplib2 |
| 43 | import logging |
Joe Gregorio | 968a958 | 2012-03-07 14:52:52 -0500 | [diff] [blame^] | 44 | import os |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 45 | import pprint |
| 46 | import sys |
Joe Gregorio | 968a958 | 2012-03-07 14:52:52 -0500 | [diff] [blame^] | 47 | import time |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 48 | |
| 49 | from apiclient.discovery import build |
| 50 | from oauth2client.file import Storage |
Joe Gregorio | 7d79121 | 2011-05-16 21:58:52 -0700 | [diff] [blame] | 51 | from oauth2client.client import AccessTokenRefreshError |
Joe Gregorio | 968a958 | 2012-03-07 14:52:52 -0500 | [diff] [blame^] | 52 | from oauth2client.client import flow_from_clientsecrets |
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 53 | from oauth2client.tools import run |
| 54 | |
FLAGS = gflags.FLAGS

# CLIENT_SECRETS is the path of the file holding the OAuth 2.0 information for
# this application (client_id and client_secret), which are found on the API
# Access tab of the Google APIs Console <https://code.google.com/apis/console>.
#
# The path is anchored to the directory containing this script so that the
# file which is actually opened is the very file the message below points the
# user at, whatever the current working directory happens to be.
CLIENT_SECRETS = os.path.join(os.path.dirname(__file__), 'client_secrets.json')

# Helpful message to display if the CLIENT_SECRETS file is missing.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0

To make this sample run you will need to populate the client_secrets.json file
found at:

%s

with information from the APIs Console <https://code.google.com/apis/console>.

""" % CLIENT_SECRETS

# Set up a Flow object to be used if we need to authenticate.
FLOW = flow_from_clientsecrets(
    CLIENT_SECRETS,
    scope='https://www.googleapis.com/auth/prediction',
    message=MISSING_CLIENT_SECRETS_MESSAGE)
Joe Gregorio | 652898b | 2011-05-02 21:07:43 -0400 | [diff] [blame] | 81 | |
# Command-line options are declared through the gflags module. Invoke the
# program with '--help' to list every flag it accepts.
gflags.DEFINE_enum(
    'logging_level',
    'ERROR',
    ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
    'Set the level of logging detail.')

# Location of the training data; the program refuses to start without it.
gflags.DEFINE_string(
    'object_name',
    None,
    'Full Google Storage path of csv data (ex bucket/object)')
gflags.MarkFlagAsRequired('object_name')

# Name under which the trained model is stored; also mandatory.
gflags.DEFINE_string(
    'id',
    None,
    'Model Id of your choosing to name trained model')
gflags.MarkFlagAsRequired('id')

# Seconds to pause between two consecutive polls of the training status.
SLEEP_TIME = 10
| 101 | |
def print_header(line):
    """Print `line` framed above and below by a rule of '=' characters.

    The rule is exactly as long as `line`, and a blank line is emitted before
    the upper rule to set the header apart from earlier output.

    Args:
      line: the header text to print.
    """
    header_line = '=' * len(line)
    # Single-argument print(...) produces identical output whether it is
    # parsed as a Python 2 print statement or as a Python 3 function call.
    print('\n' + header_line)
    print(line)
    print(header_line)
| 109 | |
def _wait_for_training(papi, model_id):
    """Poll a model until its training finishes and return the final status.

    Args:
      papi: the trainedmodels() resource of the Prediction API service.
      model_id: id of the model whose training is being awaited.

    Returns:
      The response of the last get() call, i.e. the one whose
      'trainingStatus' is 'DONE'.

    Raises:
      Exception: if the reported training state is neither 'RUNNING' nor
        'DONE'.
    """
    while True:
        status = papi.get(id=model_id).execute()
        state = status['trainingStatus']
        print('Training state: ' + state)
        if state == 'DONE':
            return status
        if state != 'RUNNING':
            raise Exception('Training Error: ' + state)
        # Still training: wait before asking again.
        time.sleep(SLEEP_TIME)


def main(argv):
    """Run the sample: list models, train one, analyze, predict, delete.

    Args:
      argv: the full command line, program name included (normally sys.argv).
    """
    # Let the gflags module process the command-line arguments.
    try:
        argv = FLAGS(argv)
    except gflags.FlagsError:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so this handler parses on every Python version.
        error = sys.exc_info()[1]
        print('%s\nUsage: %s ARGS\n%s' % (error, argv[0], FLAGS))
        sys.exit(1)

    # Set the logging according to the command-line flag.
    logging.getLogger().setLevel(getattr(logging, FLAGS.logging_level))

    # If the Credentials don't exist or are invalid run through the native
    # client flow. The Storage object will ensure that if successful the good
    # Credentials will get written back to a file.
    storage = Storage('prediction.dat')
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = run(FLOW, storage)

    # Create an httplib2.Http object to handle our HTTP requests and authorize
    # it with our good Credentials.
    http = credentials.authorize(httplib2.Http())

    try:
        # Get access to the Prediction API.
        service = build("prediction", "v1.5", http=http)
        papi = service.trainedmodels()

        # List models.
        print_header('Fetching list of first ten models')
        result = papi.list(maxResults=10).execute()
        print('List results:')
        pprint.pprint(result)

        # Start training request on a data set.
        print_header('Submitting model training request')
        body = {'id': FLAGS.id, 'storageDataLocation': FLAGS.object_name}
        start = papi.insert(body=body).execute()
        print('Training results:')
        pprint.pprint(start)

        # Wait for the training to complete.
        print_header('Waiting for training to complete')
        status = _wait_for_training(papi, FLAGS.id)

        # Job has completed. This report has to come after the polling loop:
        # placed inside it, after branches that all leave the iteration, it
        # could never be reached.
        print('Training completed:')
        pprint.pprint(status)

        # Describe model.
        print_header('Fetching model description')
        result = papi.analyze(id=FLAGS.id).execute()
        print('Analyze results:')
        pprint.pprint(result)

        # Make a prediction using the newly trained model.
        print_header('Making a prediction')
        body = {'input': {'csvInstance': ["mucho bueno"]}}
        result = papi.predict(body=body, id=FLAGS.id).execute()
        print('Prediction results...')
        pprint.pprint(result)

        # Delete model.
        print_header('Deleting model')
        papi.delete(id=FLAGS.id).execute()
        print('Model deleted.')

    except AccessTokenRefreshError:
        # Note the trailing space: the two literals are concatenated.
        print("The credentials have been revoked or expired, please re-run "
              "the application to re-authorize")


if __name__ == '__main__':
    main(sys.argv)