| # Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved. |
| # |
| # Permission is hereby granted, free of charge, to any person obtaining a copy |
| # of this software and associated documentation files (the "Software"), to deal |
| # in the Software without restriction, including without limitation the rights |
| # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| # copies of the Software, and to permit persons to whom the Software is |
| # furnished to do so, subject to the following conditions: |
| # |
| # The above copyright notice and this permission notice shall be included in |
| # all copies or substantial portions of the Software. |
| # |
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| # THE SOFTWARE. |
| # |
| # Comments and/or additions are welcome (send e-mail to: |
| # strang@nmr.mgh.harvard.edu). |
| # |
| """ |
| pstat.py module |
| |
| ################################################# |
| ####### Written by: Gary Strangman ########### |
| ####### Last modified: Dec 18, 2007 ########### |
| ################################################# |
| |
| This module provides some useful list and array manipulation routines |
| modeled after those found in the |Stat package by Gary Perlman, plus a |
| number of other useful list/file manipulation functions. The list-based |
| functions include: |
| |
| abut (source,*args) |
| simpleabut (source, addon) |
| colex (listoflists,cnums) |
| collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) |
| dm (listoflists,criterion) |
| flat (l) |
| linexand (listoflists,columnlist,valuelist) |
| linexor (listoflists,columnlist,valuelist) |
| linedelimited (inlist,delimiter) |
| lineincols (inlist,colsize) |
| lineincustcols (inlist,colsizes) |
| list2string (inlist) |
| makelol(inlist) |
| makestr(x) |
| printcc (lst,extra=2) |
| printincols (listoflists,colsize) |
| pl (listoflists) |
| printl(listoflists) |
| replace (lst,oldval,newval) |
| recode (inlist,listmap,cols=None) |
| remap (listoflists,criterion) |
| roundlist (inlist,num_digits_to_round_floats_to) |
| sortby(listoflists,sortcols) |
| unique (inlist) |
| duplicates(inlist) |
| writedelimited (listoflists, delimiter, file, writetype='w') |
| |
| Some of these functions have alternate versions which are defined only if |
| Numeric (NumPy) can be imported. These functions are generally named as |
| above, with an 'a' prefix. |
| |
| aabut (source, *args) |
| acolex (a,indices,axis=1) |
| acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) |
| adm (a,criterion) |
| alinexand (a,columnlist,valuelist) |
| alinexor (a,columnlist,valuelist) |
| areplace (a,oldval,newval) |
| arecode (a,listmap,col='all') |
| arowcompare (row1, row2) |
| arowsame (row1, row2) |
| asortrows(a,axis=0) |
| aunique(inarray) |
| aduplicates(inarray) |
| |
| Currently, the code is all but completely un-optimized. In many cases, the |
| array versions of functions amount simply to aliases to built-in array |
| functions/methods. Their inclusion here is for function name consistency. |
| """ |
| |
| ## CHANGE LOG: |
| ## ========== |
| ## 07-11-26 ... edited to work with numpy |
| ## 01-11-15 ... changed list2string() to accept a delimiter |
| ## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1 |
| ## 01-05-31 ... added duplicates() and aduplicates() functions |
| ## 00-12-28 ... license made GPL, docstring and import requirements |
| ## 99-11-01 ... changed version to 0.3 |
| ## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py) |
| ## 03/27/99 ... added areplace function, made replace fcn recursive |
| ## 12/31/98 ... added writefc function for output to fixed column sizes |
| ## 12/07/98 ... fixed import problem (failed on collapse() fcn) |
| ## added __version__ variable (now 0.2) |
| ## 12/05/98 ... updated doc-strings |
| ## added features to collapse() function |
| ## added flat() function for lists |
| ## fixed a broken asortrows() |
| ## 11/16/98 ... fixed minor bug in aput for 1D arrays |
| ## |
| ## 11/08/98 ... fixed aput to output large arrays correctly |
| |
| import stats # required 3rd party module |
| import string, copy |
| from types import * |
| |
| __version__ = 0.4 |
| |
| ###=========================== LIST FUNCTIONS ========================== |
| ### |
| ### Here are the list functions, DEFINED FOR ALL SYSTEMS. |
| ### Array functions (for NumPy-enabled computers) appear below. |
| ### |
| |
def abut(source, *args):
    """
    Like the |Stat abut command.  It concatenates two (or more) lists
    side-by-side and returns the result.  '2D' lists are also accommodated for
    either argument (source or addon).  CAUTION: If one list is shorter, it
    will be repeated until it is as long as the longest list.  If this
    behavior is not desired, use pstat.simpleabut().

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on the
             'left', lists in <args> attached consecutively on the 'right'
    """
    def recycle(seq, length):
        # Repeat seq cyclically until it holds exactly `length` items.
        # Floor division keeps the repeat count an integer under true
        # division as well.
        repeats = (length + len(seq) - 1) // len(seq)
        extra = copy.deepcopy(seq)
        for _ in range(repeats - 1):
            seq = seq + extra
        return seq[0:length]

    if not isinstance(source, (list, tuple)):
        source = [source]
    for addon in args:
        if not isinstance(addon, (list, tuple)):
            addon = [addon]
        if len(addon) < len(source):        # source is longer: stretch addon
            addon = recycle(addon, len(source))
        elif len(source) < len(addon):      # addon is longer: stretch source
            source = recycle(source, len(addon))
        source = simpleabut(source, addon)
    return source
| |
| |
def simpleabut(source, addon):
    """
    Concatenates two lists as columns and returns the result.  '2D' lists
    are also accommodated for either argument (source or addon).  This DOES
    NOT repeat either list to make the 2 lists of equal length.  Beware of
    list pairs with different lengths ... the resulting list will be the
    length of the FIRST list passed, and rows of source that have no partner
    in addon are returned unchanged.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a list of lists as long as source, with source on the 'left' and
             addon on the 'right'
    """
    if not isinstance(source, (list, tuple)):
        source = [source]
    if not isinstance(addon, (list, tuple)):
        addon = [addon]
    # Always build a real list so rows can be assigned even for tuple input.
    joined = list(copy.deepcopy(source))
    if len(source) == 0 or len(addon) == 0:
        return joined                       # nothing to pair up
    # Whether each side is 1D or 2D is decided by its first element only.
    source_is_2d = isinstance(source[0], (list, tuple))
    addon_is_2d = isinstance(addon[0], (list, tuple))
    for i in range(min(len(source), len(addon))):
        if source_is_2d:
            left = list(source[i])
        else:
            left = [source[i]]
        if addon_is_2d:
            right = list(addon[i])
        else:
            right = [addon[i]]
        joined[i] = left + right
    return joined
| |
| |
def colex(listoflists, cnums):
    """
    Extracts from listoflists the columns specified by 'cnums'.  cnums can be
    an integer, a sequence of integers, or a string holding a subscript/slice
    expression that is applied to each row ... e.g., '[3:]' will colex
    columns 3 onward from the listoflists.

    Usage:   colex (listoflists,cnums)
    Returns: a flat list when a single column is requested (an integer, or a
             one-item sequence); otherwise a list-of-lists holding the
             requested columns, in the order they appear in cnums
    """
    if isinstance(cnums, (list, tuple)):        # multiple columns to get
        if len(cnums) == 1:
            only = cnums[0]
            return [row[only] for row in listoflists]
        return [[row[col] for col in cnums] for row in listoflists]
    if isinstance(cnums, str):                  # a '[3:]' type expression
        # NOTE(security): cnums is evaluated as Python source.  Never pass
        # text that comes from an untrusted origin.
        selector = eval('lambda x: x' + cnums)
        return [selector(row) for row in listoflists]
    return [row[cnums] for row in listoflists]  # just 1 column to get
| |
| |
def collapse(listoflists, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
    Collapses (by default: averages) the data in collapsecols over every
    unique combination of values found in keepcols.  Setting fcn1 and/or fcn2
    to a function rather than None (e.g., stats.sterr, len) appends those
    results after each collapsed value; if such a function raises, 'N/A' is
    stored instead.  cfcn is the collapse function to apply (defaults to the
    mean, defined here to avoid circular imports with stats.py, but
    harmonicmean or others could be passed).

    Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: if keepcols is empty, one entry per collapse column (the bare
             cfcn result, or [result, fcn1 result, fcn2 result] when extra
             functions are given).  Otherwise a sorted list of lists: each
             unique keepcols combination followed, for every collapse column,
             by the cfcn result and any fcn1/fcn2 results.
    """
    def collmean(inlist):
        total = 0
        for item in inlist:
            total = total + item
        return total / float(len(inlist))

    def extras(column):
        # Results of the optional functions; a failing function yields 'N/A'
        # so one bad column does not abort the whole collapse.
        results = []
        for fcn in (fcn1, fcn2):
            if fcn is not None:
                try:
                    results.append(fcn(column))
                except Exception:
                    results.append('N/A')
        return results

    if not isinstance(keepcols, (list, tuple)):
        keepcols = [keepcols]
    if not isinstance(collapsecols, (list, tuple)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean

    if len(keepcols) == 0:                  # collapse over ALL rows
        means = []
        for col in collapsecols:
            column = [row[col] for row in listoflists]
            value = cfcn(column)
            extra = extras(column)
            if extra:
                means.append([value] + extra)
            else:
                means.append(value)
        return means

    # Group the rows by their key (values in keepcols) in a single pass.
    # Keys are compared with ==, so unhashable values are fine.
    keys = []
    groups = []
    for row in listoflists:
        key = [row[col] for col in keepcols]
        try:
            groups[keys.index(key)].append(row)
        except ValueError:
            keys.append(key)
            groups.append([row])

    newlist = []
    for pos in sorted(range(len(keys)), key=lambda i: keys[i]):
        item = list(keys[pos])
        for col in collapsecols:
            column = [row[col] for row in groups[pos]]
            item.append(cfcn(column))
            item.extend(extras(column))
        newlist.append(item)
    return newlist
| |
| |
def dm(listoflists, criterion):
    """
    Returns rows from the passed list of lists that meet the criteria in
    the passed criterion expression (a string as a function of x; e.g.,
    'x[3]>=9' will return all rows where the 4th column>=9 and "x[2]=='N'"
    will return rows with column 2 equal to the string 'N').

    Usage:   dm (listoflists, criterion)
    Returns: a list of the rows from listoflists that meet the criterion
    """
    # NOTE(security): criterion is evaluated as Python source.  Never pass
    # text that comes from an untrusted origin.
    keep = eval('lambda x: ' + criterion)
    return [row for row in listoflists if keep(row)]
| |
| |
def flat(l):
    """
    Returns the flattened (one level only) version of a '2D' list.
    List-correlate to the a.ravel() method of NumPy arrays.

    Usage:   flat(l)
    Returns: a new list holding the items of every row of l, in order
    """
    return [item for row in l for item in row]
| |
| |
def linexand(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist).

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    # Compare the values directly rather than through a generated
    # expression: text containing quotes and floats then match exactly.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in listoflists:
        for col, val in pairs:
            if not row[col] == val:
                break
        else:                               # no criterion failed
            lines.append(row)
    return lines
| |
| |
def linexor(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one column
    is given but several values, the values are all assumed to pertain to
    that column; likewise a single value is tested against every column.

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist) * len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = list(valuelist) * len(columnlist)
    # Compare the values directly rather than through a generated
    # expression: text containing quotes and floats then match exactly.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in listoflists:
        for col, val in pairs:
            if row[col] == val:
                lines.append(row)
                break                       # one match is enough
    return lines
| |
| |
def linedelimited(inlist, delimiter):
    """
    Returns a string composed of the elements in inlist, with each element
    separated by 'delimiter'.  Non-string elements are converted with str().
    The delimiter may be any length (including empty).  Use '\\t' for
    tab-delimiting.

    Usage:   linedelimited (inlist,delimiter)
    Returns: the delimited string ('' for an empty inlist)
    """
    items = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        items.append(item)
    return delimiter.join(items)
| |
| |
def lineincols(inlist, colsize):
    """
    Returns a string composed of the elements in inlist, with each element
    right-aligned in columns of (fixed) colsize.  Non-string elements are
    converted with str().  An element longer than colsize is cut to its first
    colsize+1 characters (long-standing behavior, kept for compatibility).

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    Returns: the formatted string
    """
    cells = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        if len(item) <= colsize:
            cells.append(item.rjust(colsize))
        else:
            cells.append(item[0:colsize + 1])
    return ''.join(cells)
| |
| |
def lineincustcols(inlist, colsizes):
    """
    Returns a string composed of the elements in inlist, with each element
    right-aligned in a column of the width given at the same position of the
    sequence colsizes.  The length of colsizes must be greater than or equal
    to the number of columns in inlist.  Non-string elements are converted
    with str().  An element longer than its column is cut to its first
    width+1 characters (long-standing behavior, kept for compatibility).

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    """
    cells = []
    for i, item in enumerate(inlist):
        if not isinstance(item, str):
            item = str(item)
        width = colsizes[i]
        if len(item) <= width:
            cells.append(item.rjust(width))
        else:
            cells.append(item[0:width + 1])
    return ''.join(cells)
| |
| |
def list2string(inlist, delimit=' '):
    """
    Converts a 1D list to a single long string for file output, joining
    the items (converted with str() where needed) with 'delimit'.

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    stringlist = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        stringlist.append(item)
    return delimit.join(stringlist)
| |
| |
def makelol(inlist):
    """
    Converts a 1D list to a 2D list (i.e., a list-of-lists).  Useful when you
    want to use put() to write a 1D list one item per line in the file.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[item] for item in inlist]
| |
| |
def makestr(x):
    """
    Returns x unchanged if it already is a string, else str(x).

    Usage:   makestr(x)
    """
    if not isinstance(x, str):
        x = str(x)
    return x
| |
| |
def printcc(lst, extra=2):
    """
    Prints a list of lists in columns, customized by the max size of items
    within the columns (max size of items in col, plus 'extra' number of
    spaces).  Use 'dashes' or '\\n' in the list-of-lists to print dashes or
    blank lines, respectively.  A 1D list is printed as a single row.

    Usage:   printcc (lst,extra=2)
    Returns: None
    """
    def isblank(row):
        return row == ['\n'] or row == '\n' or row == '' or row == ['']

    def isdashes(row):
        return row == ['dashes'] or row == 'dashes'

    if len(lst) == 0:
        return None                         # nothing to print
    if not isinstance(lst[0], (list, tuple)):
        lst = [lst]
    # Column widths come from the data rows only; separator rows (blank
    # lines and dashes) must not influence them.
    datarows = [row for row in lst if not (isblank(row) or isdashes(row))]
    maxsize = []
    if datarows:
        for col in range(len(datarows[0])):
            widths = []
            for row in datarows:
                item = row[col]
                if not isinstance(item, str):
                    item = str(item)
                widths.append(len(item))
            maxsize.append(max(widths) + extra)
    for row in lst:
        if isblank(row):
            print('')
        elif isdashes(row):
            # Underline the text of each column, leaving the 'extra' padding
            # blank.
            dashes = ['-' * (size - extra) for size in maxsize]
            print(lineincustcols(dashes, maxsize))
        else:
            print(lineincustcols(row, maxsize))
    return None
| |
| |
def printincols(listoflists, colsize):
    """
    Prints a list of lists in columns of (fixed) colsize width, where
    colsize is an integer.  Each row is formatted by lineincols().

    Usage:   printincols (listoflists,colsize)
    Returns: None
    """
    for row in listoflists:
        print(lineincols(row, colsize))
    return None
| |
| |
def pl(listoflists):
    """
    Prints a list of lists, 1 list (row) at a time.  A row whose last item
    is '\\n' (e.g., a string that already ends with a newline) is written
    without adding another newline.

    Usage:   pl(listoflists)
    Returns: None
    """
    import sys
    for row in listoflists:
        if len(row) > 0 and row[-1] == '\n':
            sys.stdout.write(str(row))      # row supplies its own newline
        else:
            print(row)
    return None
| |
| |
def printl(listoflists):
    """
    Alias for pl: prints a list of lists, 1 list (row) at a time.

    Usage:   printl(listoflists)
    Returns: None
    """
    pl(listoflists)
    return None
| |
| |
def replace(inlst, oldval, newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively
    (nested lists and tuples are searched as well).  The input is not
    modified.

    Usage:   replace (inlst,oldval,newval)
    Returns: a new sequence (a tuple where the input, or a nested item, was
             a tuple; a list otherwise) with the replacements made
    """
    replaced = []
    for item in inlst:
        if isinstance(item, (list, tuple)):
            replaced.append(replace(item, oldval, newval))
        elif item == oldval:
            replaced.append(newval)
        else:
            replaced.append(item)
    # Tuples cannot be edited in place, so they are rebuilt.
    if isinstance(inlst, tuple):
        return tuple(replaced)
    return replaced
| |
| |
def recode(inlist, listmap, cols=None):
    """
    Changes the values in a list of lists to a new set of values (useful
    when you need to recode data from (e.g.) strings to numbers).  listmap is
    a 2D list of [oldvalue, newvalue] pairs; values not found in listmap are
    left alone.  cols defaults to None (meaning all columns are recoded).

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
    Returns: a copy of inlist with the appropriate values replaced
    """
    lst = copy.deepcopy(inlist)
    # Built once; a list (not a dict) so that unhashable values still work.
    oldvals = [pair[0] for pair in listmap]
    if cols is not None and not isinstance(cols, (list, tuple)):
        cols = [cols]
    for row in lst:
        if cols is None:
            targets = range(len(row))       # every column of THIS row
        else:
            targets = cols
        for col in targets:
            try:
                idx = oldvals.index(row[col])
            except ValueError:
                continue                    # value has no mapping
            row[col] = listmap[idx][1]
    return lst
| |
| |
def remap(listoflists, criterion):
    """
    Remaps each row of a 2D list (listoflists).  This requires a criterion
    as a function of 'x' (the row), so that the result of the following is
    returned ... map(lambda x: <criterion>, listoflists).

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: a list holding the criterion's value for every row
    """
    # NOTE(security): criterion is evaluated as Python source.  Never pass
    # text that comes from an untrusted origin.
    transform = eval('lambda x: ' + criterion)
    return [transform(row) for row in listoflists]
| |
| |
def roundlist(inlist, digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies the
    following function to all elements that are floats ...
    round(element,digits).  Other elements are passed through untouched and
    the input is never modified.

    Usage:   roundlist(inlist,digits)
    Returns: a new 2D list with rounded floats (a 1D list of numbers is
             treated as a single row, so [1.26] gives [[1.3]]); [] for an
             empty inlist
    """
    if len(inlist) == 0:
        return []
    if isinstance(inlist[0], (int, float)):     # 1D input: make it one row
        inlist = [inlist]
    rounded = []
    for row in inlist:
        newrow = []
        for item in row:
            if isinstance(item, float):
                item = round(item, digits)
            newrow.append(item)
        rounded.append(newrow)
    return rounded
| |
| |
def sortby(listoflists, sortcols):
    """
    Sorts a list of lists on the column(s) specified in sortcols (a column
    number or a sequence of column numbers).  Rows with identical sort
    columns are ordered by their full contents.

    Usage:   sortby(listoflists,sortcols)
    Returns: a new sorted list of (new) row lists, unchanged column ordering
    """
    if isinstance(sortcols, (list, tuple)):
        cols = sortcols
    else:
        cols = [sortcols]
    numcols = len(cols)
    # Decorate each row with its sort key, sort, then strip the key again.
    decorated = [[row[col] for col in cols] + list(row) for row in listoflists]
    decorated.sort()
    return [row[numcols:] for row in decorated]
| |
| |
def unique(inlist):
    """
    Returns all unique items in the passed list, in order of first
    appearance.  If a list-of-lists is passed, unique LISTS are found (i.e.,
    items in the first dimension are compared).  Items are compared with ==,
    so they need not be hashable.

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    uniques = []
    for item in inlist:
        if item not in uniques:
            uniques.append(item)
    return uniques
| |
def duplicates(inlist):
    """
    Returns duplicate items in the FIRST dimension of the passed list.  An
    item is reported once for every occurrence that is followed by another
    equal item, so an item present three times is listed twice.

    Usage:   duplicates (inlist)
    Returns: a list of the duplicated items, in order of appearance
    """
    return [item for i, item in enumerate(inlist) if item in inlist[i + 1:]]
| |
| |
def nonrepeats(inlist):
    """
    Returns items that are NOT duplicated in the first dim of the passed
    list (i.e., the items that occur exactly once).

    Usage:   nonrepeats (inlist)
    Returns: a list of the non-repeated items, in order of appearance
    """
    return [item for item in inlist if inlist.count(item) == 1]
| |
| |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| #=================== PSTAT ARRAY FUNCTIONS ===================== |
| |
| try: # DEFINE THESE *ONLY* IF numpy IS AVAILABLE |
| import numpy as N |
| |
def aabut(source, *args):
    """
    Like the |Stat abut command.  It concatenates two (or more) arrays
    column-wise and returns the result.  1D arrays are treated as single
    columns.  CAUTION: If one array is shorter, it will be repeated until it
    is as long as the other.

    Usage:   aabut (source, args)    where args=any # of arrays
    Returns: a 2D array as long as the LONGEST array passed, source appearing
             on the 'left', arrays in <args> attached on the 'right'.
    """
    source = N.asarray(source)
    if source.ndim == 1:
        source = N.resize(source, [source.shape[0], 1])
    for addon in args:
        addon = N.asarray(addon)
        if addon.ndim == 1:
            # Turn it into a column of its OWN length first; which side gets
            # stretched is decided below.
            addon = N.resize(addon, [addon.shape[0], 1])
        # N.resize repeats the data cyclically, i.e. row by row here.
        if len(addon) < len(source):
            addon = N.resize(addon, [source.shape[0], addon.shape[1]])
        elif len(source) < len(addon):
            source = N.resize(source, [addon.shape[0], source.shape[1]])
        source = N.concatenate((source, addon), 1)
    return source
| |
| |
def acolex(a, indices, axis=1):
    """
    Extracts specified indices (an integer or a sequence) from passed array,
    along passed axis (column extraction is default).  BEWARE: A 1D array is
    presumed to be a column-array (and the whole array will be returned as a
    column, whatever indices says).

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices (always a 2D result for
             1D and 2D input)
    """
    a = N.asarray(a)
    if not isinstance(indices, (list, tuple, N.ndarray)):
        indices = [indices]
    if a.ndim == 1:
        return N.resize(a, [a.shape[0], 1])
    return N.take(a, indices, axis)
| |
| |
def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
    Collapses (by default: averages) the data in collapsecols over every
    unique combination of values found in keepcols.  Setting fcn1 and/or fcn2
    to a function rather than None appends those results after each
    collapsed value; if such a function raises, 'N/A' is stored instead.
    cfcn is the collapse function to apply to each column (defaults to the
    mean).

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: if keepcols is empty, one entry per collapse column (a 1D array
             of cfcn results, or a 2D array with rows of [cfcn, fcn1, fcn2]
             results when extra functions are given).  Otherwise a 2D array
             of the sorted unique 'conditions' found in keepcols, each
             abutted with the results for every column in collapsecols.
    """
    def acollmean(inarray):
        values = N.ravel(inarray)
        return N.sum(values) / float(len(values))

    def getcol(rows, col):
        # A 1D array is presumed to be a single column.
        if rows.ndim == 1:
            return rows
        return rows[:, col]

    def summarize(column):
        # cfcn result followed by the optional fcn1/fcn2 results; a failing
        # optional function yields 'N/A' rather than aborting the collapse.
        results = [cfcn(column)]
        for fcn in (fcn1, fcn2):
            if fcn is not None:
                try:
                    results.append(fcn(column))
                except Exception:
                    results.append('N/A')
        return results

    def toarray(rows):
        try:
            return N.array(rows)
        except (TypeError, ValueError):     # mixed contents: keep as objects
            return N.array(rows, 'O')

    a = N.asarray(a)
    if not isinstance(keepcols, (list, tuple, N.ndarray)):
        keepcols = [keepcols]
    if not isinstance(collapsecols, (list, tuple, N.ndarray)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = acollmean

    if len(keepcols) == 0:                  # collapse over ALL rows
        rows = [summarize(getcol(a, col)) for col in collapsecols]
        if fcn1 is None and fcn2 is None:
            return N.array([row[0] for row in rows])
        return toarray(rows)

    # Work with plain lists for the keys so that rows stay intact when the
    # unique conditions are sorted.
    keys = a[:, list(keepcols)].tolist()
    uniques = []
    for key in keys:
        if key not in uniques:
            uniques.append(key)
    uniques.sort()
    newlist = []
    for key in uniques:
        rownums = [i for i, rowkey in enumerate(keys) if rowkey == key]
        tmprows = a[rownums]
        item = list(key)
        for col in collapsecols:
            item.extend(summarize(getcol(tmprows, col)))
        newlist.append(item)
    return toarray(newlist)
| |
| |
def adm(a, criterion):
    """
    Returns rows from the passed array that meet the criteria in the passed
    criterion expression (a string as a function of x, where x is one row).

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: an array of the matching rows (an empty array if none match)
    """
    # NOTE(security): criterion is evaluated as Python source.  Never pass
    # text that comes from an untrusted origin.
    keep = eval('lambda x: ' + criterion)
    lines = [row for row in a if keep(row)]
    try:
        return N.array(lines)
    except (TypeError, ValueError):         # fall back to an Object array
        return N.array(lines, dtype='O')
| |
| |
def isstring(x):
    """
    Tests whether x is a string.

    Usage:   isstring(x)
    Returns: 1 if x is a string, 0 otherwise
    """
    if isinstance(x, str):
        return 1
    return 0
| |
| |
def alinexand(a, columnlist, valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val
    (from valuelist).  One value is required for each column in columnlist.

    Usage:   alinexand (a,columnlist,valuelist)
    Returns: an array of the rows of a where columnlist[i]=valuelist[i] for
             ALL i (an empty array if none match)
    """
    if not isinstance(columnlist, (list, tuple, N.ndarray)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple, N.ndarray)):
        valuelist = [valuelist]
    # Compare the values directly rather than through a generated
    # expression: text containing quotes and floats then match exactly.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in a:
        for col, val in pairs:
            if not row[col] == val:
                break
        else:                               # no criterion failed
            lines.append(row)
    try:
        return N.array(lines)
    except (TypeError, ValueError):         # fall back to an Object array
        return N.array(lines, dtype='O')
| |
| |
def alinexor(a, columnlist, valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val (from
    valuelist) for ANY pair.  One value is required for each column in
    columnlist.  The exception is if either columnlist or valuelist has only
    1 value, in which case that item will be expanded to match the length of
    the other list.

    Usage:   alinexor (a,columnlist,valuelist)
    Returns: an array of the rows of a where columnlist[i]=valuelist[i] for
             ANY i (an empty array if none match)
    """
    if not isinstance(columnlist, (list, tuple, N.ndarray)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple, N.ndarray)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist) * len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = list(valuelist) * len(columnlist)
    # Compare the values directly rather than through a generated
    # expression: text containing quotes and floats then match exactly.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in a:
        for col, val in pairs:
            if row[col] == val:
                lines.append(row)
                break                       # one match is enough
    try:
        return N.array(lines)
    except (TypeError, ValueError):         # fall back to an Object array
        return N.array(lines, dtype='O')
| |
| |
def areplace(a, oldval, newval):
    """
    Replaces all occurrences of oldval with newval in array a (lists and
    other array-likes are converted first).  The input is not modified.

    Usage:   areplace(a,oldval,newval)
    Returns: a new array with the replacements made
    """
    # Without the conversion a plain list would be compared as a whole
    # (a==oldval -> False) and silently returned unchanged.
    a = N.asarray(a)
    return N.where(a == oldval, newval, a)
| |
| |
def arecode(a, listmap, col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require).  Can work on a SINGLE column of a 2D array, or on
    'all' values at once.  listmap is a 2D list of [oldvalue, newvalue]
    pairs; every value is looked up in its ORIGINAL form (mappings do not
    chain) and, should an old value be listed twice, the first pair wins.
    When strings are involved (in the array or as new values) the result is
    an Object array.

    Usage:   arecode (a,listmap,col='all')
    Returns: a new version of array a where listmap[i][0] = (instead) listmap[i][1]
    """
    a = N.asarray(a)
    recode_all = isinstance(col, str) and col == 'all'
    # Strings and numbers can only share an array of Python objects.
    use_object = a.dtype.kind in 'OSU'
    for pair in listmap:
        if isinstance(pair[1], str):
            use_object = True
    if use_object:
        a = a.astype('O')
    if recode_all:
        source = a.ravel()
    else:
        source = a[:, col]

    if use_object:
        work = N.array(source, dtype='O')   # a copy to write into
        for i in range(len(source)):
            for pair in listmap:
                if source[i] == pair[0]:
                    work[i] = pair[1]
                    break
    else:
        work = source.copy()
        # Applied last-to-first so that the first matching pair wins.
        for pair in listmap[::-1]:
            work = N.where(source == pair[0], pair[1], work)

    if recode_all:
        return N.reshape(work, a.shape)
    return N.concatenate([a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1)
| |
| |
def arowcompare(row1, row2):
    """
    Compares two rows from an array, element by element, regardless of
    whether it is an array of numbers or of python objects.

    Usage:   arowcompare(row1,row2)
    Returns: a boolean array of equal length containing True (1) where the
             two rows had identical elements and False (0) otherwise
    """
    row1 = N.asarray(row1)
    row2 = N.asarray(row2)
    if row1.dtype.char == 'O' or row2.dtype.char == 'O':
        # Object arrays: compare the Python objects pair by pair.
        return N.array([bool(x == y) for x, y in zip(row1, row2)], dtype=bool)
    return N.equal(row1, row2)
| |
| |
def arowsame(row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects.

    Usage:   arowsame(row1,row2)
    Returns: a true value (1) if the two rows are identical element by
             element, a false value (0) otherwise.
    """
    # The element-wise == works for numeric and Object arrays alike.
    same = N.asarray(row1) == N.asarray(row2)
    return N.all(same)
| |
| |
def asortrows(a, axis=0):
    """
    Sorts an array "by rows".  This differs from the numpy sort() function,
    which sorts elements WITHIN the given axis.  Instead, this function keeps
    each row (each item along the given axis) intact, but shifts the rows 'up
    or down' relative to one another, ordering them by their first element,
    then their second, etc.  A 1D array is simply sorted.

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a
    """
    a = N.asarray(a)
    if a.ndim < 2:
        return N.sort(a, axis=axis, kind='mergesort')
    if axis != 0:
        a = N.swapaxes(a, axis, 0)          # bring the sort axis to the front
    # Sorting the nested-list form compares whole rows, keeping them intact.
    rows = a.tolist()
    rows.sort()
    result = N.array(rows, dtype=a.dtype)
    if axis != 0:
        result = N.swapaxes(result, axis, 0)
    return result
| |
| |
def aunique(inarray):
    """
    Returns unique items in the FIRST dimension of the passed array (unique
    values of a 1D array, unique rows of a 2D array), in order of first
    appearance.

    Usage:   aunique (inarray)
    Returns: an array of the unique items (empty if inarray is empty)
    """
    inarray = N.asarray(inarray)
    # Compare in nested-list form: works for numeric and Object arrays alike
    # and compares whole rows at once.
    seen = []
    keep = []
    for i, item in enumerate(inarray.tolist()):
        if item not in seen:
            seen.append(item)
            keep.append(i)
    return N.take(inarray, N.array(keep, dtype=int), axis=0)
| |
| |
def aduplicates(inarray):
    """
    Returns duplicate items in the FIRST dimension of the passed array
    (repeated values of a 1D array, repeated rows of a 2D array).  Each
    duplicated item is reported once, in order of first appearance.

    Usage:   aduplicates (inarray)
    Returns: an array of the duplicated items (empty if there are none)
    """
    inarray = N.array(inarray)
    # Compare in nested-list form so that whole rows are compared at once.
    aslist = inarray.tolist()
    dups = []
    for i, item in enumerate(aslist):
        if item in aslist[i + 1:] and item not in dups:
            dups.append(item)
    return N.array(dups, dtype=inarray.dtype)
| |
| except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs |
| pass |