r"""Part 1:Functions to merge similar pivot tables; Part2: Functions to censor table cells based on associated statistics.""" r"""Part 1:There are three main functions: mergeLatest, mergeSelected, and tmerge. All have the same functionality but differ in how the participating tables are specified. The functions stdrowfunc and stdcolfunc may also be useful if partially overriding the row and column matching rules. Example usage (1991 General Social Survey, installed with SPSS) begin program. import tables cmd=\ r'''CTABLES /TABLE sex > race [COUNT COLPCT.COUNT] BY region /TITLES TITLE='This is the Main Table' /COMPARETEST TYPE=PROP ALPHA=0.05 ADJUST=BONFERRONI ORIGIN=COLUMN. ''' tables.mergeLatest(cmd, label='Count') end program. This example runs a Ctables command and merges the last table in the Viewer, which has the signifcance tests, into the main table, which has count and percent statistics in the columns. The next example is similar but has the statistics in the rows. This requires custom rowfunc and colfunc functions. These functions determine the definition of the join between the two tables. The custom rowfunc ensures that the rows labeled Count in the main table match the corresponding rows in the significance table by removing the "Count" element of the label, but they leave the other statistics labels unchanged to ensure that those do not join. Secondly, the colfunc function, when being applied to the test table -- othertable is True -- return the column heading without the (A), (B) etc term ensuring that the columns join on the rest of the heading. But on the main table, the entire column heading is returned, because there is no statistic name in it (because that label occurs in the rows for this layout). cmd=\ r'''CTABLES /TABLE sex > race [COUNT COLPCT.COUNT] BY region /SLABELS POSITION=row /TITLES TITLE='This is the Main Table' /COMPARETEST TYPE=PROP ALPHA=0.05 ADJUST=BONFERRONI ORIGIN=COLUMN.''' def rowfunc(tup, othertable): if tup[-1] == 'Count': return tup[0:-1] else: return tup def colfunc(tup, othertable): if othertable: return tup[:-1] else: return tup tables.mergeLatest(cmd, rowfunc = rowfunc, colfunc=colfunc) # This example merges two tables with the same structure but different statistics. cmd=\ r'''CTABLES /TABLE region BY sex > educ [MEAN F10.3] by race. CTABLES /TABLE region BY sex > educ [SEMEAN 'SE Mean' F10.3]. ''' tables.mergeLatest(cmd, label='Mean', mode='merge') # This example merges two tables with different statistics but only for males in the West region cmd=\ r'''CTABLES /TABLE region BY sex > educ [MEAN F10.3] by race. CTABLES /TABLE region BY sex > educ [SEMEAN 'SE Mean' F10.3]. ''' def rowfunc(tup, othertable): if othertable and tup[-1] != "West": return None else: return tables.stdrowfunc(tup, othertable) def colfunc(tup, othertable): if othertable and tup[2] != "Male": return None else: return tables.stdcolfunc(tup, othertable) tables.mergeLatest(cmd, label='Mean', mode='merge', rowfunc = rowfunc, colfunc=colfunc) Part 2: Functions to censor table cells. Example: censorLatest cmd='ctables /TABLE origin BY accel [MEAN, COUNT]' rc=tables.censorLatest(critvalue=100, desout=desout, neighborlist=[-1], direction='row') This example blanks out the means for all cells with a count value < 100. See the function help for other censoring options. """ # This module requires the spss and viewer modules. See the viewer module, in turn, for its requirements. # This module will not work in distributed mode or if there is not a regular Designated Viewer. # Tables with layers are not supported for the merge functions. The first layer is processed, and a footnote is attached. # The censoring functions do support layers. # Before using this module, prepare the Python interface to the SPSS type library by running # makepy on the SPSS libraries, # In order to avoid display problems, set the SPSS pivot table options to "edit all tables in the Viewer". # However, display problems can be cleared up by activating and deactivating a table # and the table will export or print properly without this step. # The first set of code provides ways to merge two tables together. # There are three main functions: mergeLatest, mergeSelected, and tmerge. # The functions mergeLatest and tmerge are very similar. mergeLatest includes the ability to specify a set of # SPSS commands to run and an automatic way to find the last tables. # The second set of code provides ways to censor cells of a table based on a statistic. Typically it would be # used to blank out cells where the count for that category is too small # Copyright(c) SPSS Inc, 2007 __author__ = 'spss' __version__= '1.2.2' # history # 07-apr-2006 Initial experimental version # 11-apr-2006 Add mode parameter to allow merge or replace of cells, and othertable parameter to rowfunc and colfunc # to allow these functions to distinguish between the main and other tables. Added mergeSelected. # 05-jan-2007 Added functions for censoring table cells. # 03-feb-2007 Added option to hide entire criterion column when censoring a table import spss, viewer, sys debug = False def mergeLatest(cmd=None, desout=None, label=None,tablepair=None, mode='merge', appendtitle=True, appendcaption=True, rowfunc=None, colfunc=None, hide=True, addlabelleaf=True, layerfootnote=None): """Run the command(s), cmd. and merge the specified tables. If cmd is empty or not supplied, it is not run, and the requisite tables should already exist. if tablepair, a duple, is specified, those are the absolute table numbers to merge: main, other. Otherwise, the most recent tables are used. The expectation is the the Ctables command produced the main table and one table containing either column proportions or column means tests. Set label to the label of the statistic where the merge should appear. Typically it would be "Count" or "Mean" (case matters). If no label is specified, the results are merged to all matching columns. mode may be merge (the default), or replace. Set appendtitle and appendcaption to False to suppress the corresponding append. desout can be specified as the handle to the designated Viewer. If None, this module will find it. set hide=False to prevent the test table from being hidden. If the tables have layers, only the first layer is merged, and a footnote is added to the title indicating this. layerfootnote can provide alternative text or be set to "" to suppress the footnote. """ if cmd: spss.Submit(cmd) if desout is None: desout= viewer.spssapp().GetDesignatedOutput() if not tablepair: tablepair = [] items = desout.Items itemkt = items.Count-1 while itemkt >= 0 and len(tablepair) < 2: if items.GetItem(itemkt).SPSSType == 5: #a pivot table tablepair.insert(0, itemkt) itemkt-= 1 if len(tablepair) != 2: raise ValueError, "Exactly two table numbers must be specified." maintbl, othertbl = tablepair tmerge(desout, maintbl, othertbl, label=label, hide=hide, rowfunc=rowfunc, colfunc=colfunc,mode=mode, addlabelleaf=addlabelleaf, appendtitle=appendtitle, appendcaption=appendcaption) def mergeSelected(*args, **kwds): """Merge the last two tables currently selected in the Designated Viewer using the later table as and earlier as
. Arguments are the same as for tmerge except that the first three are supplied by this function.""" desout = viewer.spssapp().GetDesignatedOutput() items = desout.Items itemkt = items.Count-1 tablepair = [] while itemkt >= 0 and len(tablepair) < 2: if items.GetItem(itemkt).SPSSType == 5 and items.GetItem(itemkt).Selected: #a selected pivot table tablepair.insert(0, itemkt) itemkt-= 1 if len(tablepair) != 2: raise ValueError, "At least two table numbers must be specified." tmerge(desout, tablepair[0], tablepair[1], *args, **kwds) def tmerge(desviewer, main, other, hide=True, label=None, mode='merge', rowfunc=None, colfunc=None, addlabelleaf=True, appendtitle=True, appendcaption=True, layerfootnote=None): """merge the cells of table other into table main for those where the row and column labels match. main and other are item numbers of the tables in the Designated Viewer, whose handle is passed in desviewer. If hide, then the "other" table is hidden after the merge. Label identifies the column statistic where the merged cells are placed. If omitted, statistics are placed in every applicable column. rowfunc and colfunc are functions that transform the row and column label tuples before creating the dictionary key. By default, rowfunc is the identity function and colfunc selects all but the last tuple element. If addlabeleaf is True, the last element of the "other" table column labels is appened to the matching labels of the main table. If the table has layers, which are not supported in this function currently, the layerfootnote is attached to the title. You can supply alternative text via this parameter. Use an empty string to suppress the footnote.""" if rowfunc is None: rowfunc = stdrowfunc if colfunc is None: colfunc = stdcolfunc if not mode in ['merge','replace']: raise ValueError, "mode must be merge or replace" objItems = desviewer.Items main = objItems.GetItem(main) other = objItems.GetItem(other) if main.SPSSType != 5 or other.SPSSType != 5: raise ValueError, "A specified item is not a pivot table or does not exist." #PivotTable= other.ActivateTable() PivotTable = other.GetTableOleObject() try: otherdict, cols = gettbl(PivotTable.RowLabelArray() , PivotTable.ColumnLabelArray(), PivotTable.DataCellArray(), rowfunc, colfunc) othertitle = PivotTable.TitleText othercaption = PivotTable.CaptionText finally: #other.Deactivate() # make sure table is deactivated pass originalWidth = main.Width #PivotTable = main.ActivateTable() PivotTable = main.GetTableOleObject() #new try: layerwarning = PivotTable.LayerLabelArray().NumDimensions > 0 PivotTable.UpdateScreen = False colarray = PivotTable.ColumnLabelArray() settbl(PivotTable.RowLabelArray() , colarray,PivotTable.DataCellArray(), otherdict, label, rowfunc, colfunc, mode) if addlabelleaf: appendleaf(colarray, colfunc, label, cols, mode) if appendtitle: PivotTable.TitleText= PivotTable.TitleText + "\r\n" + othertitle if appendcaption: PivotTable.CaptionText = PivotTable.CaptionText + "\r\n" + othercaption PivotTable.Autofit() finally: if layerwarning and layerfootnote != "": PivotTable.SelectTitle() PivotTable.InsertFootnote(layerfootnote or "Only the first layer has been merged from the secondary table.") PivotTable.UpdateScreen = True main.Activate() main.Deactivate() #PivotTable.UpdateScreen = True if hide: other.Visible = False ## main.Activate() ## main.Deactivate() def appendleaf(colarray, colfunc, label, cols, mode, separator="\r\n"): """append the leaf element of colarray to matching items in cols at label, if any. If mode != 'merge', then replace the leaf instead of appending.""" merge = mode == 'merge' coldict = {} for tup in cols: coldict[colfunc(tuple(tup),True)] = tup[-1] #need to be able to trim here. was False nr = colarray.NumRows-1 for j in range(colarray.NumColumns): try: colarray.SetVAlignAt(nr, j, 0) #force vertical alignment except: pass if label is None or label == colarray.ValueAt(nr,j): tbltup = colfunc(maketup(colarray, j, 'col'), False) if tbltup in coldict: if merge: colarray.SetValueAt(nr,j, colarray.ValueAt(nr,j) + separator + coldict[tbltup]) else: colarray.SetValueAt(nr,j, coldict[tbltup]) def gettbl(rowlabels, collabels, datacells, rowfunc, colfunc): """return dictionary indexed by rowlabel and collabel tuple of datacells values and a listof the row label tuples with col identifier Dictionary containing a duple of the datacell value and the column identifier for test purposes e.g., (A). rowfunc and colfunc are functions applied to the label tuples before using them as keys. """ tbldict = {} cols = [] for j in range(datacells.NumColumns): collabeltup = maketup(collabels, j, 'col') cols.append(collabeltup) for i in range(datacells.NumRows): rowlabeltup = maketup(rowlabels, i, 'row') tbldict[(rowfunc(rowlabeltup, True), colfunc(collabeltup, True))] = fmtcell(datacells, i, j) return tbldict, cols def maketup(obj, index, dim): "return tuple of row or column values of obj" lis = [] if dim == 'row': for i in range(obj.NumColumns): lis.append(obj.ValueAt(index, i)) else: for j in range(obj.NumRows): lis.append(obj.ValueAt(j, index)) return tuple(lis) def settbl(rowlabels, collabels, datacells, tbldict, label, rowfunc, colfunc, mode, separator="\r\n"): """Modify table to incorporate "other" nonblank table values. Set all cell vertical alignment to top to insure they still line up and horizontal to right aligned for elements converted to strings. If, however, mode == 'replace' the values from the other table just replace the main table values. other formatting is preserved, but horizontal alignment may be changed. rowfunc and colfunc are functions applied to the label tuples before using them as keys. separator is inserted between the existing label and the attachment.""" mergemode = mode == 'merge' for i in range(datacells.NumRows): rowlabeltup = maketup(rowlabels, i, 'row') for j in range(datacells.NumColumns): if mergemode: try: datacells.SetVAlignAt(i,j, 0) #vertical alignment = SpssVAlTop except: pass collabeltup = maketup(collabels, j, 'col') key = (rowfunc(rowlabeltup,False), colfunc(collabeltup, False)) otherval = tbldict.get(key, None) if otherval is not None: if (label is None or collabeltup[-1].upper() == label.upper()) and not str(otherval).isspace(): #dataval = datacells.ValueAt(i,j) if mergemode: newval = fmtcell(datacells, i, j) + separator + str(otherval) datacells.SetValueAt(i,j, newval) datacells.SetHAlignAt(i,j,1) # right align cell else: if isinstance(otherval, basestring) and not isinstance(datacells.ValueAt(i,j), basestring): datacells.SetHAlignAt(i,j,1) # right align cell datacells.SetValueAt(i,j, otherval) # keep target table formatting (mostly). def fmtcell(cells, i, j): """return formatted value of cell(i,j). If format matches some #.# or ##.#%, appropriate string is returned; otherwise simple formatting is used.""" value = cells.ValueAt(i,j) fmt = cells.NumericFormatAt(i,j) decimals = cells.HDecDigitsAt(i,j) if fmt.startswith("#") or fmt.startswith("$"): try: value = "%.*f" % (decimals, value) if fmt.startswith("$"): value = "$" + value if fmt.endswith("%"): value = value + "%" except: value = str(value) else: value = str(value) return value # These functions are used by default to transform row and column label tuples when accessing the main or # other table. def stdrowfunc(tup,othertable): """tup is the tuple of row labels for the current row. By default it is passed back unmodified. You can supply your own rowfunc to transform it when the tables being merged do not have the same structure or if you want to prevent the merging of certain rows. Note that the tuple starts with "row" or "column", so the first normally visible element is tup[1]. othertable is True if the function was called while processing the "other" table and False if processing the main table.""" if debug: print "row:", (othertable and "other:" or "main:"), tup return tup def stdcolfunc(tup, othertable): """tup is the tuple of column labels for the current column. By default it is passed back with the last item deleted. See additiona comments under rowfunc""" if debug: print "col:", (othertable and "other:" or "main:"), tup return tup[:-1] # Part 2 def censorLatest(cmd=None, desout=None, tablenum=None, critfield='Count', critvalue=5, symbol=" ", neighborlist=[1], direction='row', testtype="<", appendcaption=True, othercaption=None, hidecrit=False): """Run the command(s), cmd and censor specified cells based on criterion and return the number of cells censored. If cmd is empty or not supplied, it is not run, and the requisite tables should already exist. if tablenum is specified, it is the absolute table number to process. Otherwise, the most recent table is used. Set appendtitle and appendcaption to False to suppress the corresponding append. desout can be specified as the handle to the designated Viewer. If None, this module will find it. critfield specifies the leaf text of the cell to use for the criterion value. critvalue is the threshold value. The absolute value of a cell is tested against the criterion value. neighborlist is a list of relative positions that should be censored. Positive to the right/above, negative to the left/below. In order to censor the criterion field itself, include 0 in neighborlist direction is 'row' for items in the same row or 'col' for items in the same column. The label is expected in the dimension opposite to direction. symbol is the value that should replace a censored field, defaulting to blank. hidecrit if True causes the criterion row or column to be hidden. Note that censoring a field may still leave its value discoverable if it is included in a total. Example: censor statistics in a table when counts are small: cmd="CTABLES /TABLE origin BY accel [MEAN, COUNT] /SLABELS POSITION=ROW." rc= tables.censorLatest(critvalue=10, desout=desout, neighborlist=[-1], direction='col') Example: blank out insignificant correlations in the output from CORRELATIONS: cmd=r"CORRELATIONS /VARIABLES=availblt avg_purc chckout" tables.censorLatest(cmd=cmd, critvalue=.01, critfield="Sig. (2-tailed)", testtype=">", neighborlist=[0,1,-1], direction='col') """ if cmd: spss.Submit(cmd) if desout is None: desout= viewer.spssapp().GetDesignatedOutput() if not tablenum: items = desout.Items itemkt = items.Count-1 while itemkt >= 0 and not tablenum: if items.GetItem(itemkt).SPSSType == 5: #a pivot table tablenum = itemkt itemkt-= 1 if not tablenum: raise ValueError, "No table found to process." censorkt = tcensor(desout, tablenum, critfield, critvalue, symbol, neighborlist, direction, testtype, appendcaption=appendcaption, othercaption=othercaption, hidecrit=hidecrit) return censorkt def tcensor(desviewer, main, critfield='Count', critvalue=5, symbol=" ", neighborlist=[1], direction='row', testtype='<', appendcaption=True,othercaption=None, hidecrit=False): """censor the cells of table main in designated Viewer window.""" objItems = desviewer.Items main = objItems.GetItem(main) if main.SPSSType != 5 : raise ValueError, "A specified item is not a pivot table or does not exist." # create criterion function try: ttype = ['<', '<=','=','==','>','>=','!=', '~='].index(testtype) if ttype == 0: olist = [True, False, False] elif ttype == 1: olist = [True, True, False] elif ttype == 2 or ttype == 3: olist = [False, True, False] elif ttype == 4: olist = [False, False, True] elif ttype == 5: olist = [False, True, True] elif ttype == 6 or olist == 7: olist = [True, False, True] except: raise ValueError, "Invalid comparison type: " + testtype def crittest(value): try: c = cmp(abs(value), critvalue) return olist[c+1] except: return False PivotTable = main.GetTableOleObject() censorkt = 0 try: PivotTable.UpdateScreen = False if direction == 'row': lblarray = PivotTable.ColumnLabelArray() elif direction == 'col': lblarray = PivotTable.RowLabelArray() else: raise ValueError, "direction must be 'row' or 'col'" censorkt = censortbl(lblarray, PivotTable, critfield, crittest, symbol, neighborlist, direction, hidecrit) if appendcaption: if othercaption is None: othercaption = "Number of values censored because of " + critfield + ": " + str(censorkt) PivotTable.CaptionText = PivotTable.CaptionText + "\r\n" + othercaption PivotTable.Autofit() finally: PivotTable.UpdateScreen = True main.Activate() main.Deactivate() return censorkt def censortbl(labels, PivotTable, critfield, crittest, symbol, neighborlist, direction, hidecrit): """Censor the table, returning the number of values censored. labels is the row or column label array where the critfield should be found. PivotTable is the table to process. crittest is a test function to be used for the comparison. symbol is the symbol used to replace the value. neighborlist is the list of neighboring cells in which to do the replacement when the test succeeds direction is row or col, which determines how to interpret neighborlist. hidecrit if True causes the criterion row or column to be hidden.""" try: datacells = PivotTable.DataCellArray() rows = direction == 'row' if rows: dimsize = datacells.NumColumns otherdimsize = datacells.NumRows lbllimit = labels.NumRows - 1 else: dimsize = datacells.NumRows otherdimsize = datacells.NumColumns lbllimit = labels.NumColumns - 1 censorkt = 0 foundcritfield = False c = LayerManager(PivotTable) for facenumber, face in enumerate(c.layers()): for i in range(dimsize): a1, a2 = _sargs(rows, lbllimit, i) if labels.ValueAt(a1, a2) == critfield: foundcritfield = True for other in range(otherdimsize): aa1, aa2 = _sargs(rows, other, i) if crittest(datacells.ValueAt(aa1, aa2)): for offset in neighborlist: if 0 <= i+ offset < dimsize: aaa1, aaa2 = _sargs(rows, other, i + offset) datacells.SetValueAt(aaa1, aaa2, symbol) datacells.SetHAlignAt(aaa1, aaa2, 1) # right align cell censorkt += 1 if hidecrit and facenumber == 0: # only need to hide once for all the layers hide(labels, rows, i, lbllimit) except AttributeError: print "Error in censortbl:", sys.exc_info()[0] print "This may indicate a need to run makepy on the SPSS Pivot Table type library" raise if not foundcritfield: print "Warning: The criterion field was not found in the table: " + critfield raise AttributeError, "The criterion field was not found in the table: " + critfield return censorkt def hide(array, rows, i, lastdim): """Hide the data and labels at i. array is the array of row or column labels. i is the row or column number to hide. lastdim is the index of the innermost label row or column.""" #collblarray = PivotTable.columnlabelarray() #lblnumrows = collblarray.numrows x, y = _sargs(rows, lastdim, i) array.HideLabelsWithDataAt(x, y) def _sargs(row,x,y): """ return x, y, if row == True else y,x""" if row: return (x,y) else: return (y,x) class LayerManager(object): def __init__(self, pt): """pt is an activated pivot table""" self.ptmgr = pt.PivotManager() self.numlayerdims = self.ptmgr.NumLayerDimensions self. layercats = [] self.layercurrcat = [] for c in range(self.numlayerdims): self.layercats.append(self.ptmgr.LayerDimension(c).NumCategories) self.layercurrcat.append(self.ptmgr.LayerDimension(c).CurrentCategory) def layers(self): """Generator to iterate over all the categories of all the dimensions in the layer. Returns the current category number, but the main purpose is to change the current category.""" if self.numlayerdims == 0: yield 0 else: for ld in range(self.numlayerdims): cc = self.layercurrcat[ld] for c in range(self.layercats[ld]): yield cc cc = (cc+1) % self.layercats[ld] self.ptmgr.LayerDimension(ld).CurrentCategory = cc