r"""Part 1:Functions to merge similar pivot tables;  Part2: Functions to censor table cells based on associated statistics."""

r"""Part 1:There are three main functions: mergeLatest, mergeSelected, and
tmerge.  All have the same functionality but differ in how the participating tables are specified.

The functions stdrowfunc and stdcolfunc may also be useful if partially overriding the row and column
matching rules.

Example usage (1991 General Social Survey, installed with SPSS)
begin program.
import tables

cmd=\
r'''CTABLES
  /TABLE sex > race [COUNT  COLPCT.COUNT] BY region
  /TITLES TITLE='This is the Main Table'
  /COMPARETEST TYPE=PROP ALPHA=0.05 ADJUST=BONFERRONI ORIGIN=COLUMN.
'''
tables.mergeLatest(cmd, label='Count')
end program.

This example runs a Ctables command and merges the last table in the Viewer, which has the signifcance tests, into the
main table, which has count and percent statistics in the columns.

The next example is similar but has the statistics in the rows.  This requires custom rowfunc and colfunc functions.
These functions determine the definition of the join between the two tables.  The custom rowfunc ensures that the rows
labeled Count in the main table match the corresponding rows in the significance table by removing the "Count" element of the
label, but they leave the other statistics labels unchanged to ensure that those do not join.

Secondly,  the colfunc function, when being applied to the test table -- othertable is True -- return the column heading without
the (A), (B) etc term ensuring that the columns join on the rest of the heading.  But on the main table, the entire column heading
is returned, because there is no statistic name in it (because that label occurs in the rows for this layout).

cmd=\
r'''CTABLES
  /TABLE sex > race [COUNT  COLPCT.COUNT] BY region
 /SLABELS POSITION=row
 /TITLES TITLE='This is the Main Table'
  /COMPARETEST TYPE=PROP ALPHA=0.05 ADJUST=BONFERRONI ORIGIN=COLUMN.'''

def rowfunc(tup, othertable):
	if tup[-1] == 'Count':
		return tup[0:-1]
	else:
		return tup

def colfunc(tup, othertable):
	if othertable:
		return tup[:-1]
	else:
		return tup

tables.mergeLatest(cmd, rowfunc = rowfunc, colfunc=colfunc)


# This example merges two tables with the same structure but different statistics.
cmd=\
r'''CTABLES
  /TABLE region BY sex > educ [MEAN F10.3] by race.
CTABLES
  /TABLE region BY sex > educ [SEMEAN 'SE Mean' F10.3].
'''
tables.mergeLatest(cmd, label='Mean', mode='merge')

# This example merges two tables with different statistics but only for males in the West region
cmd=\
r'''CTABLES
  /TABLE region BY sex > educ [MEAN F10.3] by race.
CTABLES
  /TABLE region BY sex > educ [SEMEAN 'SE Mean' F10.3].
'''
def rowfunc(tup, othertable):
	if othertable and tup[-1] != "West":
		return None
	else:
		return tables.stdrowfunc(tup, othertable)

def colfunc(tup, othertable):
	if othertable and tup[2] != "Male":
		return None
	else:
		return tables.stdcolfunc(tup, othertable)

tables.mergeLatest(cmd, label='Mean', mode='merge', rowfunc = rowfunc, colfunc=colfunc)



Part 2: Functions to censor table cells.


Example: censorLatest
cmd='ctables   /TABLE origin BY accel [MEAN, COUNT]'
rc=tables.censorLatest(critvalue=100, desout=desout, neighborlist=[-1], direction='row')

This example blanks out the means for all cells with a count value < 100.  See the function help for other censoring options.
"""
# This module requires the spss and viewer modules.  See the viewer module, in turn, for its requirements.
# This module will not work in distributed mode or if there is not a regular Designated Viewer.
#  Tables with layers are not supported for the merge functions.  The first layer is processed, and a footnote is attached.
#  The censoring functions do support layers.

# Before using this module, prepare the Python interface to the SPSS type library by running
# makepy on the SPSS libraries,
# In order to avoid display problems, set the SPSS pivot table options to "edit all tables in the Viewer".
# However, display problems can be cleared up by activating and deactivating a table
# and the table will export or print properly without this step.

# The first set of code provides ways to merge two tables together.
# There are three main functions: mergeLatest, mergeSelected, and tmerge.
# The functions mergeLatest and tmerge are very similar.  mergeLatest includes the ability to specify a set of
# SPSS commands to run and an automatic way to find the last tables.

# The second set of code provides ways to censor cells of a table based on a statistic.  Typically it would be
# used to blank out cells where the count for that category is too small

# Copyright(c) SPSS Inc, 2007



__author__ =  'spss'
__version__=  '1.2.2'

# history
# 07-apr-2006  Initial experimental version
# 11-apr-2006   Add mode parameter to allow merge or replace of cells, and othertable parameter to rowfunc and colfunc
#                          to allow these functions to distinguish between the main and other tables.  Added mergeSelected.
# 05-jan-2007   Added functions for censoring table cells.
# 03-feb-2007  Added option to hide entire criterion column when censoring a table

import spss, viewer, sys

debug = False

def mergeLatest(cmd=None, desout=None, label=None,tablepair=None, mode='merge', appendtitle=True, appendcaption=True, 
		 rowfunc=None, colfunc=None, hide=True, addlabelleaf=True, layerfootnote=None):
	"""Run the command(s), cmd. and merge the specified tables.
	If cmd is empty or not supplied, it is not run, and the requisite tables should already exist.
	if tablepair, a duple, is specified, those are the absolute table numbers to merge: main, other.
	Otherwise, the most recent tables are used.
	The expectation is the the Ctables command produced the main table and one table containing either
	column proportions or column means tests.
	Set label to the label of the statistic where the merge should appear.  Typically it would be "Count" or "Mean"
	(case matters).  If no label is specified, the results are merged to all matching columns.
	mode may be merge (the default), or replace.
	Set appendtitle and appendcaption to False to suppress the corresponding append.
	desout can be specified as the handle to the designated Viewer.  If None, this module will find it.
	set hide=False to prevent the test table from being hidden.
	If the tables have layers, only the first layer is merged, and a footnote is added to the title indicating this.
	layerfootnote can provide alternative text or be set to "" to suppress the footnote.
	"""
	if cmd:
		spss.Submit(cmd)
	if desout is None:
		desout= viewer.spssapp().GetDesignatedOutput()
	if not tablepair:
		tablepair = []
		items = desout.Items
		itemkt = items.Count-1
		while itemkt >= 0 and len(tablepair) < 2:
			if items.GetItem(itemkt).SPSSType == 5:  #a pivot table
				tablepair.insert(0, itemkt)
			itemkt-= 1
	if len(tablepair) != 2:
		raise ValueError, "Exactly two table numbers must be specified."
	maintbl, othertbl = tablepair
	tmerge(desout, maintbl, othertbl, label=label, hide=hide, rowfunc=rowfunc, colfunc=colfunc,mode=mode,
	       addlabelleaf=addlabelleaf, appendtitle=appendtitle, appendcaption=appendcaption)

def mergeSelected(*args, **kwds):
	"""Merge the last two tables currently selected in the Designated Viewer using the later table as <other> and earlier as <main>.
	Arguments are the same as for tmerge except that the first three are supplied by this function."""
	
	desout = viewer.spssapp().GetDesignatedOutput()
	items = desout.Items
	itemkt = items.Count-1
	tablepair = []
	while itemkt >= 0 and len(tablepair) < 2:
		if items.GetItem(itemkt).SPSSType == 5 and items.GetItem(itemkt).Selected:  #a selected pivot table
			tablepair.insert(0, itemkt)
		itemkt-= 1
	if len(tablepair) != 2:
		raise ValueError, "At least two table numbers must be specified."
	tmerge(desout, tablepair[0], tablepair[1], *args, **kwds)

def tmerge(desviewer, main, other, hide=True, label=None, mode='merge', rowfunc=None, colfunc=None,
	  addlabelleaf=True, appendtitle=True, appendcaption=True, layerfootnote=None):
	"""merge the cells of table other into table main for those where the row and column labels match.
	main and other are item numbers of the tables in the Designated Viewer, whose handle is passed in desviewer.
	If hide, then the "other" table is hidden after the merge.
	Label identifies the column statistic where the merged cells are placed.  If omitted, statistics are placed in every
	applicable column.
	rowfunc and colfunc are functions that transform the row and column label tuples before creating the dictionary key.
	By default, rowfunc is the identity function and colfunc selects all but the last tuple element.
	If addlabeleaf is True, the last element of the "other" table column labels is appened to the matching labels
	of the main table.
	If the table has layers, which are not supported in this function currently, the layerfootnote is attached
	to the title.  You can supply alternative text via this parameter.  Use an empty string to suppress the footnote."""
	
	if rowfunc is None:
		rowfunc = stdrowfunc
	if colfunc is None:
		colfunc = stdcolfunc
	if not mode in ['merge','replace']:
		raise ValueError, "mode must be merge or replace"

	objItems = desviewer.Items
	main = objItems.GetItem(main)
	other = objItems.GetItem(other)
	if main.SPSSType != 5 or other.SPSSType != 5:
		raise ValueError, "A specified item is not a pivot table or does not exist."
	#PivotTable= other.ActivateTable()
	PivotTable = other.GetTableOleObject()
	try:
		otherdict, cols = gettbl(PivotTable.RowLabelArray() , PivotTable.ColumnLabelArray(), PivotTable.DataCellArray(),
			rowfunc, colfunc)
		othertitle = PivotTable.TitleText
		othercaption = PivotTable.CaptionText
	finally:
		#other.Deactivate()   # make sure table is deactivated
		pass
	originalWidth = main.Width
	#PivotTable = main.ActivateTable()
	PivotTable = main.GetTableOleObject()   #new
	try:
		layerwarning = PivotTable.LayerLabelArray().NumDimensions > 0
		PivotTable.UpdateScreen = False
		colarray = PivotTable.ColumnLabelArray()
		settbl(PivotTable.RowLabelArray() , colarray,PivotTable.DataCellArray(), otherdict, label,
		       rowfunc, colfunc, mode)
		if addlabelleaf:
			appendleaf(colarray, colfunc, label, cols, mode)
		if appendtitle:
			PivotTable.TitleText= PivotTable.TitleText + "\r\n" + othertitle
		if appendcaption:
			PivotTable.CaptionText = PivotTable.CaptionText + "\r\n" + othercaption
		PivotTable.Autofit()
	finally:
		if layerwarning and layerfootnote != "":
			PivotTable.SelectTitle()
			PivotTable.InsertFootnote(layerfootnote or "Only the first layer has been merged from the secondary table.")
		PivotTable.UpdateScreen = True
		main.Activate()
		main.Deactivate()
		#PivotTable.UpdateScreen = True
	if hide:
		other.Visible = False
##	main.Activate()
##	main.Deactivate()

def appendleaf(colarray, colfunc, label, cols, mode, separator="\r\n"):
	"""append the leaf element of colarray to matching items in cols at label, if any.
	If mode != 'merge', then replace the leaf instead of appending."""
	
	merge = mode == 'merge'
	coldict = {}
	for tup in cols:
		coldict[colfunc(tuple(tup),True)] = tup[-1]  #need to be able to trim here.  was False
	nr = colarray.NumRows-1
	for j in range(colarray.NumColumns):
		try:
			colarray.SetVAlignAt(nr, j, 0)   #force vertical alignment
		except:
			pass
		if label is None or label == colarray.ValueAt(nr,j):
			tbltup = colfunc(maketup(colarray, j, 'col'), False)
			if tbltup in coldict:
				if merge:
					colarray.SetValueAt(nr,j,  colarray.ValueAt(nr,j) + separator + coldict[tbltup])
				else:
					colarray.SetValueAt(nr,j,  coldict[tbltup])
				
		
def gettbl(rowlabels, collabels, datacells, rowfunc, colfunc):
	"""return dictionary indexed by rowlabel and collabel tuple of datacells values and a listof the row label tuples with col identifier
	Dictionary containing a duple of the datacell value and the column identifier for test purposes e.g., (A).
	rowfunc and colfunc are functions applied to the label tuples before using them as keys.
	"""
	
	tbldict = {}
	cols = []
	for j in range(datacells.NumColumns):	
		collabeltup = maketup(collabels, j, 'col')
		cols.append(collabeltup)
		for i in range(datacells.NumRows):
			rowlabeltup = maketup(rowlabels, i, 'row')
			tbldict[(rowfunc(rowlabeltup, True), colfunc(collabeltup, True))] = fmtcell(datacells, i, j)
	return tbldict, cols

def maketup(obj, index, dim):
	"return tuple of row or column values of obj"
	lis = []
	if dim == 'row':
		for i in range(obj.NumColumns):
			lis.append(obj.ValueAt(index, i))
	else:
		for j in range(obj.NumRows):
			lis.append(obj.ValueAt(j, index))
	return tuple(lis)

def settbl(rowlabels, collabels, datacells, tbldict, label, rowfunc, colfunc, mode, separator="\r\n"):
	"""Modify table to incorporate "other" nonblank table values.  Set all cell vertical alignment to top to insure they still line up
	and horizontal to right aligned for elements converted to strings.
	If, however, mode == 'replace' the values from the other table just replace the main table values.
	other formatting is preserved, but horizontal alignment may be changed.
	rowfunc and colfunc are functions applied to the label tuples before using them as keys.
	separator is inserted between the existing label and the attachment."""
	
	mergemode = mode == 'merge'
	for i in range(datacells.NumRows):
		rowlabeltup = maketup(rowlabels, i, 'row')
		for j in range(datacells.NumColumns):
			if mergemode:
				try:
					datacells.SetVAlignAt(i,j, 0)   #vertical alignment = SpssVAlTop
				except:
					pass
			collabeltup = maketup(collabels, j, 'col')
			key = (rowfunc(rowlabeltup,False), colfunc(collabeltup, False))
			otherval = tbldict.get(key, None)
			if otherval is not None:
				if (label is None or collabeltup[-1].upper() == label.upper()) and not str(otherval).isspace():
					#dataval = datacells.ValueAt(i,j)
					if mergemode:
						newval = fmtcell(datacells, i, j) + separator + str(otherval)
						datacells.SetValueAt(i,j, newval)
						datacells.SetHAlignAt(i,j,1)  # right align cell
					else:
						if isinstance(otherval, basestring) and not isinstance(datacells.ValueAt(i,j), basestring):
							datacells.SetHAlignAt(i,j,1)  # right align cell
						datacells.SetValueAt(i,j, otherval)   # keep target table formatting (mostly).

def fmtcell(cells, i, j):
	"""return formatted value of cell(i,j).  If format matches some #.# or ##.#%, appropriate string is returned; otherwise
	simple formatting is used."""
	value = cells.ValueAt(i,j)
	fmt = cells.NumericFormatAt(i,j)
	decimals = cells.HDecDigitsAt(i,j)
	if fmt.startswith("#") or fmt.startswith("$"):
		try:
			value = "%.*f" % (decimals, value)
			if fmt.startswith("$"):
				value = "$" + value
			if fmt.endswith("%"):
				value = value + "%"
		except:
			value = str(value)
	else:
		value = str(value)
	return value

# These functions are used by default to transform row and column label tuples when accessing the main or
# other table.

def stdrowfunc(tup,othertable):
	"""tup is the tuple of row labels for the current row.  By default it is passed back unmodified.
	You can supply your own rowfunc to transform it when the tables being merged do not have the
	same structure or if you want to prevent the merging of certain rows.  Note that the tuple
	starts with "row" or "column", so the first normally visible element is tup[1].
	othertable is True if the function was called while processing the "other" table
	and False if processing the main table."""
	if debug:
		print "row:", (othertable and "other:" or "main:"), tup
	return tup

def stdcolfunc(tup, othertable):
	"""tup is the tuple of column labels for the current column.  By default it is passed back with the
	last item deleted.  See additiona comments under rowfunc"""
	if debug:
		print "col:", (othertable and "other:" or "main:"), tup

	return tup[:-1]


# Part 2
def censorLatest(cmd=None, desout=None, tablenum=None, critfield='Count', 
		critvalue=5, symbol=" ",  neighborlist=[1], direction='row', testtype="<", appendcaption=True,
		othercaption=None, hidecrit=False):
	"""Run the command(s), cmd and censor specified cells based on criterion and return the number of cells censored.
	
	If cmd is empty or not supplied, it is not run, and the requisite tables should already exist.
	if tablenum is specified, it is the absolute table number to process.
	Otherwise, the most recent table is used.
	Set appendtitle and appendcaption to False to suppress the corresponding append.
	desout can be specified as the handle to the designated Viewer.  If None, this module will find it.
	
	critfield specifies the leaf text of the cell to use for the criterion value.
	critvalue is the threshold value.  The absolute value of a cell is tested against the criterion value.
	neighborlist is a list of relative positions that should be censored.  Positive to the right/above,
	negative to the left/below.  In order to censor the criterion field itself, include 0 in neighborlist
	direction is 'row' for items in the same row or 'col' for items in the same column.
	The label is expected in the dimension opposite to direction.
	symbol is the value that should replace a censored field, defaulting to blank.
	hidecrit if True causes the criterion row or column to be hidden.
	
	Note that censoring a field may still leave its value discoverable if it is included in a total.
	
	Example: censor statistics in  a table when counts are small:
	cmd="CTABLES /TABLE origin BY accel [MEAN, COUNT]  /SLABELS POSITION=ROW."
    rc= tables.censorLatest(critvalue=10, desout=desout, neighborlist=[-1], direction='col')
	
	Example: blank out insignificant correlations in the output from CORRELATIONS:
	cmd=r"CORRELATIONS  /VARIABLES=availblt avg_purc chckout"
    tables.censorLatest(cmd=cmd, critvalue=.01, critfield="Sig. (2-tailed)", 
	    testtype=">", neighborlist=[0,1,-1], direction='col')

	"""
	if cmd:
		spss.Submit(cmd)
	if desout is None:
		desout= viewer.spssapp().GetDesignatedOutput()
	if not tablenum:
		items = desout.Items
		itemkt = items.Count-1
		while itemkt >= 0 and not tablenum:
			if items.GetItem(itemkt).SPSSType == 5:  #a pivot table
				tablenum = itemkt
			itemkt-= 1
	if not tablenum:
		raise ValueError, "No table found to process."

	censorkt = tcensor(desout, tablenum, critfield, critvalue, symbol, neighborlist, direction, testtype,
		appendcaption=appendcaption, othercaption=othercaption, hidecrit=hidecrit)
	return censorkt

def tcensor(desviewer, main, critfield='Count', 
		critvalue=5, symbol=" ", neighborlist=[1], direction='row', testtype='<',
		appendcaption=True,othercaption=None, hidecrit=False):
	"""censor the cells of table  main in designated Viewer window."""

	objItems = desviewer.Items
	main = objItems.GetItem(main)
	if main.SPSSType != 5 :
		raise ValueError, "A specified item is not a pivot table or does not exist."
	
	# create criterion function
	try:
		ttype = ['<', '<=','=','==','>','>=','!=', '~='].index(testtype)
		if ttype == 0:
			olist = [True, False, False]
		elif ttype == 1:
			olist = [True, True, False]
		elif ttype == 2 or ttype == 3:
			olist = [False, True, False]
		elif ttype == 4:
			olist = [False, False, True]
		elif ttype == 5:
			olist = [False, True, True]
		elif ttype == 6 or olist == 7:
			olist = [True, False, True]
	except:
		raise ValueError, "Invalid comparison type: " + testtype
	
	def crittest(value):
		try:
			c = cmp(abs(value), critvalue)
			return olist[c+1]
		except:
			return False

	PivotTable = main.GetTableOleObject()
	censorkt = 0
	try:
		PivotTable.UpdateScreen = False
		if direction == 'row':
			lblarray = PivotTable.ColumnLabelArray()
		elif direction == 'col':
			lblarray = PivotTable.RowLabelArray()			
		else:
			raise ValueError, "direction must be 'row' or 'col'"
		censorkt = censortbl(lblarray, PivotTable, critfield, crittest, symbol, 
							 neighborlist, direction, hidecrit)
		if appendcaption:
			if othercaption is None:
				othercaption = "Number of values censored because of " + critfield + ": " + str(censorkt)
			PivotTable.CaptionText = PivotTable.CaptionText + "\r\n" + othercaption
		PivotTable.Autofit()
	finally:
		PivotTable.UpdateScreen = True
		main.Activate()
		main.Deactivate()
		return censorkt
		
def censortbl(labels, PivotTable, critfield, crittest, symbol, neighborlist, direction, hidecrit):
	"""Censor the table, returning the number of values censored.
	
	labels is the row or column label array where the critfield should be found.
	PivotTable is the table to process.
	crittest is a test function to be used for the comparison.
	symbol is the symbol used to replace the value.
	neighborlist is the list of neighboring cells in which to do the replacement when the test succeeds
	direction is row or col, which determines how to interpret neighborlist.
	hidecrit if True causes the criterion row or column to be hidden."""
	
	try:
		datacells = PivotTable.DataCellArray()
		rows = direction == 'row'
		if rows:
			dimsize = datacells.NumColumns
			otherdimsize = datacells.NumRows
			lbllimit = labels.NumRows - 1
		else:
			dimsize = datacells.NumRows
			otherdimsize = datacells.NumColumns
			lbllimit = labels.NumColumns - 1
		censorkt = 0
		foundcritfield = False
		
		c = LayerManager(PivotTable)
		for facenumber, face in enumerate(c.layers()):
			for i in range(dimsize):
				a1, a2 = _sargs(rows, lbllimit, i)
				if labels.ValueAt(a1, a2)  == critfield:
					foundcritfield = True
					for other in range(otherdimsize):
						aa1, aa2 = _sargs(rows, other, i)
						if crittest(datacells.ValueAt(aa1, aa2)):
							for offset in neighborlist:
								if 0 <= i+ offset < dimsize:
									aaa1, aaa2 = _sargs(rows, other, i + offset)
									datacells.SetValueAt(aaa1, aaa2, symbol)
									datacells.SetHAlignAt(aaa1, aaa2, 1)  # right align cell
									censorkt += 1
					if hidecrit and facenumber == 0:   # only need to hide once for all the layers
						hide(labels, rows, i, lbllimit)
	except AttributeError:
		print "Error in censortbl:", sys.exc_info()[0]
		print "This may indicate a need to run makepy on the SPSS Pivot Table type library"
		raise
	if not foundcritfield:
		print  "Warning: The criterion field was not found in the table: " + critfield
		raise AttributeError, "The criterion field was not found in the table: " + critfield

	return censorkt

def hide(array, rows, i, lastdim):
	"""Hide the data and labels at i.
	array is the array of row or column labels.
	i is the row or column number to hide.
	lastdim is the index of the innermost label row or column."""
	
	#collblarray = PivotTable.columnlabelarray()
	#lblnumrows = collblarray.numrows
	x, y = _sargs(rows, lastdim, i)
	array.HideLabelsWithDataAt(x, y)

def _sargs(row,x,y):
	""" return x, y, if row == True else y,x"""
	if row:
		return (x,y)
	else:
		return (y,x)
	
class LayerManager(object):
	def __init__(self, pt):
		"""pt is an activated pivot table"""
		self.ptmgr = pt.PivotManager()
		self.numlayerdims = self.ptmgr.NumLayerDimensions
		self. layercats = []
		self.layercurrcat = []
		for c in range(self.numlayerdims):
			self.layercats.append(self.ptmgr.LayerDimension(c).NumCategories)
			self.layercurrcat.append(self.ptmgr.LayerDimension(c).CurrentCategory)

	def layers(self):
		"""Generator to iterate over all the categories of all the dimensions in the layer.
		
		Returns the current category number, but the main purpose is to change the current category."""
		
		if self.numlayerdims == 0:
			yield 0
		else:
			for ld in range(self.numlayerdims):
				cc = self.layercurrcat[ld]
				for c in range(self.layercats[ld]):
					yield cc
					cc = (cc+1) % self.layercats[ld]
					self.ptmgr.LayerDimension(ld).CurrentCategory = cc