"""
a library of useful utility routines for working with the SPSS variable dictionary and other common tasks
Copyright (C) 2006, 2007 by SPSS Inc."""

import spss
import random
import copy, sys, os.path
import ConfigParser
import re, operator
import codecs

# All messages for this module are collected here for translation purposes.  Docstrings, however, are not.
# To translate, duplicate the message block and translate the later copy, maintaining the assignment syntax.
# The original can be commented out, but it will be replaced by the later version anyway, so this is not necessary.
# Maintaining the original English text will make it easier to update translations later.
# Place the translated block after all the English messages in order to facilitate checking for changes in the English text.

_msg1 = "An OMS subtype must be specified."
_msg2 = "Unequal number of dataset names and subtypes was specified."
_msg3 = "Invalid type specified"
_msg4 = "Invalid verify specified"
_msg5 = "cd must be an absolute path if used"
_msg6 = "filespec does not exist:"
_msg7 = "filespec is not of the specified type: "
_msg8 = "filespec does not exist or is not of the specified type: "
_msg9 = "getMissingValues2 requires at least SPSS 15"
_msg10 = "Variable not in this dictionary"
_msg11 = "start or end variable not in dictionary"
_msg12 = "A VariableDict is required if var is a string."
_msg13 = "Invalid variable type"
_msg14 = "Invalid variable measurement level or not a list"
_msg15 = "No variable values found"
_msg16 = "Invalid variable name."
_msg17 = "VariableDict pattern parameter specifies and invalid regular expression: "
_msg18 = """Error: Cannot set to original name.  File probably already exists.
        Opening under temporary name.  Original  name is\n"""
_msg19 = "Invalid variable or TO usage: "
_msg20 = "Invalid file type: %s"

#History
#    2005-10-3   tolerate extra group level in GetSHOW xml
#                         add GetProcessInstallDir
#    2005-10-6   add GetSPSSInstallDir
#    2005-10-27  fine tune GetDatasetInfo
#    2005-11-22  adjustments to VariableDict class for iterating and indexing
#    2005-12-01  allow variable names in VariableDict indexing and preserve creation order of variables
#    2006-01-13  provide for regular expression matches in in VariableDict creation and retrieving variable lists (Variables method)
#    2006-01-17  extend selection feature to include variable type and measurement level
#    2006-02-21  New functions getActiveDatasetName and CreateOutputDataset
#    2006-02-27  New method, Indexes, in VariableDict class, new function, isDateVariable,
#                and __int__ implemented for Variable class.  Added varsWithDuplicateLabels function
#                and to the VariableDict class, the variables property
#    2006-03-24 Add setters for Variable class properties
#    2006-03-28 Generalize createDatasetOutput to take a list of table subtypes
#    2006-03-30 Add range method to VariableDict class
#    2006-04-18 Added CreateFileHandle
#    2006-04-28 Added __str__ method to Variable object, returning the variable name
#    2006-06-29 Added datasetvisible parameter to createDatasetOutput function
#    2006-07-11  Added CreateAttribute and getSpssVersion functions and 
#                          implement __eq__,  __ ne__, and __hash__ for Variable class so sets of variables work
#                          Add dataset activate and close methods to VariableDict class.
#    2006-08-01 Changes to GetSPSSInstallDir and getSpssVersion to support SPSS 15
#    2006-08-14 Added getMissingValues2 for SPSS15
#    2006-08-22 Added GetSPSSMajorVersion
#    2006-10-09 Enhanced getValuesFromXmlWorkspace for tables without a row dimension or without a column dimension
#    2006-12-15  ValueLabelsTyped support for getting the labelled values according to the variable's type
#    2007-01-12  Added openDataFileFromUrl
#    2007-02-06 Handle value label for a blank in GetValueLabels function and property
#    2007-03-22 Standardize casing of function and class names and define aliases for old names
#    2007-04-24 Rename variables method of VariableDict to variablesf to avoid conflict with variables property
#    2007-06-20 Add expand method to VariableDict
#    2007-06-26 Bug fix for creating value labels
#    2007-09-10 Adjust _GetSPSSLocFromIni for SPSS 16 location
#    2007-09-27 Generalize OpenDataFile to handle Excel, SAS, and Stata files with all options
#    2007-10-03  Add u and truncatestring functions for Unicode support

__author__  =  'spss'
__version__ =  '2.2.5'
version = __version__

# global variable tracebacklimit is used to save and restore traceback limit state
tracebacklimit = None

def getSpssVersion():
    """Return the SPSS version."""
    
    # This code is needed because the 14.0.x version of the spss module does not have an __version__ variable
    try:
        return spss.__version__
    except:
        try:
            sv = spss.Cursor
            sv = '14.0.1'
        except:
            sv = '14.0.0'
        return sv

ok1600 = int(getSpssVersion().split(".")[0]) >= 16

def _GetVariableInfo(fn, vars=None):
    """_GetVariableInfo(vars) ->list of specified information.
    The result is -
        all variables if no argument is supplied
        a list of variables if a list of positions is supplied
        a one-item list if a single position is supplied
"""
    
    names = []
    if isinstance(vars, (list, tuple)):         #type(vars) is a sequence:
        names = copy.copy(vars)
    elif vars is None:
        names = [v for v in range(spss.GetVariableCount())]
    else:
        names = [vars]
    return [fn(v) for v in names]

def    createXmlOutput(cmd, omsid=None, subtype='', visible=False):
    """
    Create a tagged item in the XMLworkspace containing the output from an SPSS command.
    
        cmd is an SPSS command or sequence of commands.  
        omsid is the OMS identifier for the command whose output is to be captured.  
        (Other commands in the sequence will not be captured,  but all executions of the command 
        with this identifier will be in a single workspace object.)
        The omsid is not case sensitive.
        If omsid is not included, the value of cmd up to the first whitespace is used, except
        that any trailing period is removed.
        If subtype, which only applies to tables, is specified, only tables of that type are included.
        subtype can be a single subtype or a list of subtypes.  It is ignored for items other than tables.
        By default, the captured output is excluded from the Viewer, if visible is True, the output
        will be displayed.  Note that visibility only affects the output for the omsid command.

    Return value is a two-tuple consisting of the workspace tag by which to retrieve the output,
    which should be destroyed when no longer needed, and the last (maximum) error level of the request.  
    The error level will be zero if there were no SPSS errors.
    """
    tag = "S"+ str(random.uniform(0, 1))
    if omsid is None:
        omsid = cmd.split()[0]
        if omsid[-1] == ".": 
            omsid = omsid[0:-1]
    if cmd[-1] != ".":
        cmd += "."
    if subtype:
        if isinstance(subtype, (list, tuple)):
            subtype = " ".join(["'" + s + "'" for s in subtype])
        else:
            subtype = "'" + subtype + "'"
        subtype = " subtype=[" + subtype + "]"
    s = ["OMS /if commands='" + str(omsid) + "'" + subtype,
       " /DESTINATION " \
       + (not visible and "Viewer=no " or "") \
       + "format=oxml xmlworkspace='" + tag + "'",
       "/TAG = '" + tag +"'.",
    cmd,
    "OMSEND TAG='" + tag + "'."]
    ###print s     #debug
    try:
        spss.Submit(s)
    except:
        pass
    return (tag, spss.GetLastErrorLevel())

def    _XPathSelector(category, attrib):
    """Create XPath qualifier fragment from a category and value
    If category is a list, a sequence of nested predicates is constructed
    """
    if category:
        c = category
        if not isinstance(c, list):        #type(c) is not list:
            c = [c]
        return reduce(lambda x, y: x + '//category[@' + attrib + '="' + y + '"]', c,"")
    else:
        return "//category"


def    getValuesFromXmlWorkspace(tag, tableSubtype, rowCategory=None, rowAttrib="text",
        colCategory=None, colAttrib="text", cellAttrib="*", xpathExpr=False, hasRows=None, hasCols=None):
    """Return a list of 0 or more values from a table item in the XMLWorkspace and, optionally,
    return the xpath expression for the request.

        tag is the XMLWorkspace identifier to check.
        tableSubtype is the subtype of the table from which to retrieve data.  It is the
        OMS table type identifier.
        rowCategory is the optional row category value to fetch.  If omitted, all rows
        meeting the other criteria are returned.
        rowAttrib is the attrib to use with rowCategory.  It defaults to text but may
        be any type in the table row labels such as label or varName
        colCategory is the optional column category to fetch.  If omitted, all columns
        meeting the other criteria are returned.
        colAttrib is the attribute type for colCategory.  It defaults to text but may
        be any type in the column labels such as label or varName
        cellAttrib is the attribute type to return.  It defaults to all attributes, but it may usefully
        be specified as number, date, or text.
        Some tables have no row dimension: set hasRows=False to remove the row selector from the xpath expression
        Some tables have no column dimension: set hasCols=False to remove the column selector from the xpath expression
        If the xpath expression returns an empty list and neither hasRows nor hasCols was specified, it is retried without the row dimension
        If still empty, it is tried without the col dimension
        If xpathExpr is True, the returned result is a 2-tuple with the first element as
        the result and the second as the xpath expression.
    """
    
    hasr = hasRows != False
    hasc = hasCols != False
    xpathbase = """//pivotTable[@subType='%(tableSubtype)s']"""
    xpathrows = """//dimension[@axis='row']%(rowselector)s"""
    xpathcols = """//dimension[@axis='column']%(colselector)s"""
    xpathtail =  """/cell/@%(cellAttrib)s"""
    
    d = {"tableSubtype": tableSubtype,
           "rowselector": _XPathSelector(rowCategory,rowAttrib),
           "colselector": _XPathSelector(colCategory,colAttrib),
           "cellAttrib": cellAttrib}
    xpath = (xpathbase + (hasr and xpathrows or '') + (hasc and xpathcols or '') + xpathtail) % d    
    res = spss.EvaluateXPath(tag, "/outputTree", xpath)
    
    # if Xpath query got nothing and caller did not declare no rows or no cols
    # so find out whether the table has row or column axes and adjust accordingly
    if res == []  and hasRows is None and hasCols is None:
        if rowCategory is None:
            hasr =  spss.EvaluateXPath(tag, "/outputTree", (xpathbase + "//dimension[@axis='row']") %d ) != []
        if colCategory is None:
            hasc =  spss.EvaluateXPath(tag, "/outputTree", (xpathbase + "//dimension[@axis='column']") %d) != []
        if hasr or hasc:
            xpath2 = (xpathbase + (hasr and xpathrows or '') + (hasc and xpathcols or '') + xpathtail) % d
            if xpath != xpath2:
                xpath = xpath2
                res = spss.EvaluateXPath(tag, "/outputTree", xpath)

    if xpathExpr:
        return res, xpath
    else:
        return res

def createDatasetOutput(cmd, omsid=None,  subtype=None, visible=False, newdsn=None, datasetvisible=True):
    """
    Create a dataset containing the output from a selected table or list of tables of an SPSS command sequence.
    
        cmd is a single SPSS command or a sequence of commands.
        omsid is the OMS identifier for the command whose output is to be captured.
        (Other commands in the sequence will not be captured.)
        The omsid and subtypes are not case sensitive.
        If omsid is not included, the value of cmd up to the first whitespace is used, except
        that any trailing period is removed.  For this convention to work, do not use command abbreviations.
        subtype must be specified: only tables of those types are included.
        By default, the cmd output is excluded from the Viewer, if visible is True, the output
        will be displayed.
        By default, the dataset(s) created will be visible.  If datasetvisible is False, the datasets will be hidden.
        Random names are assigned by default to the new datasets.  newdsn can be used to specify a
        different name.  The number of items in newdsn must match the number of subtypes.
    
    Side effect: if the current active dataset does not have a name, it is assigned one in order to
    preserve it.

    Return value is a tuple of the names of the dataset created and the last (maximum) error level of
    the request. The error level will be zero if there were no SPSS errors.

    Example:
    tag, err = spssaux.createDatasetOutput("FREQUENCIES year/ntiles=4",
    subtype='statistics', visible=True, newdsn='freqs')
    print "output dsn:", tag, "error code:", err
    """
    if subtype is None:
        raise ValueError, _msg1
    subtype = _listify(subtype)
    if newdsn:
        newdsn = _listify(newdsn)
        if len(newdsn) != len(subtype):
            raise ValueError, _msg2
    tag = ["S" + str(random.uniform(0, 1)) + "x" for i in range(len(subtype))]  #used both for OMS and as the dataset name unless newdsn is given
    tagtherest = "S" + str(random.uniform(0, 1)) + "x"
    if not newdsn:
        newdsn = tag
    dsn = getActiveDatasetName()        #a name must be assigned if there isn't one already
    if not dsn:
        dsn = "D" + str(random.uniform(0, 1)) + 'x'
        spss.Submit("DATASET NAME " + dsn)
    if omsid is None:
        omsid = cmd[:cmd.find(" ")]
        if omsid[-1] == ".": 
            omsid = omsid[0:-1]
    if datasetvisible:
        dsv = ""
    else:
        dsv = " WINDOW=HIDDEN"
    for itag, idsn, isubtype in zip(tag, newdsn, subtype):                                    
        spss.Submit(["DATASET DECLARE " + idsn + dsv + ".",
        "OMS SELECT TABLES /IF COMMANDS='" + str(omsid) + "' SUBTYPE='" + isubtype + "'",
        " /DESTINATION " + (not visible and "Viewer=no " or "")   + " FORMAT=SAV OUTFILE='" + idsn + "'",
        "/TAG = '" + itag +"'."])
    if not visible:
        spss.Submit("OMS /DESTINATION VIEWER=NO/TAG='" + tagtherest + "'.")
    try:
        err = 0
        spss.Submit(cmd)
    except:
        err = spss.GetLastErrorLevel()
    spss.Submit("OMSEND TAG=['" + "' '".join(tag) + "'].")
    if not visible:
        spss.Submit("OMSEND TAG='" + tagtherest + "'.")
    spss.Submit("DATASET ACTIVATE " + dsn)
    newdsn.append(err)
    return newdsn

def getVariableNamesList(vars=None):
    "Return a list containing variable names.  vars is an index or a list of indexes"
    fn = spss.GetVariableName
    return _GetVariableInfo(fn, vars)

def getVariableFormatsList(vars=None):
    "Return a list containing variable formats.  vars is an index or a list of indexes"
    fn = spss.GetVariableFormat
    return _GetVariableInfo(fn, vars)

def getVariableLabelsList(vars=None):
    "Return a list containing variable labels.  vars is an index or a list of indexes"
    fn = spss.GetVariableLabel
    return _GetVariableInfo(fn, vars)

def getVariableMeasurementLevelsList(vars=None):
    "Return a list containing variable measurement levels.  vars is an index or a list of indexes"
    fn = spss.GetVariableMeasurementLevel
    return _GetVariableInfo(fn, vars)

def getVariableTypesList(vars=None):
    "Return a list containing variable types.  vars is an index or a list of indexes"
    fn = spss.GetVariableType
    return _GetVariableInfo(fn, vars)

# SHOW command output functions
def getShow(item, olang=None):
    """Return the output for "item" as a string, where item is a SHOW command keyword.
    
    Item must appear in the System Settings table. For the few items where there are multiple fields,
    the fields are separated with ";". An empty string will be returned for an invalid keyword.
    olang specifies the output language that should be set for the output, defaulting to current.
    An exception is raised if the olang is not available."""
    
    try:
        if not olang is None:
            spss.Submit(["PRESERVE.", "SET OLANG = " + olang + "."])
        

        tag, ignore = createXmlOutput("SHOW " + item + ".", "SHOW")
        
        # The xpath expression below is designed to work with all output languages
        result = ";".join(spss.EvaluateXPath(tag, '/',
        '//pivotTable[@subType="System Settings"]/dimension//category/dimension/category[2]/cell/@text'))
    finally:
        if not olang is None:
            spss.Submit("RESTORE")
        if sys.exc_info()[0] is None:
            spss.DeleteXPathHandle(tag)
    return result
    
# find the Process installation directory
def getProcessInstallDir(handle=None): 
    """Return the path to the spss executable or whatever directory the current process was launched
    from. If handle is specified, an SPSS FILE HANDLE for that location is created."""
    
    spssloc = os.path.dirname(sys.executable)
    if handle:
        if not isinstance(handle, basestring):
            raise TypeError("file handle must be a string")
        spss.Submit("FILE HANDLE " + handle + " /NAME='" + spssloc + "'.")
    return spssloc

# find the SPSS installation directory (works in both xd and dx modes)
def getSpssInstallDir(handle=None):
    """Return the path to the spss executable. If handle is specified, an SPSS FILE HANDLE for that
    location is created."""
    
    spssloc = os.path.dirname(sys.executable)
    if not os.path.basename(spssloc).lower().startswith("spss"):
        spssloc = _getSPSSLocFromIni()
    if not spssloc:
        raise AttributeError("Cannot determine SPSS location")
    if handle:
        if not isinstance(handle, basestring):
            raise TypeError("file handle must be a string")
        spss.Submit("FILE HANDLE " + handle + " /NAME='" + spssloc + "'.")
    return spssloc

def createFileHandle(filespec, handlename, type=None, verify=None, cd="", subcommands=""):
    """Create an SPSS File Handle named handlename. 
    
        If type == "path", assume that filespec is a directory; 
        If type == "file", assume that it refers to a file (with an optional path)
        
        If verify == None, no validity check is made; 
        If verify == "full", check that the complete spec exists and has the type specified, if any.
        If verify == "path", check that the path is valid but succeed if the type is "file" and the path to the file exists even if the file does not).
        
        On failure, raise an exception.
        If filespec is or has a relative path, it will be evaluated against the process current working directory when verifying.  This might not be
        the SPSS Processor working directory.  Specify the working directory to be used for a relative path in the cd parameter,
        which must be an absolute path if used.
        subcommands can be specified as a string containing other parameters of the FILE HANDLE command such as /LRECL.
    """
    
    if not type in [None, "path", "file"]:
        raise ValueError, _msg3
    if not verify in [None, "full", "path"]:
        raise ValueError, _msg4
    if verify and not os.path.isabs(filespec):
        if cd != "" and not os.path.isabs(cd):
            raise Exception, _msg5
        filespec = os.path.join(cd, filespec)
        
    if verify == "full":
        if not os.path.exists(filespec):
            raise Exception, _msg6 + filespec
        if (type == "path" and not os.path.isdir(filespec)) or (type == "file" and not os.path.isfile(filespec)):
            raise Exception, _msg7 + filespec
    if verify == "path":
        if type == "file":
            dirname = os.path.dirname(filespec)
        else:
            dirname = filespec
        if not os.path.isdir(dirname):
            raise Exception, _msg8 + dirname
        
    cmd = "FILE HANDLE %(handlename)s /NAME '%(filespec)s' %(subcommands)s" % locals()
    spss.Submit(cmd)
    

# get Variable attributes for selected variable
def getAttributesDict(varname=None):
    """return a dictionary of attributes and their values for variable varname.
    
    If no varname is supplied, datafile attributes are returned instead.
    The attribute names are the keys.  For array attributes, the subscript is part of
    the name.  E.g., an array attribute might have the name "y[1]".
    """
    if varname:
        subcmd = "/variable=" + varname
        attnamespath = \
            "//pivotTable[@subType='Variable Attributes']/dimension/category/dimension/category/@text"
        attvaluespath = \
    "//pivotTable[@subType='Variable Attributes']/dimension/category/dimension/category/dimension/category/cell/@text"
    else:
        subcmd = ""
        attnamespath = \
            "//pivotTable[@subType='Datafile Attributes']/dimension/category/@text"
        attvaluespath = \
            "//pivotTable[@subType='Datafile Attributes']/dimension/category/dimension/category/cell/@text"
    
    tag, ignore = createXmlOutput("display attributes " + subcmd + ".", "File Information")
    
    attnames = spss.EvaluateXPath(tag, '/outputTree', attnamespath)
    attvalues = spss.EvaluateXPath(tag, '/outputTree', attvaluespath)
    spss.DeleteXPathHandle(tag)    
    return dict(zip(attnames, attvalues))

def getDatasetInfo(Info="Data"):
    """Return selected information on the current active dataset.  Value is empty if none.
    
        Info can be
            Data    name of the active file (empty if unnamed)
            Filter    filter variable or empty
            Weight    weight variable or empty
            SplitFile    comma-separated split variable(s) or empty
        If there is no active file but there IS a dataset name, the dataset name is returned.
        Except for Data, the form of the information returned depends on the SPSS Output Labels
        preference setting.
    """
    InfoTypes = {"data" : 0 , "filter" : -3, "weight" : -2, "splitfile" : -1}
    Info = Info.lower()
    if not Info in InfoTypes:
        raise ValueError
    
    tag = createXmlOutput("show dir")
    cf = spss.EvaluateXPath(tag[0], "/outputTree",
        "//pivotTable[@subType='Notes']/dimension[@axis='row']/group[1]/category/cell/@text|@label")
    #cf will have three entries if no active file; otherwise four or five after discarding possible case count
    spss.DeleteXPathHandle(tag[0])
    try:
        x = int(cf[-1])        # if last item is a number, it is the case count (hack)
        cf = cf[:-1]
    except: pass
    if Info == "data":
        if len(cf) >= 4:
            return cf[0].replace("\\", "/").rstrip()
        else:
            return ""    #no entry for file is present if there is no named active file
    else:
        item = cf[InfoTypes[Info]]
        if item.startswith("<"):
            return ""
        else:
            return item

def getActiveDatasetName():
    """Return the dataset name for the active dataset or None if unnamed."""
    
    d = {"tag" : "S"+ str(random.uniform(0, 1)), "tag2" : "S"+ str(random.uniform(0, 1))}
    spss.Submit(
        """OMS /IF SUBTYPES='Datasets'/DESTINATION XMLWORKSPACE='%(tag)s' FORMAT=OXML VIEWER=NO/TAG='%(tag)s'.
        OMS /DESTINATION VIEWER=NO/TAG='%(tag2)s'.
        DATASET DISPLAY.
        OMSEND TAG=['%(tag)s' '%(tag2)s'].""" % d)
    dsn = spss.EvaluateXPath(d["tag"], "/", "//pivotTable/dimension//cell/footnote/../@text")
    spss.DeleteXPathHandle(d["tag"])
    if dsn[0].startswith("("):
        return None
    else:
        return dsn[0]

def    getValueLabels(varindex, matchtype=False):
    """Get the value labels for the variable with index varindex.  varindex may be an int or a Variable object.
    Returns a Python dictionary with values and labels.
    
        If matchtype is False (the default), all values are returned as strings.
        If matchtype is True, values are converted to doubles for numeric variables
    """
    varindex = int(varindex)
    vn = spss.GetVariableName(varindex)
    vt = spss.GetVariableType(varindex)
    xptail = vt == 0 and "@number" or "@string"
    
    tag, errlevel = createXmlOutput("DISPLAY DICTIONARY /VARIABLES=" + vn + ".", "File Information")
    if errlevel:
        raise Exception, spss.GetLastErrorMessage()
    
    ###spss.GetXmlUtf16(tag, filename="c:/temp/dis.xml")
    valpath = "pivotTable[@subType='Variable Values']/dimension/group/category/" + xptail
    labpath = "pivotTable[@subType='Variable Values']/dimension/group/category/@label"
    vallist = spss.EvaluateXPath(tag, '/outputTree/command', valpath)
    if matchtype and vt == 0:
        vallist = [float(v) for v in vallist]
    lablist = spss.EvaluateXPath(tag, '/outputTree/command', labpath)
    spss.DeleteXPathHandle(tag)
    
    # if the variable type is string and there is a label for a blank value, there may be no string (or text) attribute for the
    # value.  If the label and value list lengths differ by one, assume a blank value occurs first
    if vt > 0 and len(vallist) - len(lablist) == -1:
        vallist.insert(0, " ")

    # there will be no variable values table if no labels are defined.
    return dict(zip(vallist, lablist))

def getMissingValues(varindex):
    """Return a string of the missing values for the variable with index varindex.
    
        varindex may be a number or a Variable object. The values are comma separated and are formatted
        according to the variable's format and, for range specifications, are expressed in a
        language-sensitive way. String codes are quoted. 
        
        If no missing values are defined, the returned string is empty. """
    
    varindex = int(varindex)
    tag, ignore = createXmlOutput("DISPLAY DICTIONARY /VARIABLE=" + spss.GetVariableName(varindex) + ".",
        "File Information")
    result = spss.EvaluateXPath(tag, '/outputTree',
    "//pivotTable[@subType='Variable Information']/dimension/category/dimension/category[8]/cell/@text")
    spss.DeleteXPathHandle(tag)
    return len(result) and result[0] or ""

def getMissingValues2(varindex):
    """Return a 4-tuple of missing value codes for the variable with index varindex.
    varindex may be a number or a Variable object.
    This function requires SPSS 15.
    The first element of the tuple indicates the type of the following triple.
      0 = simple values
      1 = range
      2 = range plus one additional missing value
    Unused slots have value None"""
    try:
        return spss.GetVarMissingValues(int(varindex))
    except AttributeError:
        print _msg9
        raise

def openDataFile(filespec, datasetName=None, dataset=None, varlist=None, filetype='sav', **kwargs):
    """Open an SPSS, Excel, SAS, or Stata data file.
    
    filespec is required and is a string that identifies the file to open.
    datasetName is optional and specifies a dataset name to be assigned to the file,
    which allows the file to remain open and usable when another data file is opened.
    dataset is an alternative name for the datasetName parameter.  Only one should be used.
    
    filetype can be sav, xls, xlsm, xlsx, sas, or stata.
    
    Options for SAV:
    a single variable name or a list of variable
    names to be retained in the opened file.
    
    Options for Excel:
    assumedstrwidth.  Default 32767
    sheet: index or name.  Default: index
    sheetid: the sheet number.  Default: 1
    cellrange: FULL or RANGE.  Default: FULL
    rangespec: required if RANGE: No default.
    readnames: ON or OFF.  read names from first row.  Default: ON
    
    SAS:
    dset: dataset name within file.  Default: first dataset
    formats: a formats file to read.  Default: none
    
    Stata:
    No options.
    
    Parameters are not validated: if they are not valid, an SPSS error will be produced.
    """

    filetype = filetype.lower()
    filespec = _smartquote(filespec)
    
    if filetype == "sav":
        varlist = kwargs.get("varlist", " ")
        if varlist.strip():
            varlist = " /KEEP=" + (isinstance(varlist, basestring) and varlist or ' '.join(varlist))
        cmd = "GET FILE= %(filespec)s %(varlist)s"
        
    elif filetype in ['xls', 'xlsx', 'xlsm'] :    # Excel
        assumedstrwidth = kwargs.get("assumedstrwidth", 32767)
        sheet = kwargs.get("sheet", "INDEX")
        sheetid = kwargs.get("sheetid", 1)
        if isinstance(sheetid, basestring):
            sheetid = _smartquote(sheetid)
        cellrange= kwargs.get("cellrange", "FULL")
        rangespec = kwargs.get("rangespec", " ")
        if rangespec.strip():
            rangespec = '"' + rangespec + '"'
        readnames = kwargs.get("readnames", "ON")

        cmd = """GET DATA /TYPE= %(filetype)s /FILE=%(filespec)s /ASSUMEDSTRWIDTH=%(assumedstrwidth)d
        /SHEET=%(sheet)s %(sheetid)s /CELLRANGE=%(cellrange)s %(rangespec)s /READNAMES=%(readnames)s"""
        
    elif filetype == "sas":
        dset = kwargs.get("dset", " ")
        if dset.strip():
            dset = "DSET(" + _smartquote(dset) + ")"
        formats = kwargs.get("formats", " ")
        if formats.strip():
            formats = "/FORMATS=" + _smartquote(formats) 
        cmd = "GET SAS DATA=%(filespec)s %(dset)s %(formats)s"
        
    elif filetype == "stata":
        cmd = "GET STATA FILE=" + filespec
        
    else:
        raise ValueError, _msg20 % filetype
    
    kwargs.update(locals())
    cmd = cmd % kwargs

    spss.Submit(cmd)
    
    datasetName = datasetName or dataset
    if datasetName:
        spss.Submit("DATASET NAME " + str(datasetName))

def openDataFileFromUrl(url,  *args, **kwds):
    """Copy a data file from a location specified by a url and open it.
    
    url is the location from which to retrieve the file.
    The other arguments are identical to the openDataFile parameters.
    
    The file is restored to its original name if possible."""
    
    import urllib, os
    localfilename, headers = urllib.urlretrieve(url)
    unqname = urllib.unquote_plus(url)
    truefilename = os.path.split(unqname)[-1]
    localdir = "".join(os.path.split(localfilename)[:-1])
    newname = localdir + "/" + truefilename
    try:
        os.rename(localfilename, newname)
    except:
        print _msg18 + truefilename
        newname = localfilename
    openDataFile(newname, *args, **kwds)
    
def saveDataFile(filespec):
    """Save the active dataset as filespec.
    
    An SpssError exception will be raised if there is no active dataset."""
    
    # just for symmetry with openDataFile
    spss.Submit("SAVE OUTFILE='" + filespec + "'")

class Variable(object):
    """variable class that delegates to VariableDict to get properties dynamically based on index.
    If passing a Variable object to a function that takes only a variable index, use int(variable).
    Variable properties that are supported are
    VariableName (get)
    VariableIndex (get)
    VariableLabel (get, set)
    VariableLevel (get, set)
    VariableType (get)
    VariableFormat (get, set)
    ValueLabels (get, set by assigning a dictionary of values and labels)
    Attributes= (get, set)
    MissingValues (get, set by assigning a list of values.  For numeric, entry can include ranges, e.g., 99 thru high)
    MissingValues2(get)  returns 4-tuple.  Requires SPSS 15
"""
    def __init__(self, dict, index, indextype='VariableDict'):
        self.dict = dict
        if indextype == 'VariableDict':
            self.index = self.dict.vdict[index] #map from dict subscript to SPSS dict slot number
        elif indextype =='spss':
            self.index = index
        else:
            raise ValueError

    #  originally, repr displayed the variable index.  Changed to display variable name
    # To get the original behavior, uncomment the line below and comment out the one
    # following.
    def __repr__(self):
        #return str(self.index)
        return self.dict.VariableName(self.index)
    
    def __eq__(self, other):
        """Return True if self and other have the same variable name."""
        #return self.dict.VariableName(self.index) == other.dict.VariableName(other.index)
        return str(self) == str(other)
    
    def __ne__(self, other):
        """Return True if self and other have different variable names"""
        return str(self) != str(other)
    
    def __hash__(self):
        """Implement this function so that set operations will work as expected."""
        return hash(str(self))
    
    def _VN(self):
        "returns the name of the variable"
        return self.dict.VariableName(self.index)
    def _VI(self):
        return self.dict.VariableIndex(self.index)
    def __int__(self):      #so int(variable) will work
        return self.dict.VariableIndex(self.index)
    def __str__(self):     #for str(variable)
        return self.dict.VariableName(self.index)
    def _VarL(self):
        return self.dict.VariableLabel(self.index)
    def _VarLSet(self, value):
        "sets the variable label.  value is the string to be used as the label"
        spss.Submit("VARIABLE LABELS " + spss.GetVariableName(self.index) + " " + _smartquote(value) )
    def _VarLevel(self):
        return self.dict.VariableLevel(self.index)
    def _VarLevelSet(self, value):
        "sets the measurement level for the variable.  value must be nominal, ordinal, or scale"
        spss.Submit("VARIABLE LEVEL " + spss.GetVariableName(self.index) + " (" + value + ").")
    def _VarType(self):
        return self.dict.VariableType(self.index)
    def _VarFmt(self):
        return self.dict.VariableFormat(self.index)
    def _VarFmtSet(self, value):
        "set the variable format. value is an SPSS format specification"
        spss.Submit("FORMAT " + spss.GetVariableName(self.index) + " (" + value + ").")
    def _ValLab(self):
        return self.dict.ValueLabels(self.index)
    def _ValLab2(self):
        return self.dict.ValueLabelsTyped(self.index)

    def _ValLabSet(self, vldict):
        """set the value labels for the variable.  vldict is a dictionary of values and their labels.
        Existing labels are replaced."""
        vllist = " ".join([_smartquote(str(k)) + " " + _smartquote(str(v)) for k, v in vldict.items()])
        ###spss.Submit("VALUE LABELS " + spss.GetVariableName(self.index) + " " + vllist)
        spss.Submit("VALUE LABELS " + self.VariableName + " " + vllist)
    def _Attr(self):
        return self.dict.Attributes(self.index)
    def _AttrSet(self, attrdict):
        """set the attributes according to attrdict, using keys as names and values as values.  Unmentioned
        attributes are not affected."""
        attrlist = " ".join([k + "(" + _smartquote(str(v))+ ")" for k, v in attrdict.items()])
        spss.Submit("VARIABLE ATTRIBUTE VARIABLES=" +  spss.GetVariableName(self.index) + \
                    " ATTRIBUTE=" + attrlist)
    def _MV(self):
        return self.dict.MissingValues(self.index)
    def _MV2(self):
        return self.dict.MissingValues2(self.index)
    def _MVSet(self, mlist):
        """set the missing value(s) for the variable.  The low, high, and thru keywords are valid only for numeric variables."""
        
        if spss.GetVariableType(self.index) == 0:   #numeric
            mlist = " ".join([str(m) for m in mlist])
        else:
            mlist = " ".join([_smartquote(v) for v in mlist])
        spss.Submit("MISSING VALUES " + spss.GetVariableName(self.index) + " (" + mlist + ").")

    VariableName = property(_VN)
    VariableIndex = property(_VI)
    VariableLabel = property(_VarL, _VarLSet)
    VariableLevel = property(_VarLevel, _VarLevelSet)
    VariableType = property(_VarType)
    VariableFormat = property(_VarFmt, _VarFmtSet)
    ValueLabels = property(_ValLab, _ValLabSet)
    ValueLabelsTyped = property(_ValLab2)
    Attributes = property(_Attr, _AttrSet)
    MissingValues = property(_MV, _MVSet)
    MissingValues2 = property(_MV2)

class VariableDict(object):
    """A Python dictionary indexed by variable name for use with the SPSS Variable Dictionary.
    
    The Python dictionary does not contain SPSS dictionary properties other than name and index.
    Other variable properties are retrieved dynamically when requested.

    Unlike the SPSS dictionary, variable names in this class are case sensitive (except for regular
    expression matches).
    
    If no argument is supplied, all variable names in the SPSS dictionary are indexed.
    If a list of names or indexes is supplied, only those variables are included.
    If pattern is supplied, the dictionary includes only the subset of variables with names
    matching the pattern.  The pattern is a regular expression that starts at the beginning of the name.
    Examples:
        r'age'  - any variables starting with age
        r'.*age'- any variable whose name contains age
        r'.*\d$' - any variable whose name ends in a digit
    Matches are case insensitive and pattern can be combined with namelists, in which case
    the match subsets the name list.
    If variableType ('numeric' or 'string') is specified, the dictionary is restricted to variables
    with that type.
    If variableLevel, which must be a list, ('nominal', 'ordinal', 'scale', 'unknown') is
    specified, only variables with any of those levels are included.
    These selection criteria can be combined.  They can also be used with
    the Variables method, which returns a list of variable names and the Indexes method, which
    returns a list of variable indexes in the SPSS dictionary.
    The class contains a property named variables (note lower case) that returns a list of all
    the variables in the object.
    
    If safe == True, each time a variable property is retrieved after the dictionary is created, the
    actual SPSS dictionary is checked to see if the index still refers to that variable, and KeyError
    is raised if it is not.

    The class includes iterator and subscripted access.
    Subscripts and iterative access go according to the variables in the object, but
    api access functions take SPSS dictionary slot numbers.  Subscripts can be variable names
    or numbers.
    
    See the Variable class for details on variable properties. Most properties and attributes can be
    retrieved, and those that are changeable can be changed by assigning to the property.
    
    This class includes methods for manipulating named datasets as well.
    """
    # The indexes in the Variable Dictionary refer to the index in the SPSS dictionary, not
    # the index within this class.  The keys are variable names, and the values are
    # SPSS dictionary slot numbers (which are needed for the spss dictionary apis).

    # The theory of this implementation is that if no variable list is given, the number
    # of variables may be very large and efficiency is important, but if a list is given,
    # the number of variables will rarely be very large, and efficiency is not critical.
    
    def __init__(self, namelist=None, safe=False, pattern=None, variableType=None, variableLevel=None, dataset=None):
        """
        All arguments are optional.  All conditions on variables are ANDed together to determine what variables to include.
        
            namelist is a list of names or indexes.  Defaults to all variables.  It can also be a white-space-separated string of names or indexes.
            safe specifies whether or not check for whether variable names still correspond to slots when fetching properties.
            pattern is a regular expression used as a match on variable names
            variableType can be "numeric" or "string".
            variableLevel can be one or more of "nominal", "ordinal", "scale", and "unknown".
            dataset is the current name of the dataset.  Specifying it does NOT cause a name to be assigned.
        """
    
        match = makeVariableFilter(pattern, variableType, variableLevel)           
        allnames = [spss.GetVariableName(v) for v in range(spss.GetVariableCount())]
        self.keys = []
        namelist = _buildvarlist(namelist)  #convert a string of names into a sequence
        if namelist is None:
            self.vdict = dict([(n, index) for index, n in enumerate(allnames) if match(index, n)])
            if not match.noop: #build list only if subset
                self.keys = [index for index, name in enumerate(allnames) if self.vdict.get(name) is not None]
        elif isinstance(namelist[0], basestring):        #type(namelist[0]) is str:    # got list of names
            self.vdict = dict([(n, index) for index, n in enumerate(allnames) if n in namelist and match(index, n)])
            for name in namelist:      # user order
                vnumber = self.vdict.get(name)  #no entry if failed match
                if vnumber is not None:
                    self.keys.append(vnumber)    #spss variable numbers
        else:                                # got list of indexes
            self.vdict = dict([(n, index) for index, n in enumerate(allnames) if index in namelist and match(index, n)])
            values = self.vdict.values()
            self.keys = [vnumber for vnumber in namelist if vnumber in values]        #spss variable numbers
        self.safe = safe
        self.numvars = len(self.vdict) 
        self.datasetname = dataset
            
    def __iter__(self):
        """generator to iterate over the variables in this dictionary.
        Returns an item with all the variable properties implemented in this class."""
        
        if self.numvars > 0 and not self.keys:
            for v in range(spss.GetVariableCount()):
                yield Variable(self, v, indextype='spss')
        else:
            for v in self.keys:
                yield Variable(self, v, indextype='spss')


    #subscripted access
    def __getitem__(self, index):
        if isinstance(index, basestring):
            try:
                return Variable(self, self.vdict[index], indextype='spss')
            except:
                raise ValueError, _msg10
        elif self.keys:
            return Variable(self, self.keys[index], indextype = 'spss')
        else:
            return Variable(self, index, indextype = 'spss')

    def __len__(self):
        return self.numvars
    
    def variableCount(self):
        "return the number of variables in this dictionary"
        return self.numvars
    #method renamed from variables to distinguish variables property from subset selection
    def variablesf(self, variableType=None, variableLevel=None, pattern=None):
        """return a list of the variables in this dictionary, optionally filtered by one or
        more of a type, level, or pattern specification."""
        
        match = makeVariableFilter(pattern, variableType, variableLevel)
        if match.noop:
            return self.vdict.keys()
        else:
            return [var for var in self.vdict.keys() if match(self.vdict[var], var)]
    #property - make complete variable list available as property
    variables = property(variablesf, None)
    
    def range(self, start=None, end=None, variableType=None, variableLevel=None, pattern=None):
        """return a list of variable names in this dictionary between start and end, inclusive in the order in this dictionary.
        
        start and end are variable names.  If either is omitted, the list extends to the first or last variable.
        variableType, variableLevel, and pattern can be used to filter the list.
        Omitting both returns all variables in dictionary order.
        If end precedes start, the returned list will be empty."""
        
        match = makeVariableFilter(pattern, variableType, variableLevel)
        varlist = []
        if not start:
            start = self[0].VariableName
        if not end:
            end = self[self.numvars-1].VariableName
        if not (start in self.vdict and end in self.vdict):
            raise ValueError, _msg11
        fetching = False
        for v in self:
            if v.VariableName == start:
                fetching = True
            if fetching and match(index=v.VariableIndex, name=v.VariableName):
                varlist.append(v.VariableName)
            if v.VariableName == end:
                break
        return varlist

    def expand(self, vlist):
        """return a validated variable list with TO and ALL expanded.
        
        vlist is the sequence or string of names to validate.  If a name is not found, a ValueError exception is raised with that name.
        An exception will also be raised for a malformed TO construct.
        The set of valid names consists of those in this VariableDict object.
        case is IGNORED here, unlike elsewhere in this class."""
        
        vlist = _buildvarlist(vlist)
        if vlist[0].lower() == "all":
            return self.variables
        # generate lower case variable list in SPSS dictionary order if not already available
        if not hasattr(self, "simplelist"):
            self.simplelist = [v.lower() for (v, l) in sorted(self.vdict.items(), key=operator.itemgetter(1))]
        #simplelist = [v.lower() for v in self.variablesf()]
        resultlist = []
        try:
            for i, v in enumerate(vlist):
                if v.lower() in self.simplelist:
                    resultlist.append(v)
                elif v.lower() == "to":
                    start = self.simplelist.index(vlist[i-1].lower())+1
                    end = self.simplelist.index(vlist[i+1].lower())
                    if start > end:
                        continue
                    resultlist.extend(self.simplelist[start:end])
                else:
                    raise ValueError
        except:
            raise ValueError, _msg19 + v
        return resultlist
    
    def indexes(self, variableType=None, variableLevel=None, pattern=None):
        """return a list of the variable indexes in this dictionary, optionally filtered by one or
        more of a type, level, or pattern specification."""
        match = makeVariableFilter(pattern, variableType, variableLevel)
        if match.noop:
            return self.vdict.values()
        else:
            return [self.vdict[var] for var in self.vdict.keys() if match(self.vdict[var], var)]
        
    def variableIndex(self, id):
        "return the index in the SPSS dictionary of the variable with name or index id."
        # if varDict is safe, check that the named variable is still at the saved index value
        if isinstance(id, basestring):            #type(id) is str:
            res = self.vdict[id]
            if self.safe and id != spss.GetVariableName(res): 
                raise KeyError
        else:
            res = id
        
        return res

    def variableName(self, id=None):
        "return the name of the variable with name or index id."
        # admittedly pointless if id is in fact a name, but allows the class to support names and
        # indexes uniformly
        if id is None:
            return None
        return spss.GetVariableName(self.VariableIndex(id))

    def variableLabel(self, id):
        "return the label of the variable with name or index id."
        return spss.GetVariableLabel(self.VariableIndex(id))
        
    def variableLevel(self, id):
        "return the measurement level of the variable with name or index id."
        return spss.GetVariableMeasurementLevel(self.VariableIndex(id))

    def variableType(self, id):
        """return the type of the variable with name or index id.
        0 = numeric
        >0 = string of that length
        """
        return spss.GetVariableType(self.VariableIndex(id))
    
    def variableFormat(self, id):
        "return the format of the variable with name or index id."
        return spss.GetVariableFormat(self.VariableIndex(id))

    def valueLabels(self, id):
        "return the set of value labels of the variable with name or index id as a dictionary."
        return GetValueLabels(self.VariableIndex(id))
    
    def valueLabelsTyped(self, id):
        """return the set of value labels of the variable with name or index id as a dictionary.
        
        The values are converted to numbers for numeric variables"""
        return GetValueLabels(self.VariableIndex(id), matchtype=True)

    def attributes(self, id=None):
        """return a dictionary of attributes for variable or index id.  If id = None
        the datafile attributes are returned
        """
        return getAttributesDict(self.VariableName(id))

    def missingValues(self, id):
        "return a string listing the missing value codes for variable or index id"
        return getMissingValues(self.VariableIndex(id))
    
    def missingValues2(self, id):
        "return a 4-tuple of missing value codes for variable or index id"
        return getMissingValues2(id)
    
    def variableNamesFromLabel(self, label):
        """Return a list of variable names for variables that have as label the specified text.
        
        This may be useful when SPSS output returns a variable label when the name is neede.  For
        example, getSHOW('weight') returns the label of the weight variable.  Variable labels need
        not be unique, however.
        """
        return [name for (name, index) in self.vdict.items() if spss.GetVariableLabel(index) == label]
    
    def activate(self, window="ASIS"):
        """Activate this dataset.  T
        
        he datasetname must already have been specified.
        window can be specified as ASIS, the default, or FRONT to bring it to the front."""
        
        spss.Submit("DATASET ACTIVATE " + self.datasetname + " WINDOW="+window)
        
    def close(self):
        """Close the dataset."""
        spss.Submit("DATASET CLOSE " + self.datasetname)
        
    #VariableDict class
    VariableCount = variableCount
    Variables = variables
    Indexes = indexes
    VariableIndex = variableIndex
    VariableName = variableName
    VariableLabel = variableLabel
    VariableLevel = variableLevel
    VariableType = variableType
    VariableFormat = variableFormat
    ValueLabels = valueLabels
    ValueLabelsTyped = valueLabelsTyped
    Attributes = attributes
    MissingValues = missingValues
    MissingValues2 = missingValues2
    VariableNamesFromLabel = variableNamesFromLabel

def isDateVariable(var, varDict=None):
    """return True if var is an SPSS date variable.  
    
    That is, its format is any of
    DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, WKYR, or DATETIME.  These are the variables that
    can be converted to Python dates.  TIME, WKDAY, and MONTH
    formats cannot be converted to dates, because they do not represent a particular point in
    time and, hence, this function returns False for them.

    var may be an SPSS dictionary variable index, or a Variable reference from a VariableDict object.
    If a VariableDict object is supplied, var may be a string holding a variable name."""
    
    if isinstance(var, basestring):
        if varDict is None or not isinstance(varDict, VariableDict):
            raise Exception(_msg12)
        var = int(varDict[var])
    else:
        var = int(var)
    vfmt = spss.GetVariableFormat(var)
    return vfmt[:3] in ['DAT','ADA','EDA','JDA','SDA','QYR','MOY','WKY','DAT']

def makeVariableFilter(pattern=None, variableType=None, variableLevel=None):
    """Make a criterion closure function for selecting variables based on regular expressions over names,
    variable type, and variable level (a list).  
    
    Returns the function.  The indexes in this function refer to the SPSS dictionary."""

    if pattern:
        try:
            pat = re.compile(pattern, re.IGNORECASE)
        except:
            raise ValueError, _msg17 + pattern
    if variableType and not variableType in ['numeric', 'string']:
        raise ValueError, _msg13

    if variableLevel is not None:
        for vl in variableLevel:
            if not vl in ['nominal', 'ordinal', 'scale', 'unknown']:
                raise ValueError, _msg14

    def match(index=-1, name=None):
        """test closure.  name is the variable name, and index is the SPSS dictionary index.
        
        name is not required unless an re is used, and index is not required unless type or level is specified.
        noop attribute is True if match does nothing."""
        
        if variableType is not None:
            p = spss.GetVariableType(index)
            if (variableType == "numeric" and p > 0) or (variableType == "string" and p ==0):
                return False
        if variableLevel is not None:
            p = spss.GetVariableMeasurementLevel(index)
            if not p in variableLevel:
                return False
        if pattern and not pat.match(name):
            return False
        return True
    match.noop = pattern == None and variableType == None and variableLevel == None
    return match

def _getSPSSLocFromIni():
    "return None or the SPSS directory for the preferred SPSS version from the xd ini file."
    
    #The config file is assumed to be ...site-packages/spss/spssxdcfg.ini if prior to SPSS 16
    #Otherwise, it is ...site-packages/spss<majorversion>0/spss/spssxdcfg.ini
    #This function is not needed in spss drives mode, but in xd, the
    #process executable will not indicate the SPSS location.
    #Starting with SPSS 15, check for preferred version
    
    pathlist = sys.executable.split(os.path.sep)
    spssver = GetSPSSVersion()
    spssmajor, spssminor = spssver.split(".")[0:2]
    if int(spssmajor) < 16:
        spsspath = "spss"
    else:
        spsspath = "spss" + spssmajor + spssminor + os.sep + "spss"
        
    pathtoini = sys.prefix + "/lib/site-packages" + os.sep + spsspath + os.sep

    try:    # for SPSS 15 and beyond, the spss_installed.ini file indicated the preferred version of SPSS to use
        suffix = ""
        cfgfile = pathtoini + "spss_installed.ini"
        cfg = ConfigParser.ConfigParser()
        cfg.read(cfgfile)
        suffix = cfg.get("default", "prefer")
        if suffix != "spss140":
            suffix = suffix + os.sep
        else:
            suffix = ""
    except:
        pass

        
    try:
        cfgfile = pathtoini +os.sep + suffix + "spssxdcfg.ini"
        cfg = ConfigParser.ConfigParser()
        cfg.read(cfgfile)
        return cfg.get("path", "spssxd_path")
    except:
        return None

def getVariableValues(varindex, missing=True):
    """Return a list of the values of the variable with index varindex as strings.  
    
    Raises exception if no nonsysmis values are found.
    If missing is True, user missing values are included; otherwise they are not.
    Note: if values are used to generate SPSS syntax and may contain quotes, be sure to handle
    them appropriately."""

    # spss module apis require simple int type    
    if isinstance(varindex, Variable): 
        varindex = varindex.VariableIndex

    varname = spss.GetVariableName(varindex)
    vartype = spss.GetVariableType(varindex)
    xptail, quot = vartype == 0 and ("@number", " ") or ("@string", "\"")
    if missing:
        xpath ="//pivotTable[@subType='Frequencies']/dimension/group//category/" + xptail
    else:
        xpath ="//pivotTable[@subType='Frequencies']/dimension/group/group//category/" + xptail

    tag, ignore = createXmlOutput("FREQUENCIES " + varname  +"/statistics none.")
    freqvalues = spss.EvaluateXPath(tag, '/outputTree',    xpath)
    spss.DeleteXPathHandle(tag)
    if len(freqvalues) == 0:
        raise Exception, _msg15
    return freqvalues                
    
# check value labels for uniqueness
def varsWithDuplicateLabels(vars):
    """Returns a dictionary where each SPSS variable with any duplicate value labels is the key,
    and the value for that key is the list of duplicates.  
    
    If a variable has no value labels, it has no duplicate values.
    vars can be an SPSS variable name (string), a list of names, or an spssaux VariableDict object.
    If a variable does not exist, an exception will be raised.  Variable names are not case
    sensitive.

    Example:
    vard = spssaux.VariableDict()
    d = spssaux.varsWithDuplicateLabels(vard)
    if d:
        print "Variables with Duplicate Value Labels"
        for var, values in d.items():
            print var, ":", values
    else:
        print "no duplicate value labels"
    """

    if isinstance(vars, VariableDict):
        vars = [v.VariableName for v in vars]
    elif isinstance(vars, basestring):
        vars = [vars]
    tag, err = createXmlOutput("DISPLAY DICT /VARIABLES = " + " ".join(vars),
        omsid='File Information')
    if err > 0:
        raise Exception, _msg16
    dupdict = {}
    #Xpath 1.0 does not include case conversion functions, so we have to match the actual Xpath names
    #Variables with no value labels do not show up in this table.
    xmlvarnames = spss.EvaluateXPath(tag, "/",
            "/outputTree/command/pivotTable[@subType='Variable Values']/dimension/group/@varName")  #get varnames w labels in XML case
    for v in xmlvarnames:
        valueLabels = spss.EvaluateXPath(tag, "/outputTree/command/pivotTable[@subType='Variable Values']",
        "dimension/group/category[@varName='%s']/@label" % v)
        d = _dups(valueLabels)
        if d:
            dupdict[v] = d

    spss.DeleteXPathHandle(tag)
    return dupdict

def _dups(vlist):
    """return a list of the values that occur more than once in the list vlist."""
    
    dupd = {}
    vdic = {}
    for v in vlist:
        if v in vdic:
            dupd[v] = None
        else:
            vdic[v] = None
    return list(dupd)


def _smartquote(s, qchar='"'):
    """ smartquote a string so that internal quotes are distinguished from surrounding
    quotes for SPSS and return that string with the surrounding quotes.  
    
    qchar is the character to use for surrounding quotes."""
    
    return qchar + s.replace(qchar, qchar+qchar) + qchar

def _listify(item):
    "Make item into a list but a string is a singleton"
    
    if isinstance(item, basestring):
        item = [item]
    else:
        item = list(item)
    return item

def createAttribute(varnames, attrname, attrvalue):
    """Create the scalar attribute attrname for the list of variables varnames with value attrvalue"""
    
    spss.Submit("VARIABLE ATTRIBUTE VARIABLES = " + varnames +\
                " ATTRIBUTE=" + attrname + "(" + _smartquote(str(attrvalue)) + ").")
    

def getSpssMajorVersion():
    "Return the major version number as an integer"
    
    return int(getSpssVersion().split(".")[0])

def _isseq(obj):
    """Return True if obj is a sequence, i.e., is iterable.
    
    Will be False if obj is a string or basic data type"""
    
    # differs from operator.isSequenceType() in being False for a string
    
    if isinstance(obj, basestring):
        return False
    else:
        try:
            iter(obj)
        except:
            return False
        return True
    
def deleteVars(varlist):
    """Delete the specified variables from the active dataset.
    
    varlist is a list of variable names to delete.
    If a variable in varlist does not exist, the delete is silently ignored."""
    
    # use a VariableDict object to silently eliminate variables that do not exist
    vard = VariableDict(namelist=varlist)
    if vard:
        spss.Submit("DELETE VARIABLES " + " ".join(vard.variables))
    
def _buildvarlist(arg):
    """return a list of (presumed) variable names or indexes.
    
    arg can be a sequence, including an spssaux VariableDict or a string of white-space or comma-separated names
    if arg is a string and its items can be converted to integers, the sequence is converted, but if any item cannot be
    converted, a list of strings is returned."""
    
    # sequences are not converted in order to preserve Variable objects
        
    if _isseq(arg) or arg is None or isinstance(arg, int):
        return arg
    else:
        arg = re.split("[ \t,\n]+", arg)
        try:
            numarg = [int(item) for item in arg if item != ""]  #if condition accounts for terminal blank
            return numarg
        except:
            return arg 
        
def getcleartblimit():
    """Save the traceback limit, if any, and suppress tracebacks"""

    global tracebacklimit
    try:
        tracebacklimit = sys.tracebacklimit
    except:
        tracebacklimit = None
    sys.tracebacklimit = 0
    
def restoretblimit():
    """Restore traceback limit to saved value, if any"""

    global tracebacklimit
    if tracebacklimit is None:
        if hasattr(sys, "tracebacklimit"):
            del sys.tracebacklimit
    else:
        sys.tracebacklimit = tracebacklimit
        
def u(txt):
    """Return txt as Unicode or unmodified according to the SPSS mode"""
    
    if not ok1600 or not isinstance(txt, str):
        return txt
    if spss.PyInvokeSpss.IsUTF8mode():
        if isinstance(txt, unicode):
            return txt
        else:
            return unicode(txt, "utf-8")
    else:
        return txt

ecutf8 = codecs.getencoder("utf_8")   # in Unicode mode, must figure var names in bytes of utf-8

def truncatestring(name, maxlength=64):
    """Return a name truncated to no more than maxlength BYTES.
    
    name is the candidate string
    maxlength is the maximum byte count allowed.  It must be a positive integer and defaults to 64,
    which is the maximum legal size for an SPSS variable name.
    
    If name is a (code page) string, truncation is straightforward.  If it is Unicode utf-8,
    the utf-8 byte representation must be used to figure this out but still truncate on a character
    boundary."""
    
    unicodemode = ok1600 and spss.PyInvokeSpss.IsUTF8mode()
    if not unicodemode:
        name =  name[:maxlength]
    else:
        newname = []
        nnlen = 0
        
        # In Unicode mode, length must be calculated in terms of utf-8 bytes
        for c in name:
            c8 = ecutf8(c)[0]   # one character in utf-8
            nnlen += len(c8)
            if nnlen <= maxlength:
                newname.append(c)
            else:
                break
        name = "".join(newname)
    # in 16.0.0, names cannot end in "_"
    #if name[-1] == "_":
    #    name = name[:-1]
    return name


#aliases for original casing of names for compatibility
CreateXMLOutput = createXmlOutput
GetValuesFromXMLWorkspace = getValuesFromXmlWorkspace
CreateDatasetOutput = createDatasetOutput
GetVariableNamesList = getVariableNamesList
GetVariableFormatsList = getVariableFormatsList
GetVariableLabelsList = getVariableLabelsList
GetVariableMeasurementLevelsList = getVariableMeasurementLevelsList
GetVariableTypesList = getVariableTypesList
GetSHOW = getShow
GetProcessInstallDir = getProcessInstallDir
GetSPSSInstallDir = getSpssInstallDir
CreateFileHandle = createFileHandle
GetAttributesDict = getAttributesDict
GetDatasetInfo = getDatasetInfo
GetActiveDatasetName = getActiveDatasetName
GetValueLabels = getValueLabels
GetMissingValues = getMissingValues
GetMissingValues2 = getMissingValues2
OpenDataFile = openDataFile
OpenDataFileFromUrl = openDataFileFromUrl
SaveDataFile = saveDataFile

GetVariableValues = getVariableValues
VarsWithDuplicateLabels = varsWithDuplicateLabels
CreateAttribute = createAttribute
GetSPSSVersion = getSpssVersion
GetSPSSMajorVersion = getSpssMajorVersion
DeleteVars = deleteVars