"This module manages SPSS database functions at a higher level than the SPSS module"

# All messages for this module are collected here for translation purposes.  Docstrings, however, are not.
# To translate, duplicate the message block and translate the later copy, maintaining the assignment syntax.
# The original can be commented out, but it will be replaced by the later version anyway, so this is not necessary.
# Maintaining the original English text will make it easier to update translations later.
# Place the translated block after all the English messages in order to facilitate checking for changes in the English text.

# Raised at import time when the installed spssaux module is too old.
_msg1 = "Warning: this module requires at least spssaux version "
# Argument validation in vdef.
_msg2 = "Measurement level must be nominal, ordinal, or scale"
_msg3 = "Variable format type  is invalid: "
_msg4 = "attrib parameter must be a dictionary or similar object type"
# Argument and version validation in the Spssdata constructor.
_msg5 = "Invalid accessType: "
_msg6 = "Cannot access data because active dataset is empty"
_msg7 = "accessType n cannot be combined with indexes or cvtDates"
_msg8 = "Indexes and cvtDates arguments must be a sequence of variable names or numbers or a VariableDict: "
_msg9 = "cvtDates must be subset of the index list"
_msg10 = "w,  a, and n data access require at least SPSS 15"
_msg11 = "convertUserMissing = False requires at least SPSS 15.0"
# Errors reported by the other Spssdata methods (wrong accessType for the call, bad values, and so on).
_msg12 = "makemvchecker cannot be used until dictionary is committed"
_msg13 = "Missing value checking requires at least SPSS 15"
_msg14 = "append is only available for accessType='w' and 'n'"
_msg15 = "casevalues is only available for accessType='w'"
_msg16 = "setvalue is only available for accessType w"
_msg17 = "appendvalue is only available for accessType='a'"
_msg18 = "Invalid variable name, variable index, or variable value specified: "
_msg19 = "CommitCase is only available for accessTypes a and w"
_msg20 = "Only three missing values can be defined for a variable."
_msg21 = "Invalid missing value range specification"
_msg22 = "IsStartSplit is only available for accessType = 'r' or 'w'"
_msg23 = "Cannot check for missing values.  makemvchecker must be called first."
_msg24 = "ismissing: The specified variable name or index is not in the cursor"
_msg25 = "fetchone cannot be used with accessType: "
# Errors reported by _getIndexInfo when resolving the requested variables.
_msg26 = "Only one VariableDict can be used in the index list"
_msg27 = "A cursor request specified an undefined variable"
# Not referenced in the part of the module shown above; presumably used by the
# date conversion helpers defined later in the file -- TODO confirm.
_msg28 = "Unconvertible datetime value"
_msg29 = "date specification must include at least (yyyy, mm, dd) but not more than six parts"


# copyright(c) SPSS Inc, 2005
# history
# 2005-oct-19 initial version
# 2005-nov-04 integrate optional date conversion into fetching
# 2006-feb-25 make cvtDates work when index is zero
# 2006-mar-28 Dataset class, dataset parameter in Spssdata class
#2006-aug-14  Add ismissing to test for missing value.  Requires SPSS 15 structure for mvs
#2006-aug-28 Add support for write and append cursors for SPSS 15
#2006-sep-22 Adjustments for write cursor
#2006-sep-25 Add methods for append cursor
#2006-oct-10 Allow Spssdata to accept string lists of indexes
#2006-oct-20 Implement Spssdata convertUserMissing parameter
#2006-nov-01 Add accessType='n' to Spssdata class to allow creating new datasets from scratch
#2006-nov-09 Validate THRU missing value specification
#2008-feb-11 Spssdata class now obeys contexthandler so it can be used in a with statement
#2008-jun-27 Generalize yrmodasec to allow time portions

# Module metadata.
__author__  =  'spss'
__version__ =  '2.2.0'

import spss, spssaux, namedtuple
import datetime

# Oldest spssaux release this module works with, as [major, minor, patch].
minver = [2, 0, 1]
if [int(v) for v in spssaux.__version__.split(".")] < minver:
    # Version lists compare element by element, so 2.0.0 < 2.0.1 < 2.1.0.
    raise ImportError(_msg1 + ".".join([str(item) for item in minver]))

# Feature gates keyed on the running SPSS version.
spssver = [int(v) for v in spssaux.GetSPSSVersion().split(".")]
ok1500, ok1501, ok1600 = (
    spssver >= [15, 0, 0],
    spssver >= [15, 0, 1],
    spssver >= [16, 0, 0],
)


# The LOWEST/HIGHEST sentinels are only fetched on SPSS 15 or later.
if ok1500:
    spsslow, spsshigh = spss.GetSPSSLowHigh()

def vdef(vname, vtype=None, vlabel=None, vmeasurelevel=None, vfmt=None, valuelabels=None, missingvalues=None, attrib=None):
    """Helper function for defining new variables when an Spssdata cursor has accessType='w' or 'n'.

    Returns a tuple of all the variable attributes, making it easy to supply only nondefault values.
    The tuple is of the form expected by Spssdata.append.
    The variable may be appended more than once with subsets of properties in each append.  They will be
    merged, with the last one winning in case of conflicts.

    vname is the variable name.  It is the only required parameter.
    vtype is the numerical type code, default is numeric (0) if nothing is specified before the dictionary is committed.
    vlabel is the variable label, if any.
    vmeasurelevel = 'nominal', 'ordinal' or 'scale' (case does not matter).  Default is SPSS default according to type.
    vfmt = variable format as a sequence of type, width, optional decimals.  See varfmts for the type names.
    The type name is replaced by its numeric code in the returned tuple, and the spec is padded with None.
    valuelabels is a dictionary of values and labels.
    missingvalues is a user missing value specification tuple.
    It can be a 4-tuple matching the definition for spss.GetVarMissingValues, or it can be a list of up to three
    missing values or it can include "THRU" in the list to indicate a range, and, optionally a third value.
    Examples: [9, "THRU", 99], [99, 98, 97], [9, "THRU", 99, 999]
    Ranges are only available for numeric variables.
    If used, the range specification must precede the singleton missing value: [99, 999, "THRU", 9999] is invalid.
    attrib is a dictionary of attributes and their values.

    Raises AttributeError for an invalid measurement level or attrib value and KeyError for an
    unknown format type.
    """
    ml = None
    if vmeasurelevel:
        try:
            # measurement level codes are 2 (nominal), 3 (ordinal), 4 (scale)
            ml = ['nominal', 'ordinal', 'scale'].index(vmeasurelevel.lower()) + 2
        except (ValueError, AttributeError):   # unknown level, or not a string at all
            raise AttributeError(_msg2)
    if vfmt:
        try:
            fmtname = vfmt[0]
        except (IndexError, KeyError, TypeError):   # vfmt is not an indexable sequence
            raise KeyError(_msg3 + str(vfmt))
        try:
            fmtspec = [varfmts[fmtname.upper()]]
            fmtspec.extend(vfmt[1:])
        except (KeyError, AttributeError, TypeError):   # unknown or non-string format type
            raise KeyError(_msg3 + str(fmtname))
        if len(fmtspec) < 3:
            fmtspec.append(None)
        vfmt = fmtspec
    if attrib and not spssaux._isseq(attrib):
        raise AttributeError(_msg4)
    return (vname, vtype, vlabel, ml, vfmt, valuelabels, missingvalues, attrib)

# SPSS format type names mapped to their numeric format codes.
varfmts = {
    "A": 1,
    "AHEX": 2,
    "COMMA": 3,
    "DOLLAR": 4,
    "F": 5,
    "N": 16,
    "E": 17,
    "DATE": 20,
    "TIME": 21,
    "DATETIME": 22,
    "ADATE": 23,
    "JDATE": 24,
    "DTIME": 25,
    "MONTH": 26,
    "MOYR": 27,
    "QYR": 28,
    "WKYR": 29,
    "PCT": 30,
    "DOT": 31,
    "CCA": 32,
    "CCB": 33,
    "CCC": 34,
    "CCD": 35,
    "CCE": 36,
    "EDATE": 37,
    "SDATE": 38,
}

# format numbers where decimals can be specified
vardecimalsallowed = set([3, 4, 5, 16, 17, 21, 22, 25, 30, 31, 32, 33, 34, 35, 36])
# for reverse lookups (code -> name) when creating data definitions via syntax
varfmtsrev = dict((code, fmtname) for fmtname, code in varfmts.items())

# measurement level codes mapped to the keyword used in VARIABLE LEVEL syntax
varlevels = {2: "Nominal", 3: "Ordinal", 4: "Scale"}


def _fmtmap(fmt, width, decimals=0):
    """Build and return the SPSS syntax form of a format, e.g. code 5, width 8, decimals 2 -> "F8.2".

    fmt is a numeric format type code (a value in varfmts).
    width is the field width.
    decimals is the number of decimals.  It is ignored for format types that do not
    take decimals and when it is None."""

    pieces = [varfmtsrev[fmt], str(width)]
    if decimals is not None and fmt in vardecimalsallowed:
        pieces.append(".")
        pieces.append(str(decimals))
    return "".join(pieces)

class Spssdata(object):
    """Spssdata manages the active SPSS dataset retrievals and updates at a higher level than
    in the spss module.

    An Spssdata object wraps an spss.Cursor.  Depending on accessType it reads cases ('r'),
    adds new variables to the existing cases ('w'), appends new cases ('a'), or creates an
    entirely new dataset ('n').

    Spssdata can be used as a context handler supporting the with statement available with Python 2.5.
    Example:
    from __future__ import with_statement
    import spss, spssdata
    with spssdata.Spssdata(indexes=[0]) as curs:
      for case in curs:
        print case
      z = case[0]/0

    This usage ensures that the cursor is properly closed at the end of the with whether an
    exception occurs (forced in this example), or not.
    """

    def __init__(self, indexes=(), names=True, cvtDates=(), dataset=None, omitmissing=False, convertUserMissing=True,
                 accessType='r', maxaddbuffer=80):
        """Create new cursor to active data.

        Indexes can be a sequence of variable names or index numbers or omitted or () or "ALL" for
        all. Indexes can also be a string of names or index numbers separated by white space and/or
        commas. The index entries must all be the same type. Invalid variable names or out of range
        numbers will raise an exception either in the constructor or when cases are fetched.

        names == True causes NamedTuples to be produced.  If false, standard tuples are returned.
        If names, the items in the index list must not contain any duplicates.
        If omitmissing is True (requires at least SPSS 15), then any case where any of the selected variables
        is user or system missing is not returned.  If missing values need to be checked but the cases returned
        anyway, use method makemvchecker to create a checking function and call it on the case with the ismissing method.
        For example,
          curs = spssdata.Spssdata(indexes=['mpg','year','accel'], convertUserMissing=False)
          curs.makemvchecker()
          curs.ismissing("year", 0)
          or
          curs.ismissing(1, 0)
          would return True if 0 is defined as a missing value for variable "year"
          Method hasmissing can be used to check the entire variable list at once.

        This module also contains a function ismissing (not part of this class) that can be used independently of the cursor.

        If convertUserMissing is True, user missing values are converted to None.

        If convertUserMissing is False and this is at least SPSS 15, user missing values are returned
        as is. If this is version 14, an exception is raised. convertUserMissing is irrelevant, of
        course, if omitmissing is True.

        If some of the variables retrieved are SPSS date/time variables they can be
        converted to Python datetime objects by listing them in cvtDates.  Otherwise
        the values are the SPSS datetime values.
        cvtDates accepts the same types of values as indexes, but an empty sequence means convert
        no dates, and "ALL" refers to all the variables in indexes.
        cvtDates should be a subset of indexes.
        If dataset is specified, that name is activated before creating the cursor.  If the current dataset
        has no name, it will be lost when the new one is activated.  The dataset parameter cannot be used between
        spss.StartProcedure and spss.EndProcedure
        accessType can be 'r', 'w', 'a', or 'n' for read (the default), write, append, or new dataset access.
        w, a, and n require at least SPSS 15.

        For accessType w, use append to define a new variable, commitdict to commit the dictionary
        and start setting values, and casevalues to set the new values. Use CClose to close the
        cursor.

        For accessType a, use appendvalue to set the values of variables in a new case and
        CommitCase to finish that case. Use CClose to end the appends.

        If accessType='w' or will be 'w' within the current procedure and it is SPSS 15.0.1 or later,
        maxaddbuffer can be specified to allocate space for new variables. It will be added to the space
        required by appended variables with at least room for ten more numeric variables (on a future
        pass). If the new variables are appended on the first pass, there is no need to specify this
        parameter.

        To create an entirely new dataset (which will automatically close the active dataset if it
        does not have a dataset name), set accessType='n'. This will create the dataset definition
        using syntax and then change to append mode for adding the cases.

        Raises ValueError for an invalid accessType, an empty active dataset, or an invalid combination
        of arguments; TypeError for unusable index arguments; AttributeError if the SPSS version does
        not support the requested access type."""

        if not accessType in ["r", "w", "a", "n"]:
            raise ValueError(_msg5 + str(accessType))
        if accessType != 'n' and spss.GetVariableCount() == 0:
            raise ValueError(_msg6)
        if accessType == 'n' and (indexes or cvtDates):
            raise ValueError(_msg7)
        indexes = spssaux._buildvarlist(indexes)
        cvtDates = spssaux._buildvarlist(cvtDates)
        self.maxaddbuffer = maxaddbuffer
        self.calledalloc = False  # some functions can be used only once with a given cursor
        self.omitmissing = omitmissing
        self.mvdata = []  # for use in checking for missing data
        self.accessType = accessType
        self.convertUserMissing = convertUserMissing
        if accessType in ['a', 'n']:
            self.omitmissing = False
            self.convertUserMissing = False
        self.newvars = []  #for new variable specifications in w mode
        self.cvtDateIndexes = []
        self.namelist = []
        # For accessType='n' the real values are filled in when the dictionary is committed;
        # define them now so that helper methods fail cleanly rather than with AttributeError.
        self.indexes = []
        self.numvars = 0
        self.first = True
        self.isendsplit = None   #split status is undefined until a case has been read.
        self.unicodemode = ok1600 and spss.PyInvokeSpss.IsUTF8mode()
        if self.unicodemode:
            self.unistr = unicode
        else:
            self.unistr = str

        if dataset and self.accessType != 'n':
            spss.Submit("DATASET ACTIVATE " + self.unistr(dataset))
        for item in indexes, cvtDates:
            if not spssaux._isseq(item):  #for clearer diagnosis of common errors
                raise TypeError(_msg8 + self.unistr(item))
        if not accessType == 'n':  # cannot use until variables are defined if new dataset
            self.indexes, self.namelist = _getIndexInfo(list(indexes), datelist=False)
            self.numvars = len(self.indexes)
        if cvtDates:
            self.cvtDateIndexes = _getIndexInfo(list(cvtDates), datelist=True)
            if not set(self.cvtDateIndexes).issubset(self.indexes):
                raise TypeError(_msg9)
            #make date index list refer to positions in projected tuple of SPSS variables if there is a projection
            self.cvtDateIndexes = [self.indexes.index(item) for item in self.cvtDateIndexes]

        if names:
            self.rettype = namedtuple.MakeNamedTuple('namedTuple', self.namelist)
        else:
            self.rettype = tuple
        if self.accessType == 'r':
            self.cur = spss.Cursor(self.indexes)
        elif not ok1500:
            raise AttributeError(_msg10)
        elif accessType != 'n':
            self.cur = spss.Cursor(self.indexes, self.accessType)
        if self.omitmissing:
            self.makemvchecker()
        if accessType == "a":
            self.vartypes = [spss.GetVariableType(i) for i in self.indexes]
        if accessType not in ['n', 'a'] and not self.convertUserMissing:
            if not ok1500:
                raise ValueError(_msg11)
            self.cur.SetUserMissingInclude(True) # do not convert user missing values to None

    def __enter__(self):
        """initialization for with statement"""
        return self

    def __exit__(self, type, value, tb):
        """cleanup for use with with statement.  Exceptions are never suppressed."""
        self.CClose()
        return False

    def getvarindex(self, varname):
        """return index number for varname in current cursor.

        varname is the case-matching variable name.
        The return value is an index suitable for use with spss module functions.
        A ValueError exception is raised if the variable is not in the cursor."""

        return self.indexes[self.namelist.index(varname)]

    def makemvchecker(self):
        """Prepare missing list for checking variables in the cursor for user and system missing values.

        The missing value definitions are fetched for each cursor variable, with trailing blanks
        stripped from string values.  Calling this method again rebuilds the information.
        Raises ValueError for accessType='n'."""

        if self.accessType == 'n':
            raise ValueError(_msg12)
        # Build into locals and publish only on success, so a failure leaves the checker "not prepared".
        mvdata = []
        vardict = {}   # name -> cursor position, for quicker access via the ismissing method
        try:
            for cindex, spssindex in enumerate(self.indexes):
                trimval = []
                for mv in spss.GetVarMissingValues(spssindex):
                    if isinstance(mv, basestring):
                        mv = mv.rstrip()
                    trimval.append(mv)
                mvdata.append(tuple(trimval))
                vardict[self.namelist[cindex]] = cindex
        except AttributeError:   # api not available in this SPSS version
            print(_msg13)
            raise
        self.mvdata = mvdata
        self.vardict = vardict

    def append(self, variablespec):
        """Append a specification for a new variable consisting of a tuple as defined in function vdef above.

        A bare variable name is also accepted and defines a numeric variable.
        If a variable is appended more than once, the properties are merged.  The last ones win.
        The variable order will be based on the first mention of the name.
        This is only allowed with accessType='w' or 'n'; otherwise TypeError is raised.
        """
        if not self.accessType in ['w', 'n']:
            raise TypeError(_msg14)

        if not spssaux._isseq(variablespec):
            variablespec = [variablespec]
        self.newvars.append(list(variablespec))

    def commitdict(self):
        """Create any specified new variable definitions.

        Order the variables by earliest mention, but use the last mention of properties.
        Note that case matters even though SPSS itself does not care.

        If vdef was used to create the variable properties, the property set will always be full size.
        Short sequences could be appended directly, in which case the item order is expected to be
        the same as in vdef, and property assignment stops when it runs out of items.
        The variable type is merged like the other properties: a later mention that leaves the type
        unspecified does not reset a type given by an earlier mention.
        This is only allowed with accessType='w' or 'n'; otherwise TypeError is raised."""

        if not self.accessType in ['w', 'n']:
            raise TypeError("commitdict is only available for accessType='w' and 'n'")
        if not self.newvars:
            return

        names = []    # variable names in order of first mention
        merged = {}   # name -> last explicitly specified type (None if never specified)
        for item in self.newvars:
            if len(item) == 1:
                item.append(None)
            if item[1] is None:
                try:
                    if item[4][0] < 3:  #A formats imply string type
                        item[1] = int(item[4][1])
                except (IndexError, TypeError, ValueError):   # no usable format spec
                    pass
            if item[0] not in merged:    # first mention of variable
                names.append(item[0])
                merged[item[0]] = item[1]
            elif item[1] is not None:
                merged[item[0]] = item[1]
        types = [merged[name] or 0 for name in names]   # unspecified type means numeric
        # Every mention of a variable must agree on the final type because the type
        # selects between the numeric and string property setters below.
        for item in self.newvars:
            item[1] = merged[item[0]] or 0

        #  Automatic allocation can only address the first pass of added variables
        #  The buffer needs 8 bytes per numeric variable and length rounded up to a multiple of 8 for string variables
        if not self.calledalloc:
            bufferalloc = 0
            for t in types:
                if t == 0:  #numeric
                    bufferalloc += 8
                else:
                    bufferalloc += int((t+7)//8)*8
            self._bufferalloc(bufferalloc)
        self._SetVarNameAndType(names, types)

        for item in self.newvars:
            try:
                if item[2]:  # variable label
                    self._SetVarLabel(item[0], item[2])
                if item[3]:
                    self._SetVarMeasureLevel(item[0], item[3])
                if item[4]:  # variable format
                    width = item[4][1] or item[1] # strings might not have a width in the format spec
                    decimals = None
                    if len(item[4]) > 2:
                        decimals = item[4][2]
                    self._SetVarFormat(item[0], item[4][0], width, decimals)
                if item[5]:   # value labels
                    self._SetVarValueLabels(item[0], item[1], item[5])
                if item[6]:  # missing values
                    self._SetVarMissingValues(item[0], item[1], item[6])
                if item[7]:   #attributes
                    self._SetVarAttributes(item[0], item[7])
            except IndexError:   #allow list of attributes to be incomplete
                pass
        self._CommitDictionary()

    def casevalues(self, valuelist):
        """Assign values to new variables in current case and commit it.

        With a write cursor, valuelist is a sequence of values, one for each new variable in the order appended.
        If the list is shorter than the number of variables the extra variables will be sysmis.  Excess values will raise an exception.
        Raises TypeError if accessType is not 'w'."""

        if self.accessType != 'w':
            raise TypeError(_msg15)
        # A user could have a write cursor but not have created new variables on the first pass, but buffer
        # allocation must happen anyway, so it is called here even though it would seem not to be needed.
        self._bufferalloc()

        for i, v in enumerate(valuelist):
            spec = self.newvars[i]
            if len(spec) == 1 or not spec[1]:   #numeric (0 or None)
                if v is not None:
                    v = float(v)
                self.cur.SetValueNumeric(spec[0], v)
            else:
                self.cur.SetValueChar(spec[0], self.unistr(v))
        self.cur.CommitCase()

    def setvalue(self, var, value):
        """Set the value of a variable for the current case with accessType w.  Case must be explicitly committed if this method is used.

        var is the name of the new variable whose value is to be set.
        value is the value to set.
        ValueError will be raised if var is not a new variable name or the value type is inappropriate."""

        if not self.accessType == 'w':
            raise ValueError(_msg16)
        index = [varspec[0] for varspec in self.newvars].index(var)
        if len(self.newvars[index]) == 1 or not self.newvars[index][1]:  #numeric variable
            if value is not None:
                value = float(value)
            self.cur.SetValueNumeric(var, value)
        else:
            # unistr is an instance attribute chosen in the constructor according to the SPSS unicode mode
            self.cur.SetValueChar(var, self.unistr(value))

    def appendvalue(self, var, value):
        """Append a value in the current new case for the specified variable.

        var is the variable name or variable index in the SPSS dictionary (as used in the constructor).
        value is the value to assign to the case.
        call CommitCase on the cursor to add the case to the dataset.
        If varname/varindex is not in the current cursor, or the value cannot be set, ValueError is raised.
        """
        if self.accessType != 'a':
            raise ValueError(_msg17)

        try:
            spssindex = int(var)
            var = self.namelist[self.indexes.index(spssindex)]  #convert SPSS index to name
        except (ValueError, TypeError):  #not numeric (or not in the cursor), assume it is a name
            pass
        try:
            if self.vartypes[self.namelist.index(var)] == 0:  #numeric
                self.cur.SetValueNumeric(var, value)
            else:
                self.cur.SetValueChar(var, value)
        except Exception:   # unknown variable or a value the cursor rejects
            raise ValueError(_msg18 + str(var))

    def CommitCase(self):
        """Commit the current case.

        This api is only intended for use with accessType append or with write after setting the case values.
        Raises TypeError for a read cursor."""

        # simply a passthrough to lower-level method
        if self.accessType == 'r':
            raise TypeError(_msg19)
        self._bufferalloc()
        self.cur.CommitCase()

    def CClose(self):
        """Close the cursor after committing cases.

        Calling CClose when there is no open cursor (already closed, or a new dataset whose
        dictionary was never committed) does nothing, so it is safe to call more than once."""
        cur = getattr(self, "cur", None)
        if cur is None:
            return
        if self.accessType == 'a':
            try:
                cur.EndChanges()
            except Exception:   # best effort: there may be no pending changes to end
                pass
        cur.close()
        del self.cur

    def _SetVarNameAndType(self, names, types):
        """Create variables with specified names and types.

        names is a sequence of names and types is a corresponding sequence of types.
        With a write cursor, use the Cursor.  With a new cursor, use syntax."""

        if self.accessType == 'w':
            self.cur.SetVarNameAndType(names, types)
        else:
            cmd = []
            for name, vtype in zip(names, types):
                if vtype == 0:
                    vt = "F8.2"
                else:
                    vt = "A" + str(vtype)
                cmd.append(name + " (" + vt + ")" )
            spss.Submit("DATA LIST NOTABLE/" + " ".join(cmd))
            self.indexes, self.namelist = _getIndexInfo([], datelist=False) #fetch all variables
            self.numvars = len(self.indexes)

    def _SetVarLabel(self, name, label):
        """Set the variable label via the cursor (w mode) or VARIABLE LABEL syntax (new dataset)."""
        if self.accessType == 'w':
            self.cur.SetVarLabel(name, label)
        else:
            spss.Submit("VARIABLE LABEL " + name + " " + spssaux._smartquote(label))

    def _SetVarMeasureLevel(self, name, level):
        """Set the measurement level code (a key of varlevels) via the cursor or VARIABLE LEVEL syntax."""
        if self.accessType == 'w':
            self.cur.SetVarMeasureLevel(name, level)
        else:
            spss.Submit("VARIABLE LEVEL " + name + " (" + varlevels[level] + ")")

    def _SetVarFormat(self, name, fmt, width, decimals):
        """Set the variable format via the cursor or FORMATS syntax.  fmt is a numeric format code."""
        if self.accessType == 'w':
            self.cur.SetVarFormat(name, fmt, width, decimals)
        else:
            spss.Submit("FORMATS " + name + "(" + _fmtmap(fmt, width, decimals) +")")

    def _SetVarValueLabels(self, name, vtype, labels):
        """Set value labels via the cursor or VALUE LABELS syntax.

        vtype is the variable type code (0 or None means numeric); labels is a dictionary of values and labels."""
        if self.accessType == 'w':
            for value, label in labels.iteritems():
                if not vtype:  #numeric (code of 0 or None)
                    self.cur.SetVarNValueLabel(name, float(value), str(label))
                else:
                    self.cur.SetVarCValueLabel(name, str(value), str(label))
        else:
            cmd = []
            for value, label in labels.iteritems():
                # string values are quoted, numeric values are not; the label always follows the value
                if vtype:
                    valspec = spssaux._smartquote(str(value))
                else:
                    valspec = str(value)
                cmd.append(valspec + " " + spssaux._smartquote(label))
            spss.Submit("VALUE LABELS " + name + " " + " ".join(cmd))

    def _SetVarMissingValues(self, name, vtype, mvtuple):
        """Set user missing values via the cursor or MISSING VALUES syntax.

        mvtuple is a missing value specification as described in vdef.
        Raises ValueError if there are too many values or a THRU range is not in the leading position."""
        if len(mvtuple) > 4:
            raise ValueError(_msg20)
        # if variable is numeric, look for THRU to indicate a range spec
        # alternatively, if THRU is not present and the tuple is full length (4), assume type code is included.  Otherwise prefix with type 0
        # if variable is string and first element is not integer, prefix with type 0

        if vtype == 0:  #numeric?
            try:
                thruindex = list(mvtuple).index("THRU")   # range mv's?
            except ValueError:   # no range requested
                if len(mvtuple) < 4:
                    mvtuple = (0,) + tuple(mvtuple)
            else:
                if thruindex != 1:   # the range must be the leading pair: low THRU high
                    raise ValueError(_msg21)
                mvtuple = tuple(mvtuple[:thruindex]) + tuple(mvtuple[thruindex+1:])
                if len(mvtuple) < 3:
                    mvcode = 1   # range only
                else:
                    mvcode = 2   # range plus one discrete value
                mvtuple = (mvcode,) + mvtuple
        else:
            if not isinstance(mvtuple[0], int):
                mvtuple = (0,) + tuple(mvtuple)

        if self.accessType == 'w':
            mvtuple = tuple(mvtuple) + (None, None, None)  #user spec could be short
            if vtype == 0:
                self.cur.SetVarNMissingValues(name, mvtuple[0], mvtuple[1], mvtuple[2], mvtuple[3])
            else:
                self.cur.SetVarCMissingValues(name, mvtuple[1], mvtuple[2], mvtuple[3])
        else:
            if vtype == 0:
                mvspec = []
                for i in range(1, len(mvtuple)):
                    if mvtuple[i] == spsslow:
                        mvspec.append("LOWEST")
                    elif mvtuple[i] == spsshigh:
                        mvspec.append("HIGHEST")
                    elif mvtuple[i] is not None:
                        mvspec.append(str(mvtuple[i]))
                if mvtuple[0] > 0:  # range spec (only applies to numeric variables)
                    mvspec = " ".join(mvspec[:1] + ["THRU"] + mvspec[1:])
                else:
                    mvspec = " ".join(mvspec)
            else:
                mvspec = " ".join([spssaux._smartquote(item) for item in mvtuple[1:] if item is not None])
            if mvspec:
                spss.Submit("MISSING VALUES " + name + " (" + mvspec + ")")

    def _SetVarAttributes(self, name, attrs):
        """Set custom variable attributes via the cursor or VARIABLE ATTRIBUTE syntax.

        attrs is a dictionary of attribute names and values."""
        if self.accessType == 'w':
            for key, value in attrs.iteritems():
                self.cur.SetVarAttributes(name, key, value, 0)
        else:
            cmd = []
            for key, value in attrs.iteritems():
                cmd.append(key + "(" + spssaux._smartquote(value) + ")")
            spss.Submit("VARIABLE ATTRIBUTE VARIABLES = " + name + " ATTRIBUTE = " + " ".join(cmd))

    def _CommitDictionary(self):
        """Commit new variable definitions.

        With a write cursor the cursor commits the dictionary.  For a new dataset the variables
        were already created via syntax, so the object switches to append mode and opens the cursor."""
        if self.accessType == 'w':
            self.cur.CommitDictionary()
        else:
            self.accessType = 'a'
            self.cur = spss.Cursor(self.indexes, self.accessType)
            self.vartypes = [spss.GetVariableType(i) for i in self.indexes]

    def _dateconverter(self, row):
        """convert any values in row specified in cvtDates to Python date values.

        The row is returned unchanged if there is nothing to convert or if the first
        date position already holds a datetime object."""
        if not self.cvtDateIndexes:
            return row
        if isinstance(row[self.cvtDateIndexes[0]], datetime.datetime):  #already converted?
            return row
        row = list(row)
        for index in self.cvtDateIndexes:
            row[index] = CvtSpssDatetime(row[index])
        return tuple(row)

    def fetchone(self):
        """Return the next case from the active dataset.  If omitmissing, return the next case without missing data.
        EOFError is raised when there are no more cases.  Iterators will want to trap this.
        Note that omitting cases with missing data can cause EOFError to be raised.

        If split files is active, IsStartSplit can be called to determine a split boundary.
        Raises AttributeError if accessType is not 'r' or 'w'."""

        if not self.accessType in ['r', 'w']:
            raise AttributeError(_msg25 + self.accessType)
        self._bufferalloc()
        if self.first:
            self.isendsplit = ok1500 and True  # split tracking only available in SPSS 15+
            self.first = False
        else:
            self.isendsplit = False
        while True:
            row = self.cur.fetchone()
            if row is None and ok1500 and self.cur.IsEndSplit():
                self.isendsplit = True
                row = self.cur.fetchone()   # in case split file processing is active
            if row is None:
                raise EOFError
            if self.omitmissing and self.hasmissing(row):
                continue
            return self.rettype(self._dateconverter(row))  # makes named or plain tuple

    def IsStartSplit(self):
        """Return True or False according to whether the cursor has crossed a split boundary.

        If no cases have been read or fetchall is used, the state is indeterminate, and None is returned.
        This function always returns False prior to SPSS 15 as the underlying support was introduced in that release.
        Raises AttributeError if accessType is not 'r' or 'w'."""

        if not self.accessType in ['r', 'w']:
            raise AttributeError(_msg22)
        return ok1500 and self.isendsplit

    def hasmissing(self, row):
        """Return True if any variable value in current row is user or system missing.

        makemvchecker must have been called first; otherwise AttributeError is raised."""

        if self.mvdata == []:
            raise AttributeError(_msg23)
        for i in range(self.numvars):
            if ismissing(row[i], self.mvdata[i]):
                return True
        return False

    def ismissing(self, var, value):
        """Return True or False according to whether value is a missing value for the variable var in the current cursor.

        var is a variable name or index in the cursor.
        value is the value to check.
        makemvchecker must have been called on the cursor in order to use this method (AttributeError otherwise).
        ValueError is raised if var is not in the cursor."""
        if self.mvdata == []:
            raise AttributeError(_msg23)
        try:
            if isinstance(var, basestring):
                var = self.vardict[var]  # convert name to index
            return ismissing(value, self.mvdata[var])
        except Exception:
            raise ValueError(_msg24)

    def __iter__(self):
        """generator to iterate over all remaining cases in the active dataset."""
        while True:
            try:
                row = self.fetchone()
            except EOFError:
                return
            if not row:
                return
            # fetchone has already applied date conversion and built the return type
            yield row

    def __del__(self):
        """make sure cursor is closed when the object is deleted."""
        try:
            self.CClose()
        except Exception:   # nothing useful can be done about a failure during deletion
            pass

    def fetchall(self):
        """Fetch all rows of data.  Return a list of tuples or named tuples.
        Omit cases with missing data if constructor specified omitmissing.

        If split files is active, fetchall returns all cases in the current split only.  IsStartSplit can be
        used to monitor splits.
        Raises AttributeError if accessType is not 'r' or 'w'."""

        if not self.accessType in ['r', 'w']:
            raise AttributeError(_msg25 + self.accessType)
        self.first = False
        self._bufferalloc()
        rows = self.cur.fetchall()
        self.isendsplit = ok1500 and self.cur.IsEndSplit()
        # only rebuild the list when there is something to filter or convert
        if self.rettype != tuple or self.omitmissing or self.cvtDateIndexes:
            rows = [self.rettype(self._dateconverter(row)) for row in rows if not (self.omitmissing and self.hasmissing(row))]
        return rows

    def close(self):
        """Close the underlying cursor directly, without ending pending append changes (see CClose)."""
        self.cur.close()
        del self.cur

    def restart(self):
        """Reset the open cursor to the same set of variables for another data pass.

        In append mode, any appended cases are committed before the reset."""

        if self.accessType == 'a':
            try:
                self.cur.EndChanges()
            except Exception:   # best effort: there may be no pending changes to end
                pass
        self.cur.reset()
        self.newvars = []  # any previously added variables are now old
        self.first = True
        self.isendsplit = None

    def varnames(self):
        """Return a list of the variable names being fetched for the cursor."""

        return self.namelist

    def _bufferalloc(self, extra=0):
        """Ensure that buffer for new variables has been set if in write mode.

        self.maxaddbuffer is padding.  extra specifies a known amount to which the padding is added.
        Prior to 15.0.1 the underlying api is undefined, and the resulting AttributeError is ignored."""
        # AllocNewVarsBuffer can only be called once for a cursor
        # Call is a no-op except in w mode

        try:
            if self.accessType == 'w' and not self.calledalloc:
                self.calledalloc = True
                self.cur.AllocNewVarsBuffer(self.maxaddbuffer+extra)
        except AttributeError:  #api undefined in this version
            pass
def ismissing(value, missingtuple):
    """Report whether value is user missing or system missing.

    missingtuple is the 3- or 4-tuple describing a variable's missing values, in the form
    returned by GetVarMissingValues or the spssaux Variable class MissingValues2 property.
    Returns True if value is None, matches one of the last three entries of missingtuple,
    or (numeric values with a non-zero first entry only) lies between the second and third entries.
    Returns False otherwise."""

    # String variables supply only a 3-tuple, so the discrete values are taken from the end.
    # Strings do not support range missing values.
    # String values arrive with trailing blanks but the missing value definitions do not,
    # so the blanks are stripped before comparing.
    text_value = isinstance(value, basestring)
    candidate = value.rstrip() if text_value else value

    if candidate is None:
        return True
    if candidate in missingtuple[-3:]:
        return True
    if missingtuple[0] == 0 or text_value:
        # no range is defined, or ranges do not apply
        return False
    return missingtuple[1] <= candidate <= missingtuple[2]
    

def _getIndexInfo(indexes, datelist):
    """Return a duple of a list of variable indexes and a list of variable names.
    
    indexes is a sequence of SPSS dictionary slot numbers, a sequence of variable names,
    or a sequence holding a single VariableDict object.
    If indexes is empty or its first item is "ALL", all the variables are retrieved.
    If datelist, then only SPSS date variables are included, and only the index
    sequence is returned (no variable name list).

    Raises ValueError if more than one item is supplied along with a VariableDict, or if
    fewer variables were resolved than were requested."""

    namelist = []
    if not indexes or indexes[0] == "ALL":
        indexes = range(spss.GetVariableCount())
    numrequestedvars = len(indexes)

    first = indexes[0]
    if isinstance(first, basestring):      # list of names
        # resolve the names to dictionary indexes; names are looked up again below
        vdict = spssaux.VariableDict(list(indexes))
        indexes = [int(v) for v in vdict]
    elif isinstance(first, spssaux.VariableDict):
        if len(indexes) > 1:
            raise ValueError(_msg26)
        namelist = [v.VariableName for v in first]
        indexes = [int(v) for v in first]
        
    if not namelist:
        namelist = [spss.GetVariableName(int(i)) for i in indexes]
    # a shorter resolved list than requested means some requested variables were not found
    if len(indexes) < numrequestedvars:
        raise ValueError(_msg27)

    if datelist:
        return filter(spssaux.isDateVariable, indexes)  # restrict to date variables
    return (indexes, namelist)
    
class Dataset(object):
    """Thin wrapper around the SPSS DATASET commands for one dataset name.

    The constructor takes the dataset name as a string.  Each operation submits the
    corresponding DATASET command for that name."""

    def __init__(self, dsname):
        self.dsname = dsname

    def __str__(self):
        # lets the object be used where callers expect the plain dataset name
        return self.dsname

    def _submit(self, prefix):
        """Submit the command formed by prefix followed by the dataset name."""
        spss.Submit(prefix + self.dsname)

    def name(self):
        """Submit DATASET NAME for this dataset name."""
        self._submit("DATASET NAME ")

    def activate(self):
        """Submit DATASET ACTIVATE for this dataset name."""
        self._submit("DATASET ACTIVATE ")

    def declare(self):
        """Submit DATASET DECLARE for this dataset name."""
        self._submit("DATASET DECLARE ")

    def close(self):
        """Submit DATASET CLOSE for this dataset name."""
        self._submit("DATASET CLOSE ")

def CvtSpssDatetime(dt):
    """Return a Python datetime object from an SPSS datetime value.
    
    dt is a number of seconds in the SPSS date scale, on which 86400 (one day) corresponds
    to October 15, 1582; it is the inverse of what yrmodasec produces.
    Returns None if dt is zero or None.
    Raises ValueError if dt is less than one day (86400 seconds).
    Any fractional second is truncated to whole microseconds.
    Note that SPSS day of week and month of year values cannot be converted."""

    if not dt:
        return None
    if dt < 86400:
        raise ValueError(_msg28)

    # split into whole days (date part) and seconds within the day (time part)
    t, time = divmod(dt, 86400)

    # Integer-arithmetic conversion of the day count to a Gregorian year, month, and day.
    # 146097 is the number of days in 400 years, 1461 the number in 4 years, and
    # 153 the number in a five-month stretch of a year that starts in March.
    j = t + 578041
    y = (4*j - 1)//146097           # century
    j = (4*j) - 1 - (146097*y)
    d = j//4
    j = (4*d + 3)//1461             # year within the century
    d = (4*d) + 3 - (1461*j)
    d = (d + 4)//4
    m = (5*d - 3)//153              # month, counted from March
    d = (5*d) - 3 - (153*m)
    d = (d + 5)//5                  # day of the month
    y = 100*y + j
    # shift from the March-based month numbering to calendar months
    if m < 10:
        m = m + 3
    else:
        m = m - 9
        y = y + 1
        
    wholesecs = int(time)
    hr, remainder = divmod(wholesecs, 60*60)
    minute, sec = divmod(remainder, 60)
    microsec = int((time - wholesecs) * 10**6)
    return datetime.datetime(int(y), int(m), int(d), int(hr), int(minute), int(sec), microsec)
    
def yrmodasec(ymd):
    """Compute SPSS internal date value from four digit year, month, and day and optional time.
    
    ymd is a sequence of numbers in that order.  The year, month, and day will be truncated to integers.
    If there are 4, 5, or 6 parts to the sequence, the extra parts are assumed to be h, m, and s
    and are not truncated.  The omitted parts are considered to be zero.
    The result is equivalent to the SPSS subroutine yrmoda result converted to seconds
    except that hms is an extension.

    Raises ValueError if ymd does not have between 3 and 6 items, or if the year is before 1582,
    the month is outside 1 to 13, or the day is outside 0 to 31."""
    
    if not 3 <= len(ymd) <= 6:
        raise ValueError(_msg29)
    year = int(ymd[0])
    month = int(ymd[1])
    day = int(ymd[2])
    
    if year < 1582 or month < 1 or month > 13 or day < 0 or day > 31:
        raise ValueError("Invalid date value: %d %d %d" % (year, month, day))
    # day count at the start of the month, assuming a 30-day February for now
    yrmo = year * 365 + (year+3)//4 - (year+99)//100 + (year + 399)//400 \
         + 3055 *(month+2)//100 - 578192
    if month > 2:
        # correct for the real length of February: 28 days, or 29 in a leap year
        yrmo -= 2
        if (year%4 == 0 and (year%100 != 0 or year%400 ==0)):
            yrmo += 1
    ret = (yrmo + day) * 86400   #24 * 60 * 60
    # allow for h, hm, or hms: item 3 is weighted by 3600, item 4 by 60, item 5 by 1
    for i in range(3, len(ymd)):
        ret += ymd[i] * 60**(5-i)
    return ret