# Source code for gpudb

##############################################
# Autogenerated GPUdb Python API file. 
# 
# *****Do NOT modify this file***** 
# 
##############################################

# ---------------------------------------------------------------------------
# gpudb.py - The Python API to interact with a GPUdb server.
#
# Copyright (c) 2018 Kinetica DB Inc.
# ---------------------------------------------------------------------------

from __future__ import print_function

try:
    from io import BytesIO
except:
    from cStringIO import StringIO as BytesIO
try:
    import httplib
except:
    import http.client as httplib
import base64
import os, sys
import datetime
import json
import random
import re
import ssl
import time
import traceback
import uuid

from collections import Iterator
from decimal import Decimal

# We'll need to do python 2 vs. 3 things in many places
IS_PYTHON_3 = (sys.version_info[0] >= 3) # checking the major component
IS_PYTHON_27_OR_ABOVE = sys.version_info >= (2, 7)

if IS_PYTHON_3:
    # Python 3 shims: bind the python-2-only names so the rest of this
    # file can reference them unconditionally.
    long = int          # py2 'long' was merged into 'int' in py3
    basestring = str    # py2 common base class of str/unicode
    # Dummy stand-in: on py3, isinstance(x, unicode) is always False
    # (py3 text already matches via basestring == str above).
    class unicode:
        pass

# ---------------------------------------------------------------------------
# The absolute path of this gpudb.py module for importing local packages
gpudb_module_path = __file__
# If loaded from a compiled ".pyc", drop the trailing 'c' to point back
# at the ".py" source so the symlink checks below operate on the source.
if gpudb_module_path[len(gpudb_module_path)-3:] == "pyc": # allow symlinks to gpudb.py
    gpudb_module_path = gpudb_module_path[0:len(gpudb_module_path)-1]
if os.path.islink(gpudb_module_path): # allow symlinks to gpudb.py
    gpudb_module_path = os.readlink(gpudb_module_path)
if not os.path.isabs(gpudb_module_path): # take care of relative symlinks
    gpudb_module_path = os.path.join(os.path.dirname(__file__), gpudb_module_path)
# Reduce to the absolute directory that contains this module.
gpudb_module_path = os.path.dirname(os.path.abspath(gpudb_module_path))

# Search for our modules first, probably don't need imp or virt envs.
# Inserted at position 1 (not 0) so the script directory keeps priority.
if not gpudb_module_path in sys.path :
    sys.path.insert(1, gpudb_module_path)
if not gpudb_module_path + "/packages" in sys.path :
    sys.path.insert(1, gpudb_module_path + "/packages")


# ---------------------------------------------------------------------------
# Local imports after adding our module search path


# C-extension classes for avro encoding/decoding
from protocol import RecordColumn
from protocol import RecordType
from protocol import Record
from protocol import Schema

from avro import schema, datafile, io


if IS_PYTHON_27_OR_ABOVE:
    import collections
else:
    # Python 2.6 has no collections.OrderedDict; use the backport
    import ordereddict as collections # a separate package


# Override some python3 avro things
# Alias the python-3 avro names (Parse, field_map) back onto the
# python-2 style names (parse, fields_dict) used throughout this file.
if IS_PYTHON_3:
    schema.parse = schema.Parse
    schema.RecordSchema.fields_dict = schema.RecordSchema.field_map



# Optional snappy compression support; degrade gracefully when the
# python-snappy package is not installed.
have_snappy = False
try:
    import snappy
    have_snappy = True
except ImportError:
    have_snappy = False

from tabulate import tabulate



# Some string constants used throughout the program
class C:
    """Some string constants used throughout the program."""

    # Generic response key holding a record type's field list
    _fields = "fields"

    # /show/table response keys and table-description flag values
    _table_descriptions = "table_descriptions"
    _collection   = "COLLECTION"
    _view         = "VIEW"
    _replicated   = "REPLICATED"
    _join         = "JOIN"
    _result_table = "RESULT_TABLE"
    _total_full_size = "total_full_size"
    _additional_info = "additional_info"
    # NOTE(review): attribute name is singular but it maps to the plural
    # response key "collection_names" -- confirm against the /show/table
    # response schema before renaming either side.
    _collection_name = "collection_names"

    # /show/system/properties response keys
    _property_map = "property_map"
    _gaia_version = "version.gpudb_core_version"

# end class C



# ---------------------------------------------------------------------------
# GPUdbException - Exception for Generic GPUdb Issues
# ---------------------------------------------------------------------------
class GPUdbException( Exception ):
    """Generic exception for GPUdb errors.

    May wrap either a plain message or another exception.  ``self.message``
    and ``self.traceback_msg`` are always set, so accessors such as
    :meth:`get_formatted_traceback` never raise AttributeError (the
    previous version left both unset for values that were neither a
    string nor an Exception).
    """

    def __init__( self, value ):
        """Create the exception.

        Parameters:
            value
                Either a message (usually a string) or an exception
                instance to wrap.
        """
        self.value = value
        if isinstance( value, Exception ):
            # Preserve the message and also the stack trace.  args[0]
            # works on python 2 and 3 alike (python 2's '.message' is
            # just args[0] for standard exceptions); guard against
            # exceptions constructed with no arguments.
            self.message = value.args[0] if value.args else str( value )
            self.traceback_msg = "".join( traceback.format_exception(
                sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2] ) )
        else:
            # We got a message (or some other printable object)
            self.message = value
            self.traceback_msg = ""
    # end __init__

    def __str__( self ):
        return repr( self.value )
    # end __str__


    def get_formatted_traceback( self ):
        """If this exception is created from another exception,
        then returns a string containing the original traceback.
        Otherwise, returns an empty string.
        """
        return self.traceback_msg
    # end get_formatted_traceback

# end class GPUdbException


# ---------------------------------------------------------------------------
# GPUdbConnectionException - Exception for HTTP Issues
# ---------------------------------------------------------------------------
class GPUdbConnectionException( GPUdbException ):
    """Exception raised for HTTP connection related issues."""

    def __init__( self, value ):
        """Create the exception from a message.

        Parameters:
            value
                The error message describing the connection problem.
        """
        self.value = value
        self.message = value
        # Bug fix: the base class sets this attribute, but this override
        # did not -- so calling the inherited get_formatted_traceback()
        # used to raise AttributeError.  There is no wrapped exception
        # here, hence an empty traceback.
        self.traceback_msg = ""
    # end __init__

    def __str__( self ):
        return repr( self.value )
    # end __str__

# end class GPUdbConnectionException


# ---------------------------------------------------------------------------
# _ConnectionToken - Private wrapper class to manage connection logic
# ---------------------------------------------------------------------------
class _ConnectionToken(object):
    """Internal wrapper class to handle multiple server logic."""
    def __init__(self, host, port, host_manager_port, connection):
        """Parse and validate one server's connection parameters.

        Parameters:
            host (str)
                Server address; may be a full URL of the form
                'http[s]://user:password@domain.com:port/path/'.  Any
                scheme, credential, path, and port components found in
                it are stripped out below.
            port (int or str)
                Head node port; overridden by a port embedded in *host*.
                May be None/empty when using the standard HTTP(S) ports.
            host_manager_port (int or str)
                The host manager port.
            connection (str)
                'HTTP' or 'HTTPS'; forced to 'HTTPS' when *host* starts
                with 'https://'.

        Raises:
            GPUdbException
                If any component fails validation.
        """
        if not isinstance(host, (basestring, unicode)):
            raise GPUdbException( "Expected a string host address, got: '"+str(host)+"'" )

        # host may take the form of :
        #  - "https://user:password@domain.com:port/path/"

        # NOTE: the parsing below is strictly ordered: scheme first,
        # then credentials, then URL path, then port.
        if host.startswith("http://") :    # Allow http://, but remove it.
            host = host[7:]
        elif host.startswith("https://") : # Allow https://, but remove it.
            host = host[8:]
            connection = "HTTPS" # force it

        # Parse the username and password, if supplied.
        # NOTE(review): the credentials are only stripped from the host;
        # the 'username'/'password' locals are never stored on the object
        # -- confirm that authentication is handled elsewhere.
        host_at_sign_pos = host.find('@')
        if host_at_sign_pos != -1 :
            user_pass = host[:host_at_sign_pos]
            host = host[host_at_sign_pos+1:]
            user_pass_list = user_pass.split(':')
            username = user_pass_list[0]
            if len(user_pass_list) > 1 :
                password = user_pass_list[1]

        url_path = ""
        # Find the URL /path/ and remove it to get the ip address.
        host_path_pos = host.find('/')
        if host_path_pos != -1:
            url_path = host[host_path_pos:]
            if url_path[-1] == '/':
                url_path = url_path[:-1]  # drop the trailing slash
            host = host[:host_path_pos]

        # Override default port if specified in ip address
        # NOTE(review): a bare IPv6 literal would be mis-split here since
        # the first ':' is treated as the port separator -- presumably
        # IPv6 hosts are not expected; confirm.
        host_port_pos = host.find(':')
        if host_port_pos != -1 :
            port = host[host_port_pos+1:]
            host = host[:host_port_pos]

        # Port does not have to be provided if using standard HTTP(S) ports.
        if (port == None) or len(str(port)) == 0:
            if connection == 'HTTP' :
                port = 80
            elif connection == 'HTTPS' :
                port = 443

        # Validate the head node port
        try :
            port = int( port )
        except :
            raise GPUdbException( "Expected a numeric port, got: '" + str(port) + "'" )
        # Validate the host manager port
        try :
            host_manager_port = int( host_manager_port )
        except :
            raise GPUdbException( "Expected a numeric host manager port, got: '" + str(host_manager_port) + "'" )

        # Port values must be within (0, 65536)
        if ( (port <= 0) or (port >= 65536) ):
            raise GPUdbException( "Expected a valid port (1-65535), got: '"+str(port)+"'" )
        if ( (host_manager_port <= 0) or (host_manager_port >= 65536) ):
            raise GPUdbException( "Expected a valid host manager port (1-65535), got: '"+str(host_manager_port)+"'" )
        # Must have at least one host
        if not (len(host) > 0):
            raise GPUdbException( "Expected a valid host address, got an empty string." )
        # Valid protocols: http and https
        if connection not in ["HTTP", "HTTPS"]:
            raise GPUdbException( "Expected connection to be 'HTTP' or 'HTTPS', got: '"+str(connection)+"'" )

        # Store the fully parsed and validated connection pieces.
        self._host              = str( host )
        self._port              = int(port)
        self._host_manager_port = int(host_manager_port)
        self._connection        = str( connection )
        self._gpudb_url_path    = str( url_path )
    # end __init__
# end class _ConnectionToken


# ---------------------------------------------------------------------------
# Utility Functions
# ---------------------------------------------------------------------------
class _Util(object):

    @staticmethod
    def is_ok( response_object ):
        """Returns True if the response object's status is OK."""
        return (response_object['status_info']['status'] == 'OK')
    # end is_ok


    @staticmethod
    def get_error_msg( response_object ):
        """Returns the error message for the query, if any.  None otherwise."""
        if (response_object['status_info']['status'] != 'ERROR'):
            return None
        return response_object['status_info']['message']
    # end get_error_msg


    @staticmethod
    def is_list_or_dict( arg ):
        """Returns whether the given argument either a list or a dict
        (or an OrderedDict).
        """
        return ( isinstance( arg, list )
                 or isinstance( arg, dict )
                 or isinstance( arg, collections.OrderedDict ) )
    # end is_list_or_dict

    if IS_PYTHON_3:
        # Declaring the python 3 version of this static method
        @staticmethod
        def str_to_bytes(value):
            return bytes( ord(b) for b in value )
        # end str_to_bytes
    else:
        # Declaring the python 2 version of this static method
        @staticmethod
        def str_to_bytes(value):
            if isinstance(value, unicode):
                data = bytes()
                for c in value:
                    data += chr(ord(c))
                return data
            else:
                # The python 2 output
                return value
        # end str_to_bytes
    # end py 2 vs. 3


    @staticmethod
    def ensure_bytes(value):
        if isinstance(value, bytes) and not isinstance(value, str):
            return value
        elif isinstance(value, basestring):
            return _Util.str_to_bytes(value)
        else:
            raise Exception("Unhandled data type: " + str(type(value)))
    # end ensure_bytes


    @staticmethod
    def bytes_to_str(value):
        return ''.join([chr(b) for b in value])
    # end bytes_to_str


    @staticmethod
    def ensure_str(value):
        if isinstance(value, basestring):
            if ( ( not isinstance(value, unicode) )
                 and (not IS_PYTHON_3) ): # Python 2
                return unicode( value, 'utf-8' )
            # Python 3
            return value
        elif isinstance(value, bytes):
            return _Util.bytes_to_str(value)
        else:
            raise Exception("Unhandled data type: " + str(type(value)))
    # end ensure_str


    @staticmethod
    def convert_dict_bytes_to_str(value):
        for key in list(value):
            val = value[key]
            if isinstance(val, bytes) and not isinstance(val, str):
                value[key] = ''.join([chr(b) for b in val])
            elif isinstance(val, dict):
                value[key] = _Util.convert_dict_bytes_to_str(val)
        return value
    # end convert_dict_bytes_to_str


    @staticmethod
    def decode_binary_data( SCHEMA, encoded_data ):
        """Given a schema and binary encoded data, decode it.
        """
        encoded_data = _Util.ensure_bytes( encoded_data )
        output = BytesIO( encoded_data )
        bd = io.BinaryDecoder( output )
        reader = io.DatumReader( SCHEMA )
        out = reader.read( bd ) # read, give a decoder
        return out
    # end decode_binary_data


    @staticmethod
    def encode_binary_data( SCHEMA, raw_data, encoding = "binary" ):
        """Given a schema and raw data, encode it.
        """
        output = BytesIO()
        be = io.BinaryEncoder( output )

        # Create a 'record' (datum) writer
        writer = io.DatumWriter( SCHEMA )
        writer.write( raw_data, be )

        result = None
        if encoding.lower() == 'json':
            result = _Util.ensure_str( output.getvalue() )
        else:
            result = output.getvalue()
        return result
    # end encode_binary_data


    @staticmethod
    def encode_binary_data_cext( SCHEMA, raw_data, encoding = "binary" ):
        """Given a schema and raw data, encode it.
        """
        result = None
        if encoding.lower() == 'json':
            result = _Util.ensure_str( output.getvalue() )
        else:
            result = SCHEMA.encode( raw_data )
        return result
    # end encode_binary_data_cext



    # Regular expression needed for converting records to protocol.Record objects
    re_datetime_full  = re.compile("^\d{4}-\d{2}-\d{2}\s+\d{1,2}:\d{2}:\d{2}\.\d{1,3}\Z")
    re_datetime_noMS  = re.compile("^\d{4}-\d{2}-\d{2}\s+\d{1,2}:\d{2}:\d{2}\Z")
    re_date_only      = re.compile("^\d{4}-\d{2}-\d{2}\Z")
    re_time_only_ms   = re.compile("^\d{1,2}:\d{2}:\d{2}\.\d{1,3}\Z")
    re_time_only_noMS = re.compile("^\d{1,2}:\d{2}:\d{2}\Z")

    @staticmethod
    def convert_binary_data_to_cext_records( db, table_name, records, record_type = None ):
        """Given a list of objects, convert them to either bytes or Record objects.
        If the records are already of type Record, do nothing.  If not, then, if the record
        type is given, convert the records into Record objects.

        If the associated RecordType is not given, then it is assumed that they have already
        been encoded using the python avro package.

        Each record can be a list of values (in that case, it is assumed that the values
        are given in order of column declaration), a dict, or an OrderedDict.

        Parameters:
            db (GPUdb)
                A client handle for the connection to the database.
            table_name (str)
                The name of the table to which the records are associated,
                must be the name of an existing table.
            records (list of Records, lists, dicts, or OrderedDicts)
                A list of records.  Each record can be a list of values,
                a dict, an OrderedDict, or a Record.
            record_type (RecordType)
                The type for the records.  If not given, then it will be
                deduced by invoking :meth:`GPUdb.show_table`.  Default None.

        Returns:
            A tuple the first element of which is a boolean indicating whether
            the records are encoded into the c-extension Record objects, and the
            second element is the list of encoded records.  If avro encoding is used,
            then the encoded records are simply bytes.  If the c-extension avro
            package is used, then the encoded records are Record objects.
        """
        if not records: # empty list; nothing to do
            return (True, records)

        # If all the objects are of type Record, no conversion is necessary
        if all( [ isinstance(r, Record) for r in records ] ):
            # True == the records of c-extension Record type
            return (True, records)

        if not record_type:
            # False == the records were NOT converted to c-extension Record objects
            # (it is assumed that the python avro package has been used to encode
            # the records)
            return (False, records)
        
        # If the record type is given, ensure that it is a RecordType
        if not isinstance( record_type, RecordType):
            raise GPUdbException( "Argument 'record_type' must be a RecordType object; "
                                  "given {}".format( str(type( record_type )) ) )

        # Now convert each record object into Record
        converted_records = []
        try:
            for obj in records:
                # Each record object's type will be individually assessed and the
                # relevant conversion be applied
                if isinstance( obj, Record ):
                    # Already a Record
                    converted_records.append( obj )
                    continue # skip to the next object
                elif isinstance( obj, GPUdbRecord ):
                    # A GPUdbRecord ; get the (column name, column value) pairs
                    obj = obj.data
                elif isinstance( obj, list ):
                    # A list is given; create (col name, col value) pairs; using the dict constructor
                    # to support python 2.6)
                    obj = dict( [ (record_type[ i ].name, col_val) for (i, col_val) in enumerate( obj ) ] )
                    # obj = { record_type[ i ].name: col_val for (i, col_val) in enumerate( obj ) }
                elif not isinstance( obj, (dict, collections.OrderedDict)):
                    raise GPUdbException( "Unrecognized format for record (accepted: "
                                          "Record, GPUdbRecord, list, dict, OrderedDict): "
                                          + str(type( obj )) )
                # end if

                # Create a Record object based on the column values
                record = Record( record_type )
                for column in record_type:
                    col_name = column.name
                    col_value = obj[ col_name ]
                
                    # Handle nulls
                    if col_value is None:
                        record[ col_name ] = col_value
                        continue
                    # end if
                
                    # Get column data type
                    col_data_type = column.data_type

                    # Handle unicode
                    if (col_data_type == "string"):
                        if (not IS_PYTHON_3):
                            col_value = _Util.ensure_str( col_value )
                    elif (col_data_type == "decimal"): # Handle decimal
                        raise GPUdbException("TODO: *********type 'decimal' not supported yet*********")
                    elif (col_data_type == "ipv4"): # Handle IPv4
                        raise GPUdbException("TODO: *********type 'ipv4' not supported yet*********")
                    elif (col_data_type == "bytes"):
                        col_value = _Util.ensure_bytes( col_value )

                    # NO NEED TO CHECK DATE & TIME FORMATS DUE TO "init_with_now";
                    # but keeping it around in case the C-module code changes again.
                    # # Handle datetime
                    # elif (col_data_type == "datetime"):
                    #     # Conversion needed if it is NOT already a datetime struct
                    #     if not isinstance( col_value, datetime.datetime ):
                    #         # Better be a string if not a datetime object
                    #         if not isinstance( col_value, basestring ):
                    #             raise GPUdbException( "'datetime' type column value must be a datetime "
                    #                                   "object or a string, given {}".format( str( type( col_value ) ) ) )

                    #         col_value = col_value.strip()
                        
                    #         if _Util.re_datetime_full.match( col_value ):
                    #             # Full datetime with time (including milliseconds)
                    #             col_value = datetime.datetime.strptime( col_value, "%Y-%m-%d %H:%M:%S.%f" )
                    #         elif _Util.re_datetime_noMS.match( col_value ):
                    #             # Date and time, but no milliseconds
                    #             col_value = datetime.datetime.strptime( col_value, "%Y-%m-%d %H:%M:%S" )
                    #         elif _Util.re_date_only.match( col_value ):
                    #             # Date only (no time)
                    #             col_value = datetime.datetime.strptime( col_value, "%Y-%m-%d" )
                    #         else:
                    #             raise GPUdbException( "Could not convert value to datetime pattern ('YYYY-MM-DD [HH:MM:SS[.mmm]]'); "
                    #                                   "given '{}'".format( col_value ) )
                    #         # end if
                    #     # end if
                    # elif (col_data_type == "date"): # Handle date
                    #     # Conversion needed if it is NOT already a date struct
                    #     if not isinstance( col_value, datetime.date ):
                    #         print ("Got date; type is {}; value is '{}'".format( str(type(col_value)), col_value ) ) # debug~~~~~~
                    #         # Better be a string if not a date object
                    #         if not isinstance( col_value, basestring ):
                    #             raise GPUdbException( "'date' type column value must be a datetime.date "
                    #                                   "object or a string, given {}".format( str( type( col_value ) ) ) )

                    #         col_value = col_value.strip()

                    #         # Check that it matches the date pattern
                    #         if _Util.re_date_only.match( col_value ):
                    #             col_value = datetime.datetime.strptime( col_value, "%Y-%m-%d" ).date()
                    #         else:
                    #             raise GPUdbException( "Could not convert value to date pattern ('YYYY-MM-DD'); "
                    #                                   "given '{}'".format( col_value ) )
                    #         # end if
                    #     # end if
                    # elif (col_data_type == "time"): # Handle time
                    #     # Conversion needed if it is NOT already a time struct
                    #     if not isinstance( col_value, datetime.time ):
                    #         # Better be a string if not a time object
                    #         if not isinstance( col_value, basestring ):
                    #             raise GPUdbException( "'time' type column value must be a datetime.time "
                    #                                   "object or a string, given {}".format( str( type( col_value ) ) ) )

                    #         col_value = col_value.strip()

                    #         # Check that it matches the allowed time patterns
                    #         if _Util.re_time_only_ms.match( col_value ):
                    #             # Time with milliseconds
                    #             col_value = datetime.datetime.strptime( col_value, "%H:%M:%S.%f" ).time()
                    #         elif _Util.re_time_only_noMS.match( col_value ):
                    #             # Time without milliseconds
                    #             col_value = datetime.datetime.strptime( col_value, "%H:%M:%S" ).time()
                    #         else:
                    #             raise GPUdbException( "Could not convert value to time pattern ('HH:MM:SS[.mmm]'); "
                    #                                   "given '{}'".format( col_value ) )
                    #         # end if
                    #     # end if
                    # end handling special data type conversions
                
                    record[ col_name ] = col_value
                # end inner loop
            
                converted_records.append( record )
            # end loop
        except GPUdbException as e:
            raise
        except KeyError as e:
            raise GPUdbException( "Missing column value for '{}'".format( str(e) ) )
        except:
            raise GPUdbException( str( sys.exc_info()[1] ) )

        # True == the records were converted to c-extension Record objects
        return (True, converted_records)
    # end convert_binary_data_to_cext_records

    
    # ----------- Begin override of strftime ------------------
    # Override datetime's strftime which in python does not accept
    # years before 1900--how annoying!

    # remove the unsupposed "%s" command.  But don't
    # do it if there's an even number of %s before the s
    # because those are all escaped.  Can't simply
    # remove the s because the result of
    #  %sY
    # should be %Y if %s isn't supported, not the
    # 4 digit year.
    _illegal_s = re.compile(r"((^|[^%])(%%)*%s)")

    @staticmethod
    def __findall(text, substr):
         # Also finds overlaps
         sites = []
         i = 0
         while 1:
             j = text.find(substr, i)
             if j == -1:
                 break
             sites.append(j)
             i=j+1
         return sites
    # end __findall


    # Every 28 years the calendar repeats, except through century leap
    # years where it's 6 years.  But only if you're using the Gregorian
    # calendar.  ;)

    @staticmethod
    def strftime(dt, fmt):
        if _Util._illegal_s.search(fmt):
            raise TypeError("This strftime implementation does not handle %s")
        if dt.year > 1900:
            return dt.strftime(fmt)

        # Handle the microsecond, if desired in the format
        microsecond = None
        if ".%f" in fmt:
            # Zero-padded six-digit microseconds
            microsecond = ( "."
                            + ("{f}".format( f = dt.microsecond )).rjust( 6, '0' ) )
            # Remove .%f from the format
            fmt = fmt.replace(".%f", "")
        # end if

        year = dt.year
        # For every non-leap year century, advance by
        # 6 years to get into the 28-year repeat cycle
        delta = 2000 - year
        off = 6*(delta // 100 + delta // 400)
        year = year + off

        # Move to around the year 2000
        year = year + ((2000 - year)//28)*28
        timetuple = dt.timetuple()
        s1 = time.strftime(fmt, (year,) + timetuple[1:])
        sites1 = _Util.__findall(s1, str(year))

        s2 = time.strftime(fmt, (year+28,) + timetuple[1:])
        sites2 = _Util.__findall(s2, str(year+28))

        sites = []
        for site in sites1:
            if site in sites2:
                sites.append(site)

        s = s1
        syear = "%4d" % (dt.year,)
        for site in sites:
            s = s[:site] + syear + s[site+4:]
        # end loop

        if microsecond:
            s += microsecond

        return s
    # end strftime

    # ----------- end override ------------------


    @staticmethod
    def convert_cext_records_to_ordered_dicts( records ):
        """Given a list of Record objects, convert them to OrderedDicts if the
        record type contains any date, time, datetime types. Otherwise,
        the records (of Record type) will be returned without
        any conversion since they are equivalent to OrderedDicts.

        If the records are already of type GPUdbRecord or OrderedDicts, do
        nothing (return those)

        Parameters:
            records (list of Records, lists, dicts, or OrderedDicts)
                A list of records.  Each record can be a list of values,
                a dict, an OrderedDict, or a Record.

        Returns:
            If the record type contains any date, time, datetime, then they will
            be converted to strings and a list of OrderedDicts will be returned.
            Otherwise, the records (of Record type) will be returned without
            any conversion since they are equivalent to OrderedDicts.
        """
        if not records: # empty list
            return records

        # If all the objects are OrderedDicts or GPUdbRecords, no conversion is necessary
        if isinstance( records[0], (GPUdbRecord, collections.OrderedDict) ):
            return records

        # If a conversion is necessary, make sure that all objects are Records
        if not all( [ isinstance(r, Record) for r in records ] ):
            raise GPUdbException( "Either all records must be Record objects or none; "
                                  "a mix is given." )

        # Check if the record contains any date, time, and datetime types
        types_needing_conversion = ["datetime", "date", "time", "decimal", "ipv4"]
        record_type = records[ 0 ].type
        columns_needing_conversion = [ column for column in record_type
                                       if (column.data_type in types_needing_conversion) ]

        if not columns_needing_conversion:
            return records

        # Create OrderedDict objects with the special column values converted
        # to strings
        converted_records = []
        for obj in records:
            # Create an OrderedDict object based on the record
            record = collections.OrderedDict( map( list, obj.items() ) )

            # We only need to convert the special columns
            for column in columns_needing_conversion:
                col_name = column.name
                col_value = record[ col_name ]
                
                # Handle nulls
                if col_value is None:
                    record[ col_name ] = col_value
                    continue
                # end if
                
                # Get column data type
                col_data_type = column.data_type

                # For now, all datetime formats are just simple strings; so need
                # to do the following checks anymore; but keeping it around in case
                # the C-module code changes again.
                # if (col_data_type == "datetime"):
                #     col_value = _Util.strftime( col_value, "%Y-%m-%d %H:%M:%S.%f" )[ : -3 ]
                # elif (col_data_type == "date"): # Handle date
                #     col_value = _Util.strftime( col_value, "%Y-%m-%d" )
                # elif (col_data_type == "time"): # Handle time
                #     col_value = col_value.strftime( "%H:%M:%S.%f" )[ : -3 ]

                # Handle decimal and IPv4
                if (col_data_type == "decimal"): # Handle decimal
                    raise GPUdbException("TODO: *********type 'decimal' not supported yet*********")
                elif (col_data_type == "ipv4"): # Handle IPv4
                    raise GPUdbException("TODO: *********type 'ipv4' not supported yet*********")
                # end handling special data type conversions
                
                record[ col_name ] = col_value
            # end inner loop
            
            converted_records.append( record )
        # end loop

        return converted_records
    # end convert_cext_records_to_ordered_dicts
    
    
# end class _Util

# ---------------------------------------------------------------------------
# Utility Classes
# ---------------------------------------------------------------------------
class AttrDict(dict):
    """A dict subclass whose entries are also reachable via attribute ('.')
    notation, e.g. ``response.status_info`` as well as
    ``response['status_info']``.
    """

    def __init__(self, *args, **kwargs):
        # Build the underlying dict first, then alias the instance's
        # attribute dictionary to the mapping itself so that every key
        # doubles as an attribute.
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self
    # end __init__

    def is_ok(self):
        """Returns True if the response object's status is OK."""
        try:
            status = self.__dict__['status_info']['status']
        except KeyError as ex:
            raise GPUdbException("Unknown wrapped object; could not find "
                                 " the following key: {}".format(str(ex)))
        return status == 'OK'
    # end is_ok

    def get_error_msg(self):
        """Returns the error message for the query, if any.  None otherwise."""
        try:
            status_info = self.__dict__['status_info']
            if status_info['status'] != 'ERROR':
                return None
            return status_info['message']
        except KeyError as ex:
            raise GPUdbException("Unknown wrapped object; could not find "
                                 " the following key: {}".format(str(ex)))
    # end get_error_msg

# end class AttrDict


# ---------------------------------------------------------------------------
# GPUdbColumnProperty - Class to Handle GPUdb Column Properties
# ---------------------------------------------------------------------------

class GPUdbColumnProperty(object):
    """Column properties used for GPUdb record data types.  The properties
    are class-level read-only properties, so the user can use them as such::

        GPUdbColumnProperty.prop_name
    """

    DATA = "data"
    """str: Default property for all numeric and string type columns; makes
    the column available for GPU queries.
    """

    TEXT_SEARCH = "text_search"
    """str: Valid only for 'string' columns.  Enables full text search for
    string columns.  Can be set independently of *data* and *store_only*.
    """

    STORE_ONLY = "store_only"
    """str: Persist the column value but do not make it available to queries
    (e.g. :meth:`.filter`)-i.e. it is mutually exclusive to the *data*
    property.  Any 'bytes' type column must have a *store_only* property.
    This property reduces system memory usage.
    """

    DISK_OPTIMIZED = "disk_optimized"
    """str: Works in conjunction with the *data* property for string columns.
    This property reduces system disk usage by disabling reverse string
    lookups.  Queries like :meth:`.filter`, :meth:`.filter_by_list`, and
    :meth:`.filter_by_value` work as usual but :meth:`.aggregate_unique`,
    :meth:`.aggregate_group_by` and :meth:`.get_records_by_column` are not
    allowed on columns with this property.
    """

    TIMESTAMP = "timestamp"
    """str: Valid only for 'long' columns.  Indicates that this field
    represents a timestamp and will be provided in milliseconds since the
    Unix epoch: 00:00:00 Jan 1 1970.  Dates represented by a timestamp must
    fall between the year 1000 and the year 2900.
    """

    DECIMAL = "decimal"
    """str: Valid only for 'string' columns.  It represents a SQL type
    NUMERIC(19, 4) data type.  There can be up to 15 digits before the
    decimal point and up to four digits in the fractional part.  The value
    can be positive or negative (indicated by a minus sign at the beginning).
    This property is mutually exclusive with the *text_search* property.
    """

    DATE = "date"
    """str: Valid only for 'string' columns.  Indicates that this field
    represents a date and will be provided in the format 'YYYY-MM-DD'.  The
    allowable range is 1000-01-01 through 2900-01-01.  This property is
    mutually exclusive with the *text_search* property.
    """

    TIME = "time"
    """str: Valid only for 'string' columns.  Indicates that this field
    represents a time-of-day and will be provided in the format
    'HH:MM:SS.mmm'.  The allowable range is 00:00:00.000 through
    23:59:59.999.  This property is mutually exclusive with the
    *text_search* property.
    """

    DATETIME = "datetime"
    """str: Valid only for 'string' columns.  Indicates that this field
    represents a datetime and will be provided in the format
    'YYYY-MM-DD HH:MM:SS.mmm'.  The allowable range is
    1000-01-01 00:00:00.000 through 2900-01-01 23:59:59.999.  This property
    is mutually exclusive with the *text_search* property.
    """

    CHAR1 = "char1"
    """str: This property provides optimized memory, disk and query
    performance for string columns.  Strings with this property must be no
    longer than 1 character.
    """

    CHAR2 = "char2"
    """str: This property provides optimized memory, disk and query
    performance for string columns.  Strings with this property must be no
    longer than 2 characters.
    """

    CHAR4 = "char4"
    """str: This property provides optimized memory, disk and query
    performance for string columns.  Strings with this property must be no
    longer than 4 characters.
    """

    CHAR8 = "char8"
    """str: This property provides optimized memory, disk and query
    performance for string columns.  Strings with this property must be no
    longer than 8 characters.
    """

    CHAR16 = "char16"
    """str: This property provides optimized memory, disk and query
    performance for string columns.  Strings with this property must be no
    longer than 16 characters.
    """

    CHAR32 = "char32"
    """str: This property provides optimized memory, disk and query
    performance for string columns.  Strings with this property must be no
    longer than 32 characters.
    """

    CHAR64 = "char64"
    """str: This property provides optimized memory, disk and query
    performance for string columns.  Strings with this property must be no
    longer than 64 characters.
    """

    CHAR128 = "char128"
    """str: This property provides optimized memory, disk and query
    performance for string columns.  Strings with this property must be no
    longer than 128 characters.
    """

    CHAR256 = "char256"
    """str: This property provides optimized memory, disk and query
    performance for string columns.  Strings with this property must be no
    longer than 256 characters.
    """

    INT8 = "int8"
    """str: This property provides optimized memory and query performance for
    int columns.  Ints with this property must be between -128 and +127
    (inclusive)
    """

    INT16 = "int16"
    """str: This property provides optimized memory and query performance for
    int columns.  Ints with this property must be between -32768 and +32767
    (inclusive)
    """

    IPV4 = "ipv4"
    """str: This property provides optimized memory, disk and query
    performance for string columns representing IPv4 addresses (i.e.
    192.168.1.1).  Strings with this property must be of the form: A.B.C.D
    where A, B, C and D are in the range of 0-255.
    """

    WKT = "wkt"
    """str: Valid only for 'string' and 'bytes' columns.  Indicates that this
    field contains geospatial geometry objects in Well-Known Text (WKT) or
    Well-Known Binary (WKB) format.
    """

    PRIMARY_KEY = "primary_key"
    """str: This property indicates that this column will be part of (or the
    entire) `primary key <../../../concepts/tables.html#primary-keys>`_.
    """

    SHARD_KEY = "shard_key"
    """str: This property indicates that this column will be part of (or the
    entire) `shard key <../../../concepts/tables.html#shard-keys>`_.
    """

    NULLABLE = "nullable"
    """str: This property indicates that this column is nullable.  However,
    setting this property is insufficient for making the column nullable.
    The user must declare the type of the column as a union between its
    regular type and 'null' in the avro schema for the record type in input
    parameter *type_definition*.  For example, if a column is of type integer
    and is nullable, then the entry for the column in the avro schema must
    be: ['int', 'null'].  The C++, C#, Java, and Python APIs have built-in
    convenience for bypassing setting the avro schema by hand.  For those
    languages, one can use this property as usual and not have to worry
    about the avro schema for the record.
    """

    DICT = "dict"
    """str: This property indicates that this column should be dictionary
    encoded.  It can only be used in conjunction with string columns marked
    with a charN or date property or with int or long columns.  This
    property is appropriate for columns where the cardinality (the number of
    unique values) is expected to be low, and can save a large amount of
    memory.
    """

    INIT_WITH_NOW = "init_with_now"
    """str: For columns with attributes of date, time, datetime or timestamp,
    at insert time, replace empty strings and invalid timestamps with NOW()
    """

# end class GPUdbColumnProperty


# ---------------------------------------------------------------------------
# GPUdbRecordColumn - Class to Handle GPUdb Record Column Data Types
# ---------------------------------------------------------------------------
class GPUdbRecordColumn(object):
    """Represents a column in a GPUdb record object (:class:`.GPUdbRecordType`).
    """

    class _ColumnType(object):
        """A class acting as an enum for the data types allowed for a column."""
        INT    = "int"
        LONG   = "long"
        FLOAT  = "float"
        DOUBLE = "double"
        STRING = "string"
        BYTES  = "bytes"
    # end class _ColumnType

    # The allowed data types
    _allowed_data_types = [ _ColumnType.INT,    _ColumnType.LONG,
                            _ColumnType.FLOAT,  _ColumnType.DOUBLE,
                            _ColumnType.STRING, _ColumnType.BYTES ]

    # All non-numeric data types
    _non_numeric_data_types = [ _ColumnType.STRING, _ColumnType.BYTES ]

    # All allowed numeric data types
    _numeric_data_types = [ _ColumnType.INT,   _ColumnType.LONG,
                            _ColumnType.FLOAT, _ColumnType.DOUBLE ]

    # All allowed integral numeric data types
    _numeric_integral_data_types = [ _ColumnType.INT, _ColumnType.LONG ]

    # All allowed decimal numeric data types
    _numeric_decimal_data_types = [ _ColumnType.FLOAT, _ColumnType.DOUBLE ]

    def __init__( self, name, column_type, column_properties = None,
                  is_nullable = False ):
        """Construct a GPUdbRecordColumn object.

        Parameters:
            name (str)
                The name of the column, must be a non-empty string.
            column_type (str)
                The data type of the column.  Must be one of int, long,
                float, double, string, bytes.
            column_properties (list)
                Optional list of properties for the column.
            is_nullable (bool)
                Optional boolean flag indicating whether the column is
                nullable.

        Raises:
            GPUdbException
                If any argument fails validation.
        """
        # Validate and save the name
        if (not name):
            raise GPUdbException( "The name of the column must be a non-empty string; given "
                                  + repr(name) )
        self._name = name

        # Validate and save the data type
        if column_type not in self._allowed_data_types:
            raise GPUdbException( "Data type must be one of "
                                  + str(self._allowed_data_types)
                                  + "; given " + str(column_type) )
        self._column_type = column_type

        # Validate and save the column properties
        if not column_properties: # it's ok to not have any
            column_properties = []
        if not isinstance( column_properties, list ):
            raise GPUdbException( "'column_properties' must be a list; given "
                                  + str(type(column_properties)) )

        # Sort the column properties so that the order for a given set of
        # properties is always the same--handy for equivalency checks.
        # BUG FIX: the original sorted with key = lambda x: str(x[0]), i.e.
        # by the property's *first character* only.  Since sorted() is
        # stable, properties sharing a first letter (e.g. 'data'/'dict')
        # kept their caller-supplied relative order, so equal property sets
        # could compare unequal in __eq__.  Sorting by the full string makes
        # the order truly canonical.
        self._column_properties = sorted( column_properties, key = str )

        # Deduce nullability from the given properties
        self._is_nullable = False # default value
        if (GPUdbColumnProperty.NULLABLE in self.column_properties):
            self._is_nullable = True

        # Check the optional 'is_nullable' argument (accepts the boolean
        # values only, as before)
        if is_nullable not in [True, False]:
            raise GPUdbException( "'is_nullable' must be a boolean value; given "
                                  + repr(type(is_nullable)) )
        if (is_nullable == True):
            self._is_nullable = True
            # Enter the 'nullable' property into the list of properties, even
            # though GPUdb doesn't actually use it (make sure not to make
            # duplicates)
            if (GPUdbColumnProperty.NULLABLE not in self._column_properties):
                self._column_properties.append( GPUdbColumnProperty.NULLABLE )
                # Re-sort for equivalency tests down the road
                self._column_properties = sorted( self._column_properties,
                                                  key = str )
            # end inner if
        # end if
    # end __init__

    @property
    def name(self): # read-only name
        """The name of the column."""
        return self._name
    # end name

    @property
    def column_type(self): # read-only column_type
        """The data type of the column."""
        return self._column_type
    # end column_type

    @property
    def column_properties(self): # read-only column_properties
        """The properties of the column."""
        return self._column_properties
    # end column_properties

    @property
    def is_nullable(self): # read-only is_nullable
        """The nullability of the column."""
        return self._is_nullable
    # end is_nullable

    def __eq__( self, other ):
        if isinstance(other, self.__class__):
            if ( self._name != other.name ):
                return False
            if ( self._column_type != other.column_type ):
                return False
            if ( self._is_nullable != other.is_nullable ):
                return False
            if ( self._column_properties == other.column_properties ):
                return True

            # The column properties are tricky; need to disregard
            # 'data' and 'text_search'
            disregarded_props = [ GPUdbColumnProperty.TEXT_SEARCH,
                                  GPUdbColumnProperty.DATA ]
            LHS_column_properties = [ prop for prop in self._column_properties
                                      if prop not in disregarded_props ]
            RHS_column_properties = [ prop for prop in other.column_properties
                                      if prop not in disregarded_props ]
            if (LHS_column_properties == RHS_column_properties):
                return True

            return False # Column properties did not match
        else:
            return False
    # end __eq__

    def __ne__(self, other):
        return not self.__eq__(other)
    # end __ne__

# end class GPUdbRecordColumn


# ---------------------------------------------------------------------------
# GPUdbRecordType - Class to Handle GPUdb Record Data Types
# ---------------------------------------------------------------------------
class GPUdbRecordType(object):
    """Represent the data type for a given record in GPUdb.  Has convenience
    functions for creating the type in GPUdb (among others).
    """

    def __init__( self, columns = None, label = "", schema_string = None,
                  column_properties = None ):
        """Create a GPUdbRecordType object which represents the data type for
        a given record for GPUdb.

        Parameters:
            columns (list)
                A list of :class:`.GPUdbRecordColumn` objects.  Either this
                argument or the schema_string argument must be given.
            label (str)
                Optional string label for the column.
            schema_string (str)
                The JSON string containing the schema for the type.  Either
                this argument or the columns argument must be given.
            column_properties (dict)
                Optional dict that lists the properties for the columns of
                the type.  Meant to be used in conjunction with schema_string
                only; will be ignored if columns is given.

        Raises:
            GPUdbException
                If validation or construction fails.
        """
        # Validate and save the label
        if not isinstance( label, basestring ):
            raise GPUdbException( "Column label must be a string; given "
                                  + str(type( label )) )
        self._label = label

        # The server always uses this hardcoded name and trumps any label
        self.name = "type_name"

        # Either columns or schema_string must be given, but not both!
        # (identity comparison with None is the idiomatic form)
        if ( (columns is None) and (schema_string is None) ):
            raise GPUdbException( "Either columns or schema_string must be given, but none is!" )
        elif ( (columns is not None) and (schema_string is not None) ):
            raise GPUdbException( "Either columns or schema_string must be given, but not both!" )

        # Construct the object from the given columns or the schema string
        try:
            if (columns is not None):
                self.__initiate_from_columns( columns )
            else:
                self.__initiate_from_schema_string( schema_string,
                                                    column_properties )
        except Exception as ex:
            raise GPUdbException( ex )

        # The type hasn't been registered with GPUdb yet
        self._type_id = None
    # end __init__

    def __initiate_from_columns( self, columns ):
        """Private method that constructs the object using the given columns.

        Parameters:
            columns (list)
                A list of GPUdbRecordColumn objects or a list with the
                following format: [name, type, ...] where ... is optional
                properties.  For example, ['x', 'int', 'int8']
        """
        # Validate the columns
        if not columns: # Must NOT be empty
            raise GPUdbException( "Non-empty list of columns must be given. Given none." )
        if not isinstance( columns, list ): # Must be a list
            raise GPUdbException( "Non-empty list of columns must be given. Given "
                                  + str(type( columns )) )

        # Check if the list contains only GPUdbRecordColumns, then nothing to do
        if all( isinstance( x, GPUdbRecordColumn ) for x in columns ):
            self._columns = columns
        else: # unroll the information contained within
            # If the caller provided one list of arguments, wrap it into a
            # list of lists so we can properly iterate over
            columns = columns if all( isinstance( elm, list ) for elm in columns ) \
                      else [ columns ]

            # Unroll the information about the column(s) and create
            # GPUdbRecordColumn objects
            self._columns = []
            for col_info in columns:
                if len( col_info ) > 2:
                    # Arguments 3 and beyond--these are properties--must be
                    # combined into one list argument
                    self._columns.append( GPUdbRecordColumn( col_info[0],
                                                             col_info[1],
                                                             col_info[2:] ) )
                elif len( col_info ) < 2:
                    # Need at least two elements: the name and the type
                    raise GPUdbException( "Need a list with the column name, type, and optional properties; "
                                          "given '%s'" % col_info )
                else:
                    self._columns.append( GPUdbRecordColumn( *col_info ) )
                # end if-else
            # end for
        # end if-else

        # Column property container
        self._column_properties = {}

        # Avro schema string field container
        fields = []

        # Validate each column and deduce its properties
        for col in self._columns:
            # Check that each element is a GPUdbRecordColumn object
            if not isinstance( col, GPUdbRecordColumn ):
                raise GPUdbException( "columns must contain only GPUdbRecordColumn objects. Given "
                                      + str(type( col )) )

            # Extract the column's properties, if any.
            # BUG FIX: sort by the full property string; the original used
            # key = lambda x: str(x[0]) (first character only), which does
            # not yield a canonical order for properties sharing a first
            # letter and could make equal types compare unequal in __eq__.
            if col.column_properties:
                self._column_properties[ col.name ] = sorted( col.column_properties,
                                                              key = str )
            # done handling column props

            # Create the field for the schema string
            field_type = '"{_type}"'.format( _type = col.column_type )

            # Handle nullable fields (union of the scalar type with "null")
            if col.is_nullable:
                field_type = ('[{_type}, "null"]'.format( _type = field_type ))

            field = ('{{"name": "{_name}", "type": {_type} }}'
                     .format( _name = col.name, _type = field_type ))
            fields.append( field )
        # end for loop

        # Put the fields together
        fields = ", ".join( fields )

        # Generate the avro schema string (whitespace is stripped below, so
        # only the JSON content matters)
        schema_string = """{{ "type" : "record", "name" : "{_label}", "fields" : [ {_fields} ]}} """ \
                        .format( _label = self.name, _fields = fields )
        schema_string = schema_string.replace( "\t", "" ).replace( "\n", "" )

        # Generate the avro schema and save it
        self._record_schema = schema.parse( schema_string )

        # Save this version of the schema string so that it is standard
        self._schema_string = json.dumps( self._record_schema.to_json() )

        # Create and save a RecordType object
        self._record_type = RecordType.from_type_schema( "", self._schema_string,
                                                         self._column_properties )
        return
    # end __initiate_from_columns

    def __initiate_from_schema_string( self, schema_string,
                                       column_properties = None ):
        """Private method that constructs the object using the given schema
        string.

        Parameters:
            schema_string (str)
                The schema string for the record type.
            column_properties (dict)
                An optional dict containing property information for some or
                all of the columns.
        """
        # Validate the schema string
        if not schema_string: # Must NOT be empty!
            raise GPUdbException( "A schema string must be given. Given none." )

        # Try to parse the schema string, this would also help us validate it
        self._record_schema = schema.parse( schema_string )

        # Rename the schema with a generic name just like the database
        self._record_schema._props[ "name" ] = self.name

        # If no exception was thrown above, then save the schema string
        self._schema_string = json.dumps( self._record_schema.to_json() )

        # Save the column properties, if any
        self._column_properties = column_properties if column_properties else {}

        # Now, deduce the columns from the schema string
        schema_json = self._record_schema.to_json()

        columns = []
        for field in schema_json["fields"]:
            # Get the field's type
            field_type = field["type"]

            # Is the type nullable (a union containing "null")?
            is_nullable = False
            if ( isinstance( field_type, list ) and ("null" in field_type) ):
                is_nullable = True
                # Then, also get the scalar type of the field.
                # BUG FIX: the original took field_type[0], which yields the
                # literal "null" for unions written as ["null", <type>];
                # pick the first non-"null" member instead.
                field_type = [ t for t in field_type if t != "null" ][ 0 ]
            # end if

            field_name = field["name"]

            # Get any properties for the column
            col_props = None
            if (self._column_properties
                and (field_name in self._column_properties)):
                col_props = self._column_properties[ field_name ]
            # end if

            # Create the column object and add it to the list
            column = GPUdbRecordColumn( field["name"], field_type, col_props,
                                        is_nullable = is_nullable )
            columns.append( column )
        # end for

        # Save the columns
        self._columns = columns

        # Create and save a RecordType object
        self._record_type = RecordType.from_type_schema( "", self._schema_string,
                                                         self._column_properties )
        return
    # end __initiate_from_schema_string

    @property
    def columns(self): # read-only columns
        """A list of columns for the record type."""
        return self._columns
    # end columns

    @property
    def label(self): # read-only label
        """A label for the record type."""
        return self._label
    # end label

    @property
    def schema_string(self): # read-only schema string
        """The schema string for the record type."""
        return self._schema_string
    # end schema_string

    @property
    def record_schema(self): # read-only avro schema
        """The avro schema for the record type."""
        return self._record_schema
    # end record_schema

    @property
    def record_type(self): # read-only RecordType object
        """The RecordType object for the record type."""
        return self._record_type
    # end record_type

    @property
    def column_properties(self): # read-only column properties
        """The properties for the type's columns."""
        return self._column_properties
    # end column_properties

    @property
    def type_id(self): # read-only ID for the type
        """The ID for the type, if it has already been registered with GPUdb."""
        if not self._type_id:
            raise GPUdbException( "The record type has not been registered with GPUdb yet." )
        return self._type_id
    # end type_id

    def create_type( self, gpudb, options = None ):
        """Create the record type in GPUdb so that users can create tables
        using this type.

        Parameters:
            gpudb (GPUdb)
                A GPUdb object to connect to a GPUdb server.
            options (dict)
                Optional dictionary containing options for the /create/type
                call.

        Returns:
            The type ID.

        Raises:
            GPUdbException
                If the handle is not a GPUdb object or the server call fails.
        """
        # Validate the GPUdb handle
        if not isinstance( gpudb, GPUdb ):
            raise GPUdbException( "'gpudb' must be a GPUdb object; given "
                                  + str(type( gpudb )) )

        if not options:
            options = {}

        response = gpudb.create_type( self._schema_string, self._label,
                                      self._column_properties, options )
        if not _Util.is_ok( response ): # problem creating the type
            raise GPUdbException( _Util.get_error_msg( response ) )

        self._type_id = response[ "type_id" ]
        return self._type_id
    # end create_type

    def __eq__( self, other ):
        if isinstance(other, self.__class__):
            # Compare the schema strings of the two types
            if (self._schema_string != other.schema_string):
                return False

            # Now compare the properties (need to disregard 'data' and
            # 'text_search')
            disregarded_props = [ GPUdbColumnProperty.TEXT_SEARCH,
                                  GPUdbColumnProperty.DATA ]

            # Get the sanitized column properties for this type
            lhs_col_props = {}
            for name, props in self._column_properties.items():
                sanitized_props = [ prop for prop in props
                                    if (prop not in disregarded_props) ]
                if sanitized_props:
                    lhs_col_props[ name ] = sanitized_props
            # end loop

            # Get the sanitized column properties for the other type
            rhs_col_props = {}
            for name, props in other.column_properties.items():
                sanitized_props = [ prop for prop in props
                                    if (prop not in disregarded_props) ]
                if sanitized_props:
                    rhs_col_props[ name ] = sanitized_props
            # end loop

            if (lhs_col_props == rhs_col_props):
                return True # distilled props matched
            return False # properties did not match
        else:
            return False
    # end __eq__

    def __ne__(self, other):
        return not self.__eq__(other)
    # end __ne__

# end class GPUdbRecordType


# ---------------------------------------------------------------------------
# GPUdbRecord - Class to Handle GPUdb Record Data
# ---------------------------------------------------------------------------
[docs]class GPUdbRecord( object ): """Represent the data for a given record in GPUdb. Has convenience functions for encoding/decoding the data. """ @staticmethod
[docs] def decode_binary_data( record_type, binary_data ): """Decode binary encoded data (generally returned by GPUdb) using the schema for the data. Return the decoded data. Parameters: record_type (str or RecordType) If string, then the schema string for the record type, or a :class:`RecordType` object representing the type. binary_data (obj or list) The binary encoded data. Could be a single object or a list of data. Returns: The decoded data (a single object or a list) """ # Convert a single data object to a list if not isinstance( binary_data, list ): binary_data = [ binary_data ] # end if decoded_data = [] # Using the in-house c-extension for avro encoding and decoding if isinstance( record_type, RecordType ): # Decode the list of data for binary_datum in binary_data: decoded_data.append( record_type.decode_records( binary_datum )[0] ) # end for else: # use the python avro package to decode the data # Create an avro schema from the schema string record_type = schema.parse( record_type ) # Get an avro data reader data_reader = io.DatumReader( record_type ) # Decode the list of data for binary_datum in binary_data: decoded_data.append( _Util.decode_binary_data( record_type, binary_datum ) ) # end for # end if return decoded_data
# end decode_binary_data @staticmethod
[docs] def decode_dynamic_binary_data( record_type, binary_data ): """Decode binary encoded data (generally returned by GPUdb) using the schema for the data. Return the decoded data. Parameters: record_type (str or RecordType) If string, then the schema string for the record type, or a :class:`RecordType` object representing the type. binary_data (obj or list) The binary encoded data. Could be a single object or a list of data. Returns: The decoded data (a single object or a list) """ # Convert a single data object to a list if not isinstance( binary_data, list ): binary_data = [ binary_data ] # end if decoded_data = [] # Using the in-house c-extension for avro encoding and decoding if isinstance( record_type, RecordType ): # Decode the list of data for binary_datum in binary_data: decoded_data.append( record_type.decode_records( binary_datum )[0] ) # end for else: # use the python avro package to decode the data # Create an avro schema from the schema string record_type = schema.parse( record_type ) # Get an avro data reader data_reader = io.DatumReader( record_type ) # Decode the list of data for binary_datum in binary_data: decoded_data.append( _Util.decode_binary_data( record_schema, binary_datum ) ) # end for # end if return decoded_data
# end decode_dynamic_binary_data @staticmethod
[docs] def decode_json_string_data( json_string_data ): """Decode binary encoded data in string form (generally returned by GPUdb). Return the decoded data. Parameters: json_string_data (str) The stringified json encoded data. Could be a single object or a list of data. Returns: The decoded data (a single object or a list) """ # Decode the single data object if not isinstance( json_string_data, list ): json_string_data = json_string_data.replace( "\\U", "\\u") json_string_data = _Util.ensure_str( json_string_data ) decoded_datum = json.loads( json_string_data ) return decoded_datum # end if # Decode the list of data data decoded_data = [] for json_datum in json_string_data: json_datum = json_datum.replace( "\\U", "\\u") json_datum = _Util.ensure_str( json_datum ) decoded_datum = json.loads( json_datum, object_pairs_hook = collections.OrderedDict ) decoded_data.append( decoded_datum ) # end for return decoded_data
# end decode_json_string_data @staticmethod
[docs] def decode_dynamic_json_data_column_major( dynamic_json_data, dynamic_schema ): """Decode JSON encoded data (generally returned by GPUdb) using the embedded dynamic schema for the data. Return the decoded data. Parameters: dynamic_json_data (dict) The JSON encoded data with a dynamic schema. dynamic_schema (str) The schema string for the data Returns: The decoded data (a single object or a list) """ # Convert the dynamic schema to an Avro schema dynamic_schema = schema.parse( dynamic_schema ) decoded_data = collections.OrderedDict() column_names = dynamic_json_data['column_headers'] for i, column_name in enumerate( column_names ): column_index_name = "column_{}".format( i+1 ) # Double/float conversion here #get the datatype of the underlying data column_type = dynamic_schema.fields_dict[ column_index_name ].type.items.type if ( (column_type == 'double') or (column_type == 'float') ): decoded_data[ column_name ] = [float(x) for x in dynamic_json_data[ column_index_name ] ] else: decoded_data[ column_name ] = dynamic_json_data[column_index_name] return decoded_data
# end decode_dynamic_json_data_column_major @staticmethod
[docs] def decode_dynamic_json_data_row_major( dynamic_json_data, dynamic_schema ): """Decode JSON encoded data (generally returned by GPUdb) using the embedded dynamic schema for the data. Return the decoded data. Parameters: dynamic_json_data (dict) The JSON encoded data with a dynamic schema. dynamic_schema (str) The schema string for the data Returns: The decoded data in row-format (a single object or a list). """ # Convert the dynamic schema to an Avro schema dynamic_schema = schema.parse( dynamic_schema ) decoded_records = [] # Get the actual column names column_names = dynamic_json_data['column_headers'] # Get the index-based column names idx_column_names = [ name for name in dynamic_json_data.keys() if name not in ['column_headers', 'column_datatypes'] ] # Get the column types column_types = [ dynamic_schema.fields_dict[ n ].type.items.type for n in idx_column_names ] # How many records in total do we have? num_records = len( dynamic_json_data["column_1"] ) # Create all the records for i in list( range(0, num_records) ): record = collections.OrderedDict() # Create a single record for (col_name, col_idx_name, col_type) in zip(column_names, idx_column_names, column_types): # Get the column value col_val = dynamic_json_data[ col_idx_name ][ i ] # Convert double/float if ( (col_type == 'double') or (col_type == 'float') ): col_val = float( col_val ) record[ col_name ] = col_val # end inner loop # Add this record to the list decoded_records.append( record ) # end loop return decoded_records
# end decode_dynamic_json_data_row_major @staticmethod
[docs] def convert_data_col_major_to_row_major( col_major_data, col_major_schema_str ): """Given some column major data, convert it to row major data. Parameters: col_major_data (OrderedDict) An OrderedDict of arrays containing the data by column names. col_major_schema_str (str) A JSON schema string describing the column major data. Returns: A list of GPUdbRecord objects. """ if not isinstance( col_major_data, collections.OrderedDict ): raise GPUdbException( "Argument 'col_major_data' must be an OrderedDict;" " given %s" % str( type( col_major_data ) ) ) try: schema_json = json.loads( col_major_schema_str ) except Exception as e: raise GPUdbException( "Could not parse 'col_major_schema_str': " "%s" % str(e) ) # Create the schema for each record from the column-major format's schema columns = [] for col_name, field in zip(col_major_data.keys(), schema_json[ C._fields ]): field_type = field[ "type" ][ "items" ] if isinstance( field_type, (str, unicode) ): columns.append( [ col_name, field_type ] ) elif (isinstance( field_type, list ) and ("null" in field_type )): # The column is nullable columns.append( [ col_name, field_type[0], GPUdbColumnProperty.NULLABLE ] ) else: raise GPUdbException( "Unknown column type: {0}".format( field_type ) ) # end loop # Create a record type record_type = GPUdbRecordType( columns ) # Create the records records = [] for record in zip( *col_major_data.values() ): records.append( GPUdbRecord( record_type, list( record ) ) ) # end loop return records
# end convert_data_col_major_to_row_major @staticmethod
[docs] def transpose_data_to_col_major( row_major_data ): """Given some row major data, convert it to column major data. Parameters: row_major_data (list of :class:`Record` or collections.OrderedDicts) A list of :class:`Record` or collections.OrderedDicts objects containing the data. Returns: A dict of lists where the keys are column names and the values are lists (containing the values for the pertinent column of all the records) """ if not row_major_data: # Handle empty/none etc. return row_major_data # Turn a single record into a list, if applicable row_major_data = [ row_major_data ] if not isinstance( row_major_data, list ) else row_major_data # Get the record type if isinstance( row_major_data[ 0 ], Record ): column_names = row_major_data[ 0 ].type.keys() column_values = map( list, zip( *row_major_data ) ) # Need to use the dict constructor to be python 2.6 compatible transposed_data = collections.OrderedDict( zip( column_names, column_values ) ) else: column_names = row_major_data[ 0 ].keys() column_values = zip([ record.values() for record in row_major_data ]) # Trasnpose the data transposed_data = collections.OrderedDict() for col_name in column_names: column_values = [ record[ col_name ] for record in row_major_data ] transposed_data[ col_name ] = column_values # end loop # end if return transposed_data
    # end transpose_data_to_col_major

    def __init__( self, record_type, column_values ):
        """Create a GPUdbRecord object which holds the data for a given record.

        Parameters:
            record_type (GPUdbRecordType)
                A :class:`.GPUdbRecordType` object that describes the columns
                of this record.
            column_values (dict or list)
                Either a dict or a list that contains the values for the
                columns.  In either case, must contain values for ALL columns.
                If a list, then the columns must be in the correct order.

        Raises:
            GPUdbException: on type mismatch, wrong column count, or
                column-name mismatch.
        """
        # Validate and save the record type
        if not isinstance( record_type, GPUdbRecordType ):
            raise GPUdbException( "'record_type' must be a GPUdbRecordType; given "
                                  + str(type( record_type )) )
        self._record_type = record_type

        # Validate the column values
        if not _Util.is_list_or_dict( column_values ): # Must be a list or a dict
            raise GPUdbException( "Columns must be one of the following: list, dict, OrderedDict. "
                                  "Given " + str(type( column_values )) )
        if not column_values: # Must NOT be empty
            raise GPUdbException( "Column values must be given.  Given none." )

        # The column values must be saved in the order they're declared in the type
        self._column_values = collections.OrderedDict()

        # Get the expected number of columns based on the data type provided
        num_columns = len( self._record_type.columns )

        # Check that there are correct number of values
        if (len( column_values ) != num_columns ):
            raise GPUdbException( "Given list of column values does not have the correct (%d) "
                                  "number of values; it has %d"
                                  % (num_columns, len( column_values )) )

        # Check and save the column values
        # --------------------------------
        # Case 1: The values are given in a list -- the order is trusted to be
        # the type's declared order (only string-vs-numeric and null checks
        # are possible here)
        if isinstance( column_values, list ):
            for i in list( range(0, num_columns) ):
                column_name = self._record_type.columns[ i ].name
                # The given value for this column
                column_val  = column_values[ i ]

                # Check that the value is of the given type, save the value if it is
                if self.__is_valid_column_value( column_val, self._record_type.columns[ i ] ):
                    self._column_values[ column_name ] = column_val
            # end for loop
        else: # the values are given either in a dict or an OrderedDict
            # Check that the column names given match those of the record's type
            given_column_names       = set( column_values.keys() )
            record_type_column_names = set( [c.name for c in self._record_type.columns] )
            if ( given_column_names != record_type_column_names ):
                # Report whichever direction of the mismatch applies
                if (given_column_names - record_type_column_names):
                    raise GPUdbException( "Given column names do not match that of the record type. "
                                          "Extra column names are: "
                                          + str( (given_column_names - record_type_column_names) ))
                else:
                    raise GPUdbException( "Given column names do not match that of the record type. "
                                          "Missing column names are: "
                                          + str( (record_type_column_names - given_column_names) ))
            # end if

            # We will disregard the order in which the column values were listed
            # in column_values (this should help the user somewhat)
            for i in list( range(0, num_columns) ):
                column_name = self._record_type.columns[ i ].name
                column_val  = column_values[ column_name ]

                # Check that the value is of the given type, save the value if it is
                if self.__is_valid_column_value( column_val, self._record_type.columns[ i ] ):
                    self._column_values[ column_name ] = column_val
        # end checking and save column values

        # Encode the record into binary and save it
        # -----------------------------------------
        self._binary_encoded_data = _Util.encode_binary_data( self._record_type.record_schema,
                                                              self._column_values )
    # end __init__


    @property
    def record_type(self): # read-only record type
        """The type for this record."""
        return self._record_type
    # end record_type

    @property
    def column_values(self): # read-only column_values
        """The values for this record."""
        return self._column_values
    # end column_values

    @property
    def data(self): # read-only column_values, just a convenient name
        """The values for this record."""
        return self._column_values
    # end data

    @property
    def binary_data(self): # read-only binary_data
        """The binary encoded values for this record."""
        return self._binary_encoded_data
    # end binary_data

    @property
    def json_data_string(self): # JSON encoded column_values in a string
        """The stringified JSON encoded values for this record."""
        return json.dumps( _Util.convert_dict_bytes_to_str(self._column_values) )
    # end json_data_string
[docs] def keys( self ): """Return a list of the column names of the record. """ return self.data.keys()
# end values
[docs] def values( self ): """Return a list of the values of the record. """ return self.data.values()
# end values
[docs] def insert_record( self, gpudb, table_name, encoding = "binary", options = None ): """Insert this record into GPUdb. Parameters: gpudb (GPUdb) A :class:`.GPUdb` client handle. table_name (str) The name of the table into which we need to insert the record. encoding (str) Optional encoding with which to perform the insertion. Default is binary encoding. options (dict) Optional parameter. If given, use the options for the insertion function. Returns: The response from GPUdb. """ # Validate the GPUdb handle if not isinstance( gpudb, GPUdb ): raise GPUdbException( "'gpudb' must be a GPUdb object; given " + str( type( gpudb ) ) ) if not options: options = {} # Based on the encoding, format the data appropriately if (encoding == "binary"): data = [ self._binary_encoded_data ] elif (encoding == "json"): data = [ json.dumps( _Util.convert_dict_bytes_to_str( self._column_values ) ) ] else: raise GPUdbException( "Unknown encoding: " + str( encoding ) ) # Insert the record response = gpudb.insert_records( table_name = table_name, data = data, list_encoding = encoding, options = options ) return response
# end insert_record def __is_valid_column_value( self, column_value, column, do_throw = True ): """Private function that validates the given value for a column. Parameters: column_value The value for the given column column (GPUdbRecordColumn) A :class:`.GPUdbRecordColumn` object that has information about the column. This is used to validate the column value. do_throw (bool) Throw an exception for invalid columns Returns: True if the value can be validated, False otherwise. """ if not isinstance( column, GPUdbRecordColumn ): raise GPUdbException( "'column' must be a GPUdbRecordColumn object; given " + str(type( column )) ) # Check that the value is of the given type # ----------------------------------------- column_type = column.column_type if (column_value == None): # Handle null values if not column.is_nullable: # but the column is not nullable if do_throw: raise GPUdbException( "Non-nullable column '%s' given a null value" % column.name ) else: return False # Numeric types: elif (column_type in GPUdbRecordColumn._numeric_data_types): if not (isinstance( column_value, (int, long, float)) and not isinstance( column_value, bool ) ): if do_throw: raise GPUdbException( ("Column '%s' must be a numeric type (one of int, long, float); " "given " % column.name ) + str(type( column_value )) ) else: return False else: # string/bytes type if not isinstance( column_value, (str, Decimal, unicode, bytes) ): if do_throw: raise GPUdbException( ("Column '%s' must be string or bytes; given " % column.name) + str(type( column_value )) ) else: return False # end if-else checking type-correctness # The value checks out; it is valid return True # end __is_valid_column_value def __eq__( self, other ): if isinstance(other, self.__class__): return self.__dict__ == other.__dict__ else: return False # end __eq__ def __ne__(self, other): return not self.__eq__(other)
# end __ne__ # end class GPUdbRecord # --------------------------------------------------------------------------- # GPUdb - Lightweight client class to interact with a GPUdb server. # ---------------------------------------------------------------------------
class GPUdb(object):
    """Lightweight client for a GPUdb (Kinetica) server.

    Holds one or more server connection tokens (for HA), the wire encoding,
    and the credentials; exposes one method per server endpoint.
    """

    def __init__( self, host = "127.0.0.1", port = "9191",
                  host_manager_port = "9300", encoding = "BINARY",
                  connection = 'HTTP', username = "", password = "",
                  timeout = None,
                  no_init_db_contact = False,
                  skip_ssl_cert_verification = False,
                  **kwargs ):
        """
        Construct a new GPUdb client instance.

        Parameters:
            host (str)
                The IP address of the GPUdb server.  May be provided as a list
                to support HA.  May include the port following a colon (the
                *port* argument then should be unused), and may take the form
                "https://user:password@domain.com:port/path/".
            port (str)
                The port of the GPUdb server at the given IP address.  May be
                provided as a list in conjunction with *host*; a single value
                is OK when all hosts share the port.  Ignored if the *host*
                already includes a port.
            host_manager_port (str)
                The port of the host manager for the GPUdb server.  May be
                provided as a list in conjunction with *host*.
            encoding (str)
                Type of Avro encoding to use: "BINARY", "JSON" or "SNAPPY".
            connection (str)
                Connection type, currently only "HTTP" or "HTTPS" supported.
                May be provided as a list in conjunction with *host*.
            username (str)
                An optional http username.
            password (str)
                The http password for the username.
            timeout (int)
                HTTP request timeout in seconds.  Defaults to global socket
                timeout.
            no_init_db_contact (bool)
                If True, the constructor won't communicate with the database
                server (e.g. for checking version compatibility).  Default is
                False.
            skip_ssl_cert_verification (bool)
                Applies to https connections only; ignored for http.  If True,
                skip the verification of the SSL certificate sent by the
                server.  Be careful about using this flag; please ensure that
                you fully understand the repercussions of skipping this
                verification step.  Default is False.
        """
        # All real work is delegated so that unpickling (__setstate__) can
        # re-initialize through the same path
        self.__construct( host = host, port = port,
                          host_manager_port = host_manager_port,
                          encoding = encoding,
                          connection = connection,
                          username = username, password = password,
                          timeout = timeout,
                          no_init_db_contact = no_init_db_contact,
                          skip_ssl_cert_verification = skip_ssl_cert_verification,
                          **kwargs )
    # end __init__


    def __construct( self, host = "127.0.0.1", port = "9191",
                     host_manager_port = "9300", encoding = "BINARY",
                     connection = 'HTTP', username = "", password = "",
                     timeout = None,
                     no_init_db_contact = False,
                     skip_ssl_cert_verification = False,
                     **kwargs ):
        """Internal initializer; takes the same parameters as
        :meth:`.__init__` (see there for documentation).  Shared by the
        constructor and :meth:`.__setstate__`.
        """
        # Normalize host/port/host_manager_port/connection into equal-length
        # lists (one entry per HA ring member)
        if type(host) is list:
            if not type(port) is list:
                port = [port]*len(host)
            if not type(host_manager_port) is list:
                host_manager_port = [host_manager_port]*len(host)
            if not type(connection) is list:
                connection = [connection]*len(host)
            assert len(host) == len(port) == len(host_manager_port) == len(connection), \
                "Host, port, host_manager_port and connection list must have the same number of items"
        else:
            assert (not (type(port) is list)
                    and not (type(host_manager_port) is list)
                    and not (type(connection) is list)), \
                "Host is not a list, port and connection must not be either"
            host = [host]
            port = [port]
            host_manager_port = [host_manager_port]
            connection = [connection]

        assert (encoding in ["BINARY", "JSON", "SNAPPY"]), \
            "Expected encoding to be either 'BINARY', 'JSON' or 'SNAPPY' got: '"+str(encoding)+"'"

        # Fall back to binary when snappy support is unavailable
        if (encoding == 'SNAPPY' and not have_snappy):
            print('SNAPPY encoding specified but python-snappy is not installed; reverting to BINARY')
            encoding = 'BINARY'

        # Verify that the parameter value is a bool
        if not isinstance(skip_ssl_cert_verification, bool):
            raise GPUdbException( "Parameter 'skip_ssl_cert_verification' must be a boolean, given '{}'"
                                  "".format( str( type( skip_ssl_cert_verification ) ) ) )

        self._conn_tokens = tuple(_ConnectionToken(h, p, hmp, c)
                                  for h, p, hmp, c in zip(host, port,
                                                          host_manager_port,
                                                          connection))
        # BUG FIX: random.randint() is inclusive of BOTH bounds, so the upper
        # bound must be len - 1; the previous code could produce an
        # out-of-range index equal to len(self._conn_tokens).
        self.current_host_index = random.randint(0, len(self._conn_tokens) - 1)

        self.encoding = encoding
        self.username = username
        self.password = password
        self.timeout  = timeout
        self.skip_ssl_cert_verification = skip_ssl_cert_verification

        # Set up the credentials to be used per POST
        self.auth = None
        if len(self.username) != 0:
            if IS_PYTHON_3:
                # base64 encode the username and password
                # (encodebytes == the removed-in-3.9 encodestring alias)
                self.auth = ('%s:%s' % (self.username, self.password) )
                self.auth = _Util.str_to_bytes( self.auth )
                self.auth = base64.encodebytes( self.auth ).decode( "ascii" ).replace('\n', '')
                self.auth = ("Basic %s" % self.auth)
            else: # Python 2.x
                self.auth = base64.encodestring('%s:%s' % (self.username, self.password)).replace('\n', '')
                self.auth = ("Basic %s" % self.auth)
        # end if

        # Map the client-level encoding to the per-object encoding
        self.client_to_object_encoding_map = {
            "BINARY": "binary",
            "SNAPPY": "binary",
            "JSON"  : "json",
        }

        # Load all gpudb schemas
        self.__load_logger_schemas()
        self.load_gpudb_schemas()

        # Load the mapping of function names to endpoints
        self.load_gpudb_func_to_endpoint_map()

        # Initiate the type store
        self._known_types = {}

        # Make sure that a connection to the server can be established
        self.no_init_db_contact = no_init_db_contact
        if not self.no_init_db_contact:
            server_status_response = self.show_system_status()
            if not _Util.is_ok( server_status_response ):
                raise GPUdbException( _Util.get_error_msg( server_status_response ) )

        # Check version compatibility with the server
        # -------------------------------------------
        if not no_init_db_contact:
            self._perform_version_check()
    # end __construct


    def __eq__( self, other ):
        """Override the equality operator.

        Note that we ignore the timeout setting.  The only things checked are
        the DB server URL, connection protocol (http vs. https), encoding
        (binary, json, or snappy), the username and the password.
        """
        # Check the type of the other object
        if not isinstance( other, GPUdb ):
            return False

        # Check the host, port, and other connection protocol
        if (self._conn_tokens != other._conn_tokens):
            return False

        # Check for encoding equivalency
        if (self.encoding != other.encoding):
            return False

        # Check for user name equivalency
        if (self.username != other.username):
            return False

        # Check for password equivalency
        if (self.password != other.password):
            return False

        # Note: We're ignoring the timeout setting
        return True
    # end __eq__


    def __getstate__( self ):
        """Defines how to pickle the GPUdb object.

        NOTE(review): only the *current* connection token is saved, so an HA
        (multi-host) configuration collapses to the active host when pickled
        -- confirm whether full HA state needs to survive pickling.
        """
        pickle_this = { "host": self.host,
                        "port": self.port,
                        # The following two keys were previously dropped by
                        # pickling; they are now preserved
                        "host_manager_port": self.host_manager_port,
                        "skip_ssl_cert_verification": self.skip_ssl_cert_verification,
                        "encoding": self.encoding,
                        "connection": self.connection,
                        "username": self.username,
                        "password": self.password,
                        "timeout": self.timeout,
                        "no_init_db_contact": self.no_init_db_contact }
        return pickle_this
    # end __getstate__


    def __setstate__( self, state ):
        """Re-creates a GPUdb object from the pickled state.

        For a description of the pickled state, see :meth:`.__getstate__`.
        """
        # .get() defaults keep compatibility with pickles produced by older
        # versions of __getstate__, which lacked these keys
        self.__construct( host = state["host"],
                          port = state["port"],
                          host_manager_port = state.get("host_manager_port", "9300"),
                          encoding = state["encoding"],
                          connection = state["connection"],
                          username = state["username"],
                          password = state["password"],
                          timeout = state["timeout"],
                          no_init_db_contact = state["no_init_db_contact"],
                          skip_ssl_cert_verification = state.get("skip_ssl_cert_verification", False) )
    # end __setstate__


    def _perform_version_check( self, do_print_warning = True ):
        """Perform a version check with the database server.

        Parameters:
            do_print_warning (bool)
                If True, print a warning on version mismatch.

        Returns:
            True if versions match, False otherwise.
        """
        system_props = self.show_system_properties()
        server_version = system_props[ C._property_map ][ C._gaia_version ]

        # Extract the version for both server and client:
        # major.minor.revision (ignore ABI)
        server_version = ".".join( server_version.split( "." )[ 0 : 3 ] )
        client_version = ".".join( self.api_version.split( "." )[ 0 : 3 ] )

        if (server_version != client_version):
            if (do_print_warning == True):
                print ( "Warning: Client version ({0}) does not match that of the server ({1})"
                        "".format( client_version, server_version ) )
            return False # version mismatch!
        return True # all is well
    # end _perform_version_check


    def _get_current_conn_token( self ):
        """Returns the connection information for the current server."""
        return self._conn_tokens[self._current_conn_token_index]
    # end _get_current_conn_token
[docs] def get_version_info( self ): """Return the version information for this API.""" return self.api_version
# end get_version_info
[docs] def get_host( self ): """Return the host this client is talking to.""" return self._get_current_conn_token()._host
# end get_host
[docs] def get_port( self ): """Return the port the host is listening to.""" return self._get_current_conn_token()._port
# end get_port
[docs] def get_host_manager_port( self ): """Return the port the host manager is listening to.""" return self._get_current_conn_token()._host_manager_port
# end get_host_manager_port
[docs] def get_url( self ): """Return the url of the host this client is listening to.""" return "{host}:{port}".format( host = self.get_host(), port = self.get_port() )
# end get_host @property def host(self): return self.get_host() @host.setter def host(self, value): self._get_current_conn_token()._host = str( value ) @property def port(self): return self.get_port() @port.setter def port(self, value): self._get_current_conn_token()._port = value @property def host_manager_port(self): return self.get_host_manager_port() @host_manager_port.setter def host_manager_port(self, value): self._get_current_conn_token()._host_manager_port = value @property def gpudb_url_path(self): return self._get_current_conn_token()._gpudb_url_path @gpudb_url_path.setter def gpudb_url_path(self, value): self._get_current_conn_token()._gpudb_url_path = str( value ) @property def connection(self): return self._get_current_conn_token()._connection @connection.setter def connection(self, value): self._get_current_conn_token()._connection = value @property def encoding(self): return self.encoding def save_known_type(self, type_id, _type ): self._known_types[ type_id ] = _type @property def get_known_types(self): """Return all known types; if none, return None. """ if type_id not in self._known_types: return None return self._known_types[ type_id ] # end get_known_type
[docs] def get_known_type(self, type_id, lookup_type = True ): """Given an type ID, return any associated known type; if none is found, then optionally try to look it up and save it. Otherwise, return None. Parameters: type_id (str) The ID for the type. lookup_type (bool) If True, then if the type is not already found, then to look it up by invoking :meth:`.show_types`, save it for the future, and return it. Returns: The associated RecordType, if found (or looked up). None otherwise. """ if type_id in self._known_types: return self._known_types[ type_id ] if lookup_type: # Get the type info from the database type_info = self.show_types( type_id = type_id, label = "" ) if not _Util.is_ok( type_info ): raise GPUdbException( "Error in finding type {}: {}" "".format( type_id, _Util.get_error_msg( type_info ) ) ) # Create the RecordType record_type = RecordType.from_type_schema( label = "", type_schema = type_info["type_schemas"][ 0 ], properties = type_info["properties"][ 0 ] ) # Save the RecordType self._known_types[ type_id ] = record_type return record_type # end if return None # none found
    # end get_known_type


    # -----------------------------------------------------------------------
    # Class members -- defaults only; per-instance values are assigned in
    # __construct()
    # -----------------------------------------------------------------------
    # members
    _current_conn_token_index = 0  # index into _conn_tokens of the server in use
    _conn_tokens = () # Collection of parsed url entities
    timeout = None    # HTTP request timeout (None=default socket timeout)
    encoding = "BINARY" # Input encoding: 'BINARY', 'JSON' or 'SNAPPY'
    username = ""     # Input username or empty string for none.
    password = ""     # Input password or empty string for none.

    api_version = "6.2.0.14"  # compared against the server in _perform_version_check()

    # constants
    END_OF_SET = -9999
    """(int) Used for indicating that all of the records (till the end of the
    set are desired)--generally used for /get/records/\* functions.
    """

    def __load_logger_schemas( self ):
        """Build and cache the Avro request/response schemas for the internal
        logger endpoint, both as whitespace-stripped JSON strings and as
        parsed Schema objects.
        """
        # Some other schemas for internal work
        self.logger_request_schema_str = """
            {
                "type" : "record",
                "name" : "logger_request",
                "fields" : [
                    {"name" : "ranks", "type" : {"type" : "array", "items" : "int"}},
                    {"name" : "log_levels", "type" : {"type" : "map", "values" : "string"}}
                ]
            }
        """.replace("\n", "").replace(" ", "")
        self.logger_response_schema_str = """
            {
                "type" : "record",
                "name" : "logger_response",
                "fields" : [
                    {"name" : "status" , "type" : "string"},
                    {"name" : "log_levels", "type" : {"type" : "map", "values" : "string"}}
                ]
            }
        """.replace("\n", "").replace(" ", "")
        self.logger_request_schema = Schema( "record",
                                             [ ("ranks", "array", [("int")]),
                                               ("log_levels", "map", [("string")] )
                                             ] )
        self.logger_response_schema = Schema( "record",
                                              [ ("status" , "string"),
                                                ("log_levels", "map", [("string")] )
                                              ] )
    # end __load_logger_schemas


    # -----------------------------------------------------------------------
    # Helper functions
    # -----------------------------------------------------------------------

    def __create_header_and_process_body_data( self, body_data ):
        """Create an HTTP or HTTPS header, and compress the body data if
        needed.

        Parameters:
            body_data
                The body of the data, already avro or json encoded.

        Returns:
            A tuple where the first element is the header dict and the second
            element is the body data (either unprocessed or snappy-compressed).
        """
        if self.encoding == 'BINARY':
            headers = {"Content-type": "application/octet-stream",
                       "Accept": "application/octet-stream"}
        elif self.encoding == 'JSON':
            headers = {"Content-type": "application/json",
                       "Accept": "application/json"}
        elif self.encoding == 'SNAPPY':
            headers = {"Content-type": "application/x-snappy",
                       "Accept": "application/x-snappy"}
            body_data = snappy.compress(body_data)

        # Set the authentication header, if needed
        if self.auth:
            headers["Authorization"] = self.auth

        return (headers, body_data)
    # end __create_header_and_process_body_data


    def __post_and_get( self, host, port, url_path, connection_type,
                        headers, body_data, endpoint ):
        """ Create a HTTP connection and POST then get GET, returning the
            server response.

        Parameters:
            host (str)
                The host to send the request to
            port (str)
                The port to send the request to
            url_path (str)
                The URL for the request (exclusive of the endpoint)
            connection_type (str)
                'HTTP' or 'HTTPS'
            headers (dict)
                The headers to use for the HTTP or HTTPS connection
            body_data (bytes)
                Data to POST to GPUdb server.
            endpoint (str)
                Server path to POST to, e.g. "/add".

        Raises:
            GPUdbConnectionException: on connect/post/response-timeout errors.
            GPUdbException: when the response body cannot be read.
        """
        # NOTE: Creating a new httplib.HTTPConnection is suprisingly just as
        #       fast as reusing a persistent one and has the advantage of
        #       fully retrying from scratch if the connection fails.

        # Get the full URL path for the request
        url_path = (url_path + endpoint)

        # Try to establish a connection
        try:
            if (connection_type == 'HTTP'):
                conn = httplib.HTTPConnection( host = host, port = port,
                                               timeout = self.timeout)
            elif (connection_type == 'HTTPS'):
                if self.skip_ssl_cert_verification:
                    # Deliberately unverified context (user opted in via the
                    # skip_ssl_cert_verification constructor flag)
                    conn = httplib.HTTPSConnection( host = host, port = port,
                                                    timeout = self.timeout,
                                                    context = ssl._create_unverified_context() )
                else:
                    conn = httplib.HTTPSConnection( host = host, port = port,
                                                    timeout = self.timeout)
        except Exception as e:
            raise GPUdbConnectionException("Error connecting to '{}' on port {} due to: {}"
                                           "".format(host, port, str(e)) )

        # Try to post the message
        try:
            conn.request("POST", url_path, body_data, headers)
        except Exception as e:
            raise GPUdbConnectionException( "Error posting to '{}:{}{}' due to: {}"
                                            "".format(host, port, url_path, str(e)) )

        # Get the response
        try:
            resp = conn.getresponse()
        except: # some error occurred; return a message
            raise GPUdbConnectionException( "Timeout Error: No response received from %s:%s"
                                            "" % (host, port) )

        # Read the response
        try:
            resp_data = resp.read()
            # Server-side processing time, if the server reported it
            resp_time = resp.getheader('x-request-time-secs',None)
            return resp_data, resp_time
        except: # some error occurred; return a message
            raise GPUdbException( "Error reading response from {}:{} for {}"
                                  "".format( host, port, endpoint ) )
    # end __post_and_get


    def __post_to_gpudb_read(self, body_data, endpoint):
        """ Create a HTTP connection and POST then get GET, returning the
            server response.  Fails over through all known connection tokens
            before giving up.

        Parameters:
            body_data : Data to POST to GPUdb server.
            endpoint  : Server path to POST to, e.g. "/add".
        """
        # Get the header and process the body data
        ( headers, body_data ) = self.__create_header_and_process_body_data( body_data )

        # NOTE: Creating a new httplib.HTTPConnection is suprisingly just as
        #       fast as reusing a persistent one and has the advantage of
        #       fully retrying from scratch if the connection fails.

        # Walk the HA ring starting from the current server; stop on success
        # or once we have wrapped around to where we started
        initial_index = self._current_conn_token_index
        cond  = True
        error = None
        while cond:
            loop_error = None
            conn_token = self._get_current_conn_token()

            # Try to post and get the message using the current connection
            # token's information
            try:
                ( resp_data, resp_time ) = self.__post_and_get( conn_token._host,
                                                                conn_token._port,
                                                                conn_token._gpudb_url_path,
                                                                conn_token._connection,
                                                                headers, body_data, endpoint )
            except (GPUdbException, GPUdbConnectionException) as ex:
                loop_error = ex
                # Advance to the next server in the ring for the retry
                self._current_conn_token_index = \
                    (self._current_conn_token_index+1) % len(self._conn_tokens)

            error = loop_error
            cond = error and (self._current_conn_token_index != initial_index)
        # end while loop

        if error:
            if isinstance( error, (basestring, unicode)):
                raise GPUdbException( error )
            elif isinstance( error, GPUdbException ):
                raise error
            else:
                raise GPUdbException( error )

        return resp_data, resp_time
    # end __post_to_gpudb_read


    def __post_to_hm_read(self, body_data, endpoint):
        """ Create a HTTP connection and POST to the host manager, then get
            GET returning the server response.  Fails over like
            __post_to_gpudb_read, with a last-ditch retry using the host
            manager port advertised by the head node.

        Parameters:
            body_data : Data to POST to GPUdb server.
            endpoint  : Server path to POST to, e.g. "/add".
        """
        # Get the header and process the body data
        ( headers, body_data ) = self.__create_header_and_process_body_data( body_data )

        # NOTE: Creating a new httplib.HTTPConnection is suprisingly just as
        #       fast as reusing a persistent one and has the advantage of
        #       fully retrying from scratch if the connection fails.

        # Walk the HA ring (using each token's host *manager* port)
        initial_index = self._current_conn_token_index
        cond  = True
        error = None
        while cond:
            loop_error = None
            conn_token = self._get_current_conn_token()

            try:
                ( resp_data, resp_time ) = self.__post_and_get( conn_token._host,
                                                                conn_token._host_manager_port,
                                                                conn_token._gpudb_url_path,
                                                                conn_token._connection,
                                                                headers, body_data, endpoint )
            except (GPUdbException, GPUdbConnectionException) as ex:
                loop_error = ex
                self._current_conn_token_index = \
                    (self._current_conn_token_index+1) % len(self._conn_tokens)

            error = loop_error
            cond = error and (self._current_conn_token_index != initial_index)
        # end while loop

        # Last ditch effort: if error due to wrong port, inquire the head node
        # what the port is and use that if different
        if error:
            if isinstance( error, GPUdbConnectionException ):
                # Get the host manager port from the head node
                try:
                    sys_properties = self.show_system_properties().property_map
                except (GPUdbException, GPUdbConnectionException) as ex:
                    if IS_PYTHON_3:
                        raise GPUdbException( ex )
                    else:
                        raise GPUdbException( ex.message )

                if "conf.hm_http_port" not in sys_properties:
                    # NOTE(review): message reads "not found system properties!"
                    # -- apparently missing the word "in"; left unchanged here
                    raise GPUdbException( 'Error: "conf.hm_http_port" not found '
                                          'system properties!' )
                try :
                    hm_port = int( sys_properties[ "conf.hm_http_port" ] )
                except:
                    raise GPUdbException ( "Expected a numeric port, got: '{}'"
                                           "".format( str(sys_properties[ "conf.hm_http_port" ]) ) )

                # Check if this host manager port works
                try:
                    conn_token = self._get_current_conn_token()
                    ( resp_data, resp_time ) = self.__post_and_get( conn_token._host,
                                                                    hm_port,
                                                                    conn_token._gpudb_url_path,
                                                                    conn_token._connection,
                                                                    headers, body_data, endpoint )
                    # Upon success, update the connection token's host manager port
                    conn_token._host_manager_port = hm_port
                    # Reset the error
                except (GPUdbException, GPUdbConnectionException) as ex:
                    raise
            else:
                if isinstance( error, (basestring, unicode)):
                    raise GPUdbException( error )
                elif isinstance( error, GPUdbException ):
                    raise error
                else:
                    raise GPUdbException( error )
            # end inner if
        # end if error

        return resp_data, resp_time
    # end __post_to_hm_read


    def __client_to_object_encoding( self ):
        """Returns object encoding ('binary' or 'json') for queries based on
        the GPUdb client's encoding.
        """
        return self.client_to_object_encoding_map[ self.encoding ]
    # end __client_to_object_encoding


    def __read_orig_datum(self, SCHEMA, encoded_datum, encoding=None):
        """ Decode the binary or JSON encoded datum using the avro schema and
            return a dict.

        Parameters:
            SCHEMA        : A parsed schema from avro.schema.parse().
            encoded_datum : Binary or JSON encoded data.
            encoding      : Type of avro encoding, either "BINARY" or "JSON",
                            None uses the encoding this class was initialized
                            with.
        """
        if encoding == None:
            encoding = self.encoding

        if (encoding == 'BINARY') or (encoding == 'SNAPPY'):
            return _Util.decode_binary_data( SCHEMA, encoded_datum )
        elif encoding == 'JSON':
            # Normalize '\U' escapes before handing off to the json parser
            data_str = json.loads( _Util.ensure_str(encoded_datum).replace('\\U','\\u') )
            return data_str
    # end __read_orig_datum


    def __read_datum(self, SCHEMA, encoded_datum, encoding=None, response_time=None):
        """ Decode a gpudb_response and decode the contained message too.

        Parameters:
            SCHEMA        : The parsed schema from avro.schema.parse() that
                            the gpudb_response contains.
            encoded_datum : A BINARY or JSON encoded gpudb_response message.

        Returns:
            An OrderedDict of the decoded gpudb_response message's data with
            the gpudb_response put into the "status_info" field.
        """
        # Parse the gpudb_response message
        REP_SCHEMA = self.gpudb_schemas["gpudb_response"]["RSP_SCHEMA"]
        resp = self.__read_orig_datum(REP_SCHEMA, encoded_datum, encoding)

        #now parse the actual response if there is no error
        #NOTE: DATA_SCHEMA should be equivalent to SCHEMA but is NOT for get_set_sorted
        stype = resp['data_type']
        if stype == 'none':
            out = collections.OrderedDict()
        else:
            if self.encoding == 'JSON':
                out = self.__read_orig_datum(SCHEMA, resp['data_str'], 'JSON')
            elif (self.encoding == 'BINARY') or (self.encoding == 'SNAPPY'):
                out = self.__read_orig_datum(SCHEMA, resp['data'], 'BINARY')

        # The raw payloads are folded into 'out'; drop them from the envelope
        del resp['data']
        del resp['data_str']
        out['status_info'] = resp
        if (response_time is not None):
            out['status_info']['response_time'] = float(response_time)

        return out
    # end __read_datum


    def __read_orig_datum_cext(self, SCHEMA, encoded_datum, encoding=None):
        """ Decode the binary or JSON encoded datum using the avro schema and
            return a dict.  C-extension variant of __read_orig_datum.

        Parameters:
            SCHEMA        : A parsed schema from avro.schema.parse().
            encoded_datum : Binary or JSON encoded data.
            encoding      : Type of avro encoding, either "BINARY" or "JSON",
                            None uses the encoding this class was initialized
                            with.
        """
        if encoding == None:
            encoding = self.encoding

        if (encoding == 'BINARY') or (encoding == 'SNAPPY'):
            # The c-extension schema object decodes directly
            return SCHEMA.decode( encoded_datum )
        elif encoding == 'JSON':
            data_str = json.loads( _Util.ensure_str(encoded_datum).replace('\\U','\\u') )
            return data_str
    # end __read_orig_datum_cext


    def __read_datum_cext(self, SCHEMA, encoded_datum, encoding=None, response_time=None):
        """ Decode a gpudb_response and decode the contained message too.
            C-extension variant of __read_datum.

        Parameters:
            SCHEMA        : The parsed schema from .protocol.Schema() that the
                            gpudb_response contains.
            encoded_datum : A BINARY or JSON encoded gpudb_response message.

        Returns:
            An OrderedDict of the decoded gpudb_response message's data with
            the gpudb_response put into the "status_info" field.
        """
        # Parse the gpudb_response message
        RSP_SCHEMA = self.gpudb_schemas["gpudb_response"]["RSP_SCHEMA"]
        resp = self.__read_orig_datum_cext( RSP_SCHEMA, encoded_datum, encoding )

        # Now parse the actual response if there is no error
        # NOTE: DATA_SCHEMA should be equivalent to SCHEMA but is NOT for get_set_sorted
        stype = resp['data_type']
        if stype == 'none':
            out = collections.OrderedDict()
        else:
            if self.encoding == 'JSON':
                out = self.__read_orig_datum_cext(SCHEMA, resp['data_str'], 'JSON')
            elif (self.encoding == 'BINARY') or (self.encoding == 'SNAPPY'):
                # NOTE(review): unlike the pure-python path, this passes BOTH
                # the full envelope and the data field to decode() -- confirm
                # against the c-extension Schema.decode signature
                out = SCHEMA.decode( encoded_datum, resp['data'] )

        del resp['data']
        del resp['data_str']
        out['status_info'] = resp
        if (response_time is not None):
            out['status_info']['response_time'] = float(response_time)

        return out
    # end __read_datum_cext


    def __get_schemas(self, base_name, get_req_cext = False, get_rsp_cext = False ):
        """ Get a tuple of parsed and cached request and reply schemas.

        Parameters:
            base_name : Schema name, e.g. "base_name"+"_request.json" or
                        "_response.json"
            get_req_cext (bool)
                If True, then try to return the c-extension version of the
                request schema.  If none found, raise exception.  Default is
                False.
            get_rsp_cext (bool)
                If True, then try to return the c-extension version of the
                response schema.  If none found, raise exception.  Default is
                False.
        """
        if get_req_cext:
            if "REQ_SCHEMA_CEXT" not in self.gpudb_schemas[base_name]:
                raise GPUdbException( "No c-extension version of the request "
                                      "schema was found for {}".format( base_name ) )
            REQ_SCHEMA = self.gpudb_schemas[base_name]["REQ_SCHEMA_CEXT"]
        else:
            REQ_SCHEMA = self.gpudb_schemas[base_name]["REQ_SCHEMA"]

        if get_rsp_cext:
            if "RSP_SCHEMA_CEXT" not in self.gpudb_schemas[base_name]:
                raise GPUdbException( "No c-extension version of the response "
                                      "schema was found for {}".format( base_name ) )
            RSP_SCHEMA = self.gpudb_schemas[base_name]["RSP_SCHEMA_CEXT"]
        else:
            RSP_SCHEMA = self.gpudb_schemas[base_name]["RSP_SCHEMA"]

        return (REQ_SCHEMA, RSP_SCHEMA)
    # end __get_schemas


    def __get_endpoint(self, func_name):
        """ Get the endpoint for a given query.

        Parameters:
            base_name : Schema name, e.g. "func_name"+"_request.json" or
                        "_response.json"
        """
        return self.gpudb_func_to_endpoint_map[ func_name ]
    # end __get_endpoint


    def __post_then_get(self, REQ_SCHEMA, REP_SCHEMA, datum, endpoint):
        """ Encode the datum dict using the REQ_SCHEMA, POST to GPUdb server
            and decode the reply using the REP_SCHEMA.

        Parameters:
            REQ_SCHEMA : The parsed schema from avro.schema.parse() of the
                         request.
            REP_SCHEMA : The parsed schema from avro.schema.parse() of the
                         reply.
            datum      : Request dict matching the REQ_SCHEMA.
            endpoint   : Server path to POST to, e.g. "/add".
        """
        encoded_datum = self.encode_datum(REQ_SCHEMA, datum)
        response, response_time = self.__post_to_gpudb_read(encoded_datum, endpoint)
        return self.__read_datum(REP_SCHEMA, response, None, response_time)
    # end __post_then_get


    def __post_then_get_cext(self, REQ_SCHEMA, REP_SCHEMA, datum, endpoint):
        """ Encode the datum dict using the REQ_SCHEMA, POST to GPUdb server
            and decode the reply using the REP_SCHEMA.

        Parameters:
            REQ_SCHEMA : The parsed schema from avro.schema.parse() of the
                         request.
            REP_SCHEMA : The parsed schema from avro.schema.parse() of the
                         reply.
            datum      : Request dict matching the REQ_SCHEMA.
endpoint : Server path to POST to, e.g. "/add". Returns: The decoded response. """ encoded_datum = self.encode_datum_cext(REQ_SCHEMA, datum) response, response_time = self.__post_to_gpudb_read(encoded_datum, endpoint) return self.__read_datum_cext(REP_SCHEMA, response, None, response_time) # end __post_then_get_cext def __post_to_hm_then_get_cext(self, REQ_SCHEMA, REP_SCHEMA, datum, endpoint): """ Encode the datum dict using the REQ_SCHEMA, POST to the host manager and decode the reply using the REP_SCHEMA. Parameters: REQ_SCHEMA : The parsed schema from avro.schema.parse() of the request. REP_SCHEMA : The parsed schema from avro.schema.parse() of the reply. datum : Request dict matching the REQ_SCHEMA. endpoint : Server path to POST to, e.g. "/add". Returns: The decoded response. """ encoded_datum = self.encode_datum_cext(REQ_SCHEMA, datum) response, response_time = self.__post_to_hm_read(encoded_datum, endpoint) return self.__read_datum_cext(REP_SCHEMA, response, None, response_time) # end __post_to_hm_then_get_cext def __post_then_get_cext_raw(self, REQ_SCHEMA, REP_SCHEMA, datum, endpoint): """ Encode the datum dict using the REQ_SCHEMA, POST to GPUdb server and decode the reply using the REP_SCHEMA. Parameters: REQ_SCHEMA : The parsed schema from avro.schema.parse() of the request. REP_SCHEMA : The parsed schema from avro.schema.parse() of the reply. datum : Request dict matching the REQ_SCHEMA. endpoint : Server path to POST to, e.g. "/add". Returns: A tuple where the first element is the decoded response, and the second element is the raw encoded response from the database. 
""" encoded_datum = self.encode_datum_cext(REQ_SCHEMA, datum) response, response_time = self.__post_to_gpudb_read(encoded_datum, endpoint) # Return the decoded response and the raw response return ( self.__read_datum_cext(REP_SCHEMA, response, None, response_time), response ) # end __post_then_get_cext def __post_then_get_async_cext(self, REQ_SCHEMA, REP_SCHEMA, datum, endpoint, retry_interval = 5): """ Encode the datum dict using the REQ_SCHEMA, POST to GPUdb server via the /create/job endpoint for an asynchronous call. Decode the /create/job response and return it. Parameters: REQ_SCHEMA : The Schema for the request. REP_SCHEMA : The Schema for the reply. datum : Request dict matching the REQ_SCHEMA. endpoint : Server path to POST to, e.g. "/alter/table". retry_interval : The interval period for re-trying /get/job to see if the job has completed. In seconds. Default is 5 seconds. Returns: The decoded endpoint response. """ # Encode the payload of the actual endpoint to be called encoded_datum = self.encode_datum_cext(REQ_SCHEMA, datum) # Create and encode the payload of the /create/job endpoint # which makes the asynchronous call create_job_endpoint = "/create/job" (create_job_req_schema, create_job_rsp_schema) = self.__get_schemas( create_job_endpoint ) obj = {} obj['endpoint'] = endpoint obj['request_encoding'] = 'json' if (self.encoding == 'JSON') else 'binary' if self.encoding == 'JSON': obj['data' ] = () # obj['data' ] = bytes() obj['data_str'] = encoded_datum else: obj['data_str'] = '' obj['data' ] = ( encoded_datum ) obj['options'] = {} # Make the asynchronouse /recate/job call response = self.__post_then_get_cext( create_job_req_schema, create_job_rsp_schema, obj, create_job_endpoint ) if not _Util.is_ok( response ): raise GPUdbException( "Error in creating asynchronous job for {}: {}" "".format( endpoint, _Util.get_error_msg( response ) ) ) job_id = response[ "job_id" ] # Now, every retry_interval, check if the job is done; if done, # then decode the 
response and return it while (True): # Try getting the job result back job_result = self.__get_async_cext( job_id, REP_SCHEMA ) if job_result: # We need to insert the status_info into the response # since the calling function may be expecting it job_result['status_info'] = response['status_info'] # Remove the data type since it won't correspond to the # actual endpoint invoked del job_result['status_info']['data_type'] # Return the job result return job_result # end inner if # Sleep a little before trying again time.sleep( retry_interval ) # end infinite loop # end __post_then_get_async_cext def __post_async_cext(self, REQ_SCHEMA, datum, endpoint ): """ Encode the datum dict using the REQ_SCHEMA, POST to GPUdb server via the /create/job endpoint for an asynchronous call. Decode the /create/job response and return it. Parameters: REQ_SCHEMA : The Schema for the request. datum : Request dict matching the REQ_SCHEMA. endpoint : Server path to POST to, e.g. "/alter/table". Returns: The decoded endpoint response. 
""" # Encode the payload of the actual endpoint to be called encoded_datum = self.encode_datum_cext(REQ_SCHEMA, datum) # Create and encode the payload of the /create/job endpoint # which makes the asynchronous call create_job_endpoint = "/create/job" (create_job_req_schema, create_job_rsp_schema) = self.__get_schemas( create_job_endpoint ) obj = {} obj['endpoint'] = endpoint obj['request_encoding'] = 'json' if (self.encoding == 'JSON') else 'binary' if self.encoding == 'JSON': obj['data' ] = bytes() obj['data_str'] = encoded_datum else: obj['data_str'] = '' obj['data' ] = encoded_datum obj['options'] = {} # Make the asynchronouse /recate/job call response = self.__post_then_get_cext( create_job_req_schema, create_job_rsp_schema, obj, create_job_endpoint ) if not _Util.is_ok( response ): raise GPUdbException( "Error in creating asynchronous job for {}: {}" "".format( endpoint, _Util.get_error_msg( response ) ) ) return response # end __post_async_cext def __get_async_cext(self, job_id, RSP_SCHEMA): """ Make a /get/job call using the job_id. Decode the /get/job response and return it. Parameters: REP_SCHEMA : The Schema for the reply. Returns: The decoded /get/job response. 
""" # Create and encode the payload of the /get/job endpoint # which looks up the status of the asynchronous job get_job_endpoint = "/get/job" (get_job_req_schema, get_job_rsp_schema) = self.__get_schemas( get_job_endpoint ) obj = {} obj['job_id'] = job_id obj['options'] = {} # Make the /get/job call response, raw_response = self.__post_then_get_cext_raw( get_job_req_schema, get_job_rsp_schema, obj, get_job_endpoint ) # response = self.__post_then_get_cext( get_job_req_schema, get_job_rsp_schema, # obj, get_job_endpoint ) if not _Util.is_ok( response ): raise GPUdbException( "Error in getting asynchronous job result for {}: {}" "".format( endpoint, _Util.get_error_msg( response ) ) ) get_job_rsp = AttrDict( response ) # If the job is done, then decode it and return the result if get_job_rsp.successful: if get_job_rsp.response_encoding == "json": # Decode the json-encoded job data job_payload = self.__read_datum_cext( RSP_SCHEMA, get_job_rsp.job_response_str ) else: # Decode the binary-encoded job data job_payload = RSP_SCHEMA.decode( get_job_rsp.job_response ) # job_payload = self.__read_datum_cext( RSP_SCHEMA, get_job_rsp.job_response ) # end inner if return job_payload elif get_job_rsp.running: # Job is still running, nothing to worry about return None # Job has been cancelled or there was an error; raise exception job_status = get_job_rsp.job_status if (job_status == C._job_cancelled): raise GPUdbException( "Job {} was cancelled" "".format( job_id ) ) elif (job_status == C._job_error): raise GPUdbException( "Job {} had an error: {}" "".format( job_id, get_job_rsp.status_map[ C._job_error_msg ] ) ) # Should never get here! raise GPUdbException( "Unknown job status for job {}: '{}'" "".format( job_id, job_status ) ) # end __get_async_cext def __sanitize_dicts( self, _dict ): """If the given options dictionary has boolean values, replace them with the strings 'true' and 'false' for consumption of the database. Return the "sanitized" dictionary. 
""" if not isinstance( _dict, (dict, collections.OrderedDict) ): return # Iterate over a copy of the keys so that we can modify the dict for key in _dict.keys(): val = _dict[ key ] if isinstance( val, bool ): if val: # true _dict[ key ] = 'true' else: _dict[ key ] = 'false' elif isinstance( val, (dict, collections.OrderedDict) ): _dict[ key ] = self.__sanitize_dicts( _dict[ key ] ) # end loop return _dict # end sanitize_dicts
def encode_datum(self, SCHEMA, datum, encoding = None):
    """ Returns an avro binary or JSON encoded datum dict using its schema.

    Parameters:
        SCHEMA (str or avro.Schema)
            A parsed schema object from avro.schema.parse() or a string
            containing the schema.
        datum (dict)
            A dict of key-value pairs containing the data to encode (the
            entries must match the schema).
        encoding (str or None)
            Optional encoding override ("BINARY", "SNAPPY" or "JSON");
            None uses the encoding this class was initialized with.
    """
    # A raw schema string is accepted too; parse it into a schema object
    if isinstance( SCHEMA, basestring ):
        SCHEMA = schema.parse( SCHEMA )

    # Resolve the effective encoding (normalized to upper case)
    encoding = self.encoding if encoding is None else encoding.upper()

    if encoding in ('BINARY', 'SNAPPY'):
        return _Util.encode_binary_data( SCHEMA, datum, self.encoding )
    if encoding == 'JSON':
        return json.dumps( _Util.convert_dict_bytes_to_str( datum ) )
# end encode_datum
def encode_datum_cext(self, SCHEMA, datum, encoding = None):
    """ Returns an avro binary or JSON encoded datum dict using its schema
    (c-extension path).

    Parameters:
        SCHEMA (Schema)
            A parsed schema object.
        datum (dict)
            A dict of key-value pairs containing the data to encode (the
            entries must match the schema).
        encoding (str or None)
            Optional encoding override ("BINARY", "SNAPPY" or "JSON");
            None uses the encoding this class was initialized with.
    """
    # Resolve the effective encoding (normalized to upper case)
    encoding = self.encoding if encoding is None else encoding.upper()

    if encoding in ('BINARY', 'SNAPPY'):
        return _Util.encode_binary_data_cext( SCHEMA, datum, self.encoding )
    if encoding == 'JSON':
        # Convert bytes to strings first
        converted = _Util.convert_dict_bytes_to_str( datum )
        # The server expects the JSON fields in schema order, so build an
        # OrderedDict keyed in that order
        ordered = collections.OrderedDict(
            (field.name, converted[ field.name ]) for field in SCHEMA.fields )
        return json.dumps( ordered )
# end encode_datum_cext


# ------------- Convenience Functions ------------------------------------

def read_trigger_msg(self, encoded_datum):
    """Decode a binary-encoded trigger notification message and return it
    as a dict.
    """
    trigger_schema = self.gpudb_schemas[ "trigger_notification" ]["RSP_SCHEMA"]
    return self.__read_orig_datum_cext(trigger_schema, encoded_datum, 'BINARY')
def logger(self, ranks, log_levels):
    """Convenience function to change log levels of some or all GPUdb ranks.

    Parameters:
        ranks      : The ranks whose log levels should be changed.
        log_levels : The log levels to apply.
    """
    req_schema = self.logger_request_schema
    rsp_schema = self.logger_response_schema

    datum = {
        "ranks":      ranks,
        "log_levels": log_levels,
    }

    print('Using host: %s\n' % (self.host))
    return self.__post_then_get_cext(req_schema, rsp_schema, datum, "/logger")
# end logger


# Helper function to emulate old /add (single object insert) capability
def insert_object(self, set_id, object_data, params=None):
    """Insert a single object into the given table.

    Parameters:
        set_id      : Name of the table to insert into.
        object_data : The single record to insert.
        params      : Optional options dict passed through to
            /insert/records; when omitted, record IDs are requested back.
    """
    if (params):
        return self.insert_records(set_id, [object_data], None, params)
    else:
        return self.insert_records(set_id, [object_data], None, {"return_record_ids":"true"})
# end insert_object


# Helper for dynamic schema responses
def parse_dynamic_response(self, retobj, do_print=False, convert_nulls = True):
    """Decode a dynamic-schema response (binary or JSON encoded) and attach
    the column-major data to retobj['response'].

    Parameters:
        retobj        : The raw endpoint response containing
            'response_schema_str' and a binary or JSON encoded payload.
        do_print      : If True, pretty-print the result with tabulate.
        convert_nulls : If True, replace None in nullable columns with the
            string '<NULL>'.

    Returns:
        retobj wrapped in an AttrDict, with 'response' populated.
    """
    if (retobj['status_info']['status'] == 'ERROR'):
        print('Error: ', retobj['status_info']['message'])
        return retobj

    my_schema = schema.parse(retobj['response_schema_str'])

    # Security/bug fix: parse the schema string with json.loads() instead of
    # eval() -- the string is plain JSON (it is also fed to schema.parse()
    # above), and eval() would both execute arbitrary expressions and choke
    # on JSON literals such as true/false/null.
    fields = json.loads(retobj['response_schema_str'])['fields']

    # A column is nullable when its avro "items" entry is a complex type
    # (a dict like {"type": [...]}) rather than a plain type-name string;
    # basestring also covers unicode strings under Python 2.
    nullable = [not isinstance(x['type']['items'], basestring) for x in fields]

    if len(retobj['binary_encoded_response']) > 0:
        data = retobj['binary_encoded_response']
        # Use the python avro package to decode the data
        decoded = _Util.decode_binary_data( my_schema, data )
        # Translate the column names
        column_lookup = decoded['column_headers']
        translated = collections.OrderedDict()
        for i,(n,column_name) in enumerate(zip(nullable,column_lookup)):
            if (n and convert_nulls):
                # nullable - replace None with '<NULL>'
                col = [x if x is not None else '<NULL>' for x in decoded['column_%d'%(i+1)]]
            else:
                col = decoded['column_%d'%(i+1)]
            # end if
            translated[column_name] = col
        # end loop
        # TODO: For 7.0, when retobj carries a "record_type" (a RecordType
        # object), decode via the c-extension
        # (record_type.decode_dynamic_records) instead of the python avro
        # package, and transpose to column-major with
        # GPUdbRecord.transpose_data_to_col_major().
        retobj['response'] = translated
    else: # JSON encoding
        retobj['response'] = collections.OrderedDict()
        # json.loads (rather than eval) keeps unicode handling consistent
        d_resp = json.loads(retobj['json_encoded_response'])
        column_lookup = d_resp['column_headers']
        for i,(n,column_name) in enumerate(zip(nullable,column_lookup)):
            column_index_name = 'column_%d'%(i+1)
            # double/float conversion here:
            # get the datatype of the underlying data
            data_type = my_schema.fields_dict[column_index_name].type.items.type
            if (data_type == 'double' or data_type == 'float'):
                retobj['response'][column_name] = [float(x) for x in d_resp[column_index_name]]
            else:
                retobj['response'][column_name] = d_resp[column_index_name]
            if (n and convert_nulls): # nullable
                retobj['response'][column_name] = [x if x is not None else '<NULL>' for x in retobj['response'][column_name]]
        # end loop

    if (do_print):
        print(tabulate(retobj['response'],headers='keys',tablefmt='psql'))

    return AttrDict( retobj )
# end parse_dynamic_response


# ------------- END convenience functions ------------------------------------


# -----------------------------------------------------------------------
# Begin autogenerated functions
# -----------------------------------------------------------------------
[docs] def load_gpudb_schemas( self ): """Saves all request and response schemas for GPUdb queries in a lookup table (lookup by query name). """ self.gpudb_schemas = {} name = "gpudb_response" RSP_SCHEMA_STR = """{"type":"record","name":"gpudb_response","fields":[{"name":"status","type":"string"},{"name":"message","type":"string"},{"name":"data_type","type":"string"},{"name":"data","type":"bytes"},{"name":"data_str","type":"string"}]}""" RSP_SCHEMA = Schema( "record", [("status", "string"), ("message", "string"), ("data_type", "string"), ("data", "object"), ("data_str", "string")] ) self.gpudb_schemas[ name ] = { "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "RSP_SCHEMA" : RSP_SCHEMA } name = "trigger_notification" RSP_SCHEMA_STR = """{"type":"record","name":"trigger_notification","fields":[{"name":"trigger_id","type":"string"},{"name":"set_id","type":"string"},{"name":"object_id","type":"string"},{"name":"object_data","type":"bytes"}]}""" RSP_SCHEMA = Schema( "record", [("trigger_id", "string"), ("set_id", "string"), ("object_id", "string"), ("object_data", "bytes")] ) self.gpudb_schemas[ name ] = { "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "RSP_SCHEMA" : RSP_SCHEMA } name = "/admin/add/ranks" REQ_SCHEMA_STR = """{"type":"record","name":"admin_add_ranks_request","fields":[{"name":"hosts","type":{"type":"array","items":"string"}},{"name":"config_params","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_add_ranks_response","fields":[{"name":"added_ranks","type":{"type":"array","items":"int"}},{"name":"results","type":{"type":"array","items":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("hosts", "array", [("string")]), ("config_params", "array", [("map", [("string")])]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("added_ranks", "array", [("int")]), ("results", "array", [("string")])] ) ENDPOINT = "/admin/add/ranks" 
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/alter/configuration" REQ_SCHEMA_STR = """{"type":"record","name":"admin_alter_configuration_request","fields":[{"name":"config_string","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_alter_configuration_response","fields":[{"name":"status","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("config_string", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("status", "string")] ) ENDPOINT = "/admin/alter/configuration" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/alter/jobs" REQ_SCHEMA_STR = """{"type":"record","name":"admin_alter_jobs_request","fields":[{"name":"job_ids","type":{"type":"array","items":"int"}},{"name":"action","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_alter_jobs_response","fields":[{"name":"job_ids","type":{"type":"array","items":"int"}},{"name":"action","type":"string"},{"name":"status","type":{"type":"array","items":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("job_ids", "array", [("int")]), ("action", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("job_ids", "array", [("int")]), ("action", "string"), ("status", "array", [("string")])] ) ENDPOINT = "/admin/alter/jobs" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/alter/shards" REQ_SCHEMA_STR = 
"""{"type":"record","name":"admin_alter_shards_request","fields":[{"name":"version","type":"long"},{"name":"use_index","type":"boolean"},{"name":"rank","type":{"type":"array","items":"int"}},{"name":"tom","type":{"type":"array","items":"int"}},{"name":"index","type":{"type":"array","items":"int"}},{"name":"backup_map_list","type":{"type":"array","items":"int"}},{"name":"backup_map_values","type":{"type":"array","items":{"type":"array","items":"int"}}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_alter_shards_response","fields":[{"name":"version","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("version", "long"), ("use_index", "boolean"), ("rank", "array", [("int")]), ("tom", "array", [("int")]), ("index", "array", [("int")]), ("backup_map_list", "array", [("int")]), ("backup_map_values", "array", [("array", [("int")])]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("version", "long")] ) ENDPOINT = "/admin/alter/shards" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/offline" REQ_SCHEMA_STR = """{"type":"record","name":"admin_offline_request","fields":[{"name":"offline","type":"boolean"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_offline_response","fields":[{"name":"is_offline","type":"boolean"}]}""" REQ_SCHEMA = Schema( "record", [("offline", "boolean"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("is_offline", "boolean")] ) ENDPOINT = "/admin/offline" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/rebalance" REQ_SCHEMA_STR = 
"""{"type":"record","name":"admin_rebalance_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"action","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_rebalance_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"message","type":{"type":"array","items":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("action", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("message", "array", [("string")])] ) ENDPOINT = "/admin/rebalance" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/remove/ranks" REQ_SCHEMA_STR = """{"type":"record","name":"admin_remove_ranks_request","fields":[{"name":"ranks","type":{"type":"array","items":"int"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_remove_ranks_response","fields":[{"name":"removed_ranks","type":{"type":"array","items":"int"}},{"name":"results","type":{"type":"array","items":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("ranks", "array", [("int")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("removed_ranks", "array", [("int")]), ("results", "array", [("string")])] ) ENDPOINT = "/admin/remove/ranks" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/show/alerts" REQ_SCHEMA_STR = """{"type":"record","name":"admin_show_alerts_request","fields":[{"name":"num_alerts","type":"int"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = 
"""{"type":"record","name":"admin_show_alerts_response","fields":[{"name":"timestamps","type":{"type":"array","items":"string"}},{"name":"types","type":{"type":"array","items":"string"}},{"name":"params","type":{"type":"array","items":{"type":"map","values":"string"}}}]}""" REQ_SCHEMA = Schema( "record", [("num_alerts", "int"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("timestamps", "array", [("string")]), ("types", "array", [("string")]), ("params", "array", [("map", [("string")])])] ) ENDPOINT = "/admin/show/alerts" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/show/configuration" REQ_SCHEMA_STR = """{"type":"record","name":"admin_show_configuration_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_show_configuration_response","fields":[{"name":"config_string","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("config_string", "string")] ) ENDPOINT = "/admin/show/configuration" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/show/jobs" REQ_SCHEMA_STR = """{"type":"record","name":"admin_show_jobs_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = 
"""{"type":"record","name":"admin_show_jobs_response","fields":[{"name":"job_id","type":{"type":"array","items":"int"}},{"name":"status","type":{"type":"array","items":"string"}},{"name":"endpoint_name","type":{"type":"array","items":"string"}},{"name":"time_received","type":{"type":"array","items":"long"}},{"name":"auth_id","type":{"type":"array","items":"string"}},{"name":"user_data","type":{"type":"array","items":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("job_id", "array", [("int")]), ("status", "array", [("string")]), ("endpoint_name", "array", [("string")]), ("time_received", "array", [("long")]), ("auth_id", "array", [("string")]), ("user_data", "array", [("string")])] ) ENDPOINT = "/admin/show/jobs" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/show/shards" REQ_SCHEMA_STR = """{"type":"record","name":"admin_show_shards_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_show_shards_response","fields":[{"name":"version","type":"long"},{"name":"rank","type":{"type":"array","items":"int"}},{"name":"tom","type":{"type":"array","items":"int"}}]}""" REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("version", "long"), ("rank", "array", [("int")]), ("tom", "array", [("int")])] ) ENDPOINT = "/admin/show/shards" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/shutdown" REQ_SCHEMA_STR = 
"""{"type":"record","name":"admin_shutdown_request","fields":[{"name":"exit_type","type":"string"},{"name":"authorization","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_shutdown_response","fields":[{"name":"exit_status","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("exit_type", "string"), ("authorization", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("exit_status", "string")] ) ENDPOINT = "/admin/shutdown" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/admin/verifydb" REQ_SCHEMA_STR = """{"type":"record","name":"admin_verify_db_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_verify_db_response","fields":[{"name":"verified_ok","type":"boolean"},{"name":"error_list","type":{"type":"array","items":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("verified_ok", "boolean"), ("error_list", "array", [("string")])] ) ENDPOINT = "/admin/verifydb" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/aggregate/convexhull" REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_convex_hull_request","fields":[{"name":"table_name","type":"string"},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = 
"""{"type":"record","name":"aggregate_convex_hull_response","fields":[{"name":"x_vector","type":{"type":"array","items":"double"}},{"name":"y_vector","type":{"type":"array","items":"double"}},{"name":"count","type":"int"},{"name":"is_valid","type":"boolean"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("x_column_name", "string"), ("y_column_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("x_vector", "array", [("double")]), ("y_vector", "array", [("double")]), ("count", "int"), ("is_valid", "boolean")] ) ENDPOINT = "/aggregate/convexhull" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/aggregate/groupby" REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_group_by_request","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"offset","type":"long"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_group_by_response","fields":[{"name":"response_schema_str","type":"string"},{"name":"binary_encoded_response","type":"bytes"},{"name":"json_encoded_response","type":"string"},{"name":"total_number_of_records","type":"long"},{"name":"has_more_records","type":"boolean"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")]), ("offset", "long"), ("limit", "long"), ("encoding", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("response_schema_str", "string"), ("binary_encoded_response", "bytes"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean")] ) RSP_SCHEMA_CEXT = Schema( "record", [("response_schema_str", "string"), ("binary_encoded_response", 
"object"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean")] ) ENDPOINT = "/aggregate/groupby" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT, "ENDPOINT" : ENDPOINT } name = "/aggregate/histogram" REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_histogram_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"start","type":"double"},{"name":"end","type":"double"},{"name":"interval","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_histogram_response","fields":[{"name":"counts","type":{"type":"array","items":"double"}},{"name":"start","type":"double"},{"name":"end","type":"double"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("start", "double"), ("end", "double"), ("interval", "double"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("counts", "array", [("double")]), ("start", "double"), ("end", "double")] ) ENDPOINT = "/aggregate/histogram" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/aggregate/kmeans" REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_k_means_request","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"k","type":"int"},{"name":"tolerance","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = 
"""{"type":"record","name":"aggregate_k_means_response","fields":[{"name":"means","type":{"type":"array","items":{"type":"array","items":"double"}}},{"name":"counts","type":{"type":"array","items":"long"}},{"name":"rms_dists","type":{"type":"array","items":"double"}},{"name":"count","type":"long"},{"name":"rms_dist","type":"double"},{"name":"tolerance","type":"double"},{"name":"num_iters","type":"int"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")]), ("k", "int"), ("tolerance", "double"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("means", "array", [("array", [("double")])]), ("counts", "array", [("long")]), ("rms_dists", "array", [("double")]), ("count", "long"), ("rms_dist", "double"), ("tolerance", "double"), ("num_iters", "int")] ) ENDPOINT = "/aggregate/kmeans" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/aggregate/minmax" REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_min_max_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_min_max_response","fields":[{"name":"min","type":"double"},{"name":"max","type":"double"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("min", "double"), ("max", "double")] ) ENDPOINT = "/aggregate/minmax" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/aggregate/minmax/geometry" REQ_SCHEMA_STR = 
"""{"type":"record","name":"aggregate_min_max_geometry_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_min_max_geometry_response","fields":[{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double")] ) ENDPOINT = "/aggregate/minmax/geometry" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/aggregate/statistics" REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_statistics_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"stats","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_statistics_response","fields":[{"name":"stats","type":{"type":"map","values":"double"}}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("stats", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("stats", "map", [("double")])] ) ENDPOINT = "/aggregate/statistics" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/aggregate/statistics/byrange" REQ_SCHEMA_STR = 
"""{"type":"record","name":"aggregate_statistics_by_range_request","fields":[{"name":"table_name","type":"string"},{"name":"select_expression","type":"string"},{"name":"column_name","type":"string"},{"name":"value_column_name","type":"string"},{"name":"stats","type":"string"},{"name":"start","type":"double"},{"name":"end","type":"double"},{"name":"interval","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_statistics_by_range_response","fields":[{"name":"stats","type":{"type":"map","values":{"type":"array","items":"double"}}}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("select_expression", "string"), ("column_name", "string"), ("value_column_name", "string"), ("stats", "string"), ("start", "double"), ("end", "double"), ("interval", "double"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("stats", "map", [("array", [("double")])])] ) ENDPOINT = "/aggregate/statistics/byrange" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/aggregate/unique" REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_unique_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"offset","type":"long"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_unique_response","fields":[{"name":"table_name","type":"string"},{"name":"response_schema_str","type":"string"},{"name":"binary_encoded_response","type":"bytes"},{"name":"json_encoded_response","type":"string"},{"name":"has_more_records","type":"boolean"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("offset", "long"), ("limit", "long"), ("encoding", 
"string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "bytes"), ("json_encoded_response", "string"), ("has_more_records", "boolean")] ) RSP_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "object"), ("json_encoded_response", "string"), ("has_more_records", "boolean")] ) ENDPOINT = "/aggregate/unique" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT, "ENDPOINT" : ENDPOINT } name = "/aggregate/unpivot" REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_unpivot_request","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"variable_column_name","type":"string"},{"name":"value_column_name","type":"string"},{"name":"pivoted_columns","type":{"type":"array","items":"string"}},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_unpivot_response","fields":[{"name":"table_name","type":"string"},{"name":"response_schema_str","type":"string"},{"name":"binary_encoded_response","type":"bytes"},{"name":"json_encoded_response","type":"string"},{"name":"total_number_of_records","type":"long"},{"name":"has_more_records","type":"boolean"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")]), ("variable_column_name", "string"), ("value_column_name", "string"), ("pivoted_columns", "array", [("string")]), ("encoding", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "bytes"), ("json_encoded_response", "string"), 
("total_number_of_records", "long"), ("has_more_records", "boolean")] ) RSP_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "object"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean")] ) ENDPOINT = "/aggregate/unpivot" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT, "ENDPOINT" : ENDPOINT } name = "/alter/system/properties" REQ_SCHEMA_STR = """{"type":"record","name":"alter_system_properties_request","fields":[{"name":"property_updates_map","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"alter_system_properties_response","fields":[{"name":"updated_properties_map","type":{"type":"map","values":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("property_updates_map", "map", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("updated_properties_map", "map", [("string")])] ) ENDPOINT = "/alter/system/properties" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/alter/table" REQ_SCHEMA_STR = """{"type":"record","name":"alter_table_request","fields":[{"name":"table_name","type":"string"},{"name":"action","type":"string"},{"name":"value","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = 
"""{"type":"record","name":"alter_table_response","fields":[{"name":"table_name","type":"string"},{"name":"action","type":"string"},{"name":"value","type":"string"},{"name":"type_id","type":"string"},{"name":"type_definition","type":"string"},{"name":"properties","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"label","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("action", "string"), ("value", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("action", "string"), ("value", "string"), ("type_id", "string"), ("type_definition", "string"), ("properties", "map", [("array", [("string")])]), ("label", "string")] ) ENDPOINT = "/alter/table" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/alter/table/metadata" REQ_SCHEMA_STR = """{"type":"record","name":"alter_table_metadata_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"metadata_map","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"alter_table_metadata_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"metadata_map","type":{"type":"map","values":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("metadata_map", "map", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("metadata_map", "map", [("string")])] ) ENDPOINT = "/alter/table/metadata" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/alter/user" REQ_SCHEMA_STR = 
"""{"type":"record","name":"alter_user_request","fields":[{"name":"name","type":"string"},{"name":"action","type":"string"},{"name":"value","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"alter_user_response","fields":[{"name":"name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("name", "string"), ("action", "string"), ("value", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("name", "string")] ) ENDPOINT = "/alter/user" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/append/records" REQ_SCHEMA_STR = """{"type":"record","name":"append_records_request","fields":[{"name":"table_name","type":"string"},{"name":"source_table_name","type":"string"},{"name":"field_map","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"append_records_response","fields":[{"name":"table_name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("source_table_name", "string"), ("field_map", "map", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string")] ) ENDPOINT = "/append/records" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/clear/statistics" REQ_SCHEMA_STR = """{"type":"record","name":"clear_statistics_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = 
"""{"type":"record","name":"clear_statistics_response","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string")] ) ENDPOINT = "/clear/statistics" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/clear/table" REQ_SCHEMA_STR = """{"type":"record","name":"clear_table_request","fields":[{"name":"table_name","type":"string"},{"name":"authorization","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"clear_table_response","fields":[{"name":"table_name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("authorization", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string")] ) ENDPOINT = "/clear/table" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/clear/tablemonitor" REQ_SCHEMA_STR = """{"type":"record","name":"clear_table_monitor_request","fields":[{"name":"topic_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"clear_table_monitor_response","fields":[{"name":"topic_id","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("topic_id", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("topic_id", "string")] ) ENDPOINT = "/clear/tablemonitor" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, 
"ENDPOINT" : ENDPOINT } name = "/clear/trigger" REQ_SCHEMA_STR = """{"type":"record","name":"clear_trigger_request","fields":[{"name":"trigger_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"clear_trigger_response","fields":[{"name":"trigger_id","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("trigger_id", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("trigger_id", "string")] ) ENDPOINT = "/clear/trigger" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/collect/statistics" REQ_SCHEMA_STR = """{"type":"record","name":"collect_statistics_request","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"collect_statistics_response","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")])] ) ENDPOINT = "/collect/statistics" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/job" REQ_SCHEMA_STR = """{"type":"record","name":"create_job_request","fields":[{"name":"endpoint","type":"string"},{"name":"request_encoding","type":"string"},{"name":"data","type":"bytes"},{"name":"data_str","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = 
"""{"type":"record","name":"create_job_response","fields":[{"name":"job_id","type":"int"}]}""" REQ_SCHEMA = Schema( "record", [("endpoint", "string"), ("request_encoding", "string"), ("data", "bytes"), ("data_str", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("job_id", "int")] ) ENDPOINT = "/create/job" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/jointable" REQ_SCHEMA_STR = """{"type":"record","name":"create_join_table_request","fields":[{"name":"join_table_name","type":"string"},{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"expressions","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_join_table_response","fields":[{"name":"join_table_name","type":"string"},{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("join_table_name", "string"), ("table_names", "array", [("string")]), ("column_names", "array", [("string")]), ("expressions", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("join_table_name", "string"), ("count", "long")] ) ENDPOINT = "/create/jointable" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/materializedview" REQ_SCHEMA_STR = """{"type":"record","name":"create_materialized_view_request","fields":[{"name":"table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_materialized_view_response","fields":[{"name":"table_name","type":"string"},{"name":"view_id","type":"string"}]}""" 
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("view_id", "string")] ) ENDPOINT = "/create/materializedview" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/proc" REQ_SCHEMA_STR = """{"type":"record","name":"create_proc_request","fields":[{"name":"proc_name","type":"string"},{"name":"execution_mode","type":"string"},{"name":"files","type":{"type":"map","values":"bytes"}},{"name":"command","type":"string"},{"name":"args","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_proc_response","fields":[{"name":"proc_name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("proc_name", "string"), ("execution_mode", "string"), ("files", "map", [("bytes")]), ("command", "string"), ("args", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("proc_name", "string")] ) ENDPOINT = "/create/proc" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/projection" REQ_SCHEMA_STR = """{"type":"record","name":"create_projection_request","fields":[{"name":"table_name","type":"string"},{"name":"projection_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_projection_response","fields":[{"name":"projection_name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("projection_name", "string"), ("column_names", "array", [("string")]), ("options", "map", [("string")])] ) 
RSP_SCHEMA = Schema( "record", [("projection_name", "string")] ) ENDPOINT = "/create/projection" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/role" REQ_SCHEMA_STR = """{"type":"record","name":"create_role_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_role_response","fields":[{"name":"name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("name", "string")] ) ENDPOINT = "/create/role" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/table" REQ_SCHEMA_STR = """{"type":"record","name":"create_table_request","fields":[{"name":"table_name","type":"string"},{"name":"type_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_table_response","fields":[{"name":"table_name","type":"string"},{"name":"type_id","type":"string"},{"name":"is_collection","type":"boolean"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("type_id", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("type_id", "string"), ("is_collection", "boolean")] ) ENDPOINT = "/create/table" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/tablemonitor" REQ_SCHEMA_STR = 
"""{"type":"record","name":"create_table_monitor_request","fields":[{"name":"table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_table_monitor_response","fields":[{"name":"topic_id","type":"string"},{"name":"table_name","type":"string"},{"name":"type_schema","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("topic_id", "string"), ("table_name", "string"), ("type_schema", "string")] ) ENDPOINT = "/create/tablemonitor" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/trigger/byarea" REQ_SCHEMA_STR = """{"type":"record","name":"create_trigger_by_area_request","fields":[{"name":"request_id","type":"string"},{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"x_column_name","type":"string"},{"name":"x_vector","type":{"type":"array","items":"double"}},{"name":"y_column_name","type":"string"},{"name":"y_vector","type":{"type":"array","items":"double"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_trigger_by_area_response","fields":[{"name":"trigger_id","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("request_id", "string"), ("table_names", "array", [("string")]), ("x_column_name", "string"), ("x_vector", "array", [("double")]), ("y_column_name", "string"), ("y_vector", "array", [("double")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("trigger_id", "string")] ) ENDPOINT = "/create/trigger/byarea" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/trigger/byrange" 
REQ_SCHEMA_STR = """{"type":"record","name":"create_trigger_by_range_request","fields":[{"name":"request_id","type":"string"},{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"column_name","type":"string"},{"name":"min","type":"double"},{"name":"max","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_trigger_by_range_response","fields":[{"name":"trigger_id","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("request_id", "string"), ("table_names", "array", [("string")]), ("column_name", "string"), ("min", "double"), ("max", "double"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("trigger_id", "string")] ) ENDPOINT = "/create/trigger/byrange" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/type" REQ_SCHEMA_STR = """{"type":"record","name":"create_type_request","fields":[{"name":"type_definition","type":"string"},{"name":"label","type":"string"},{"name":"properties","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_type_response","fields":[{"name":"type_id","type":"string"},{"name":"type_definition","type":"string"},{"name":"label","type":"string"},{"name":"properties","type":{"type":"map","values":{"type":"array","items":"string"}}}]}""" REQ_SCHEMA = Schema( "record", [("type_definition", "string"), ("label", "string"), ("properties", "map", [("array", [("string")])]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("type_id", "string"), ("type_definition", "string"), ("label", "string"), ("properties", "map", [("array", [("string")])])] ) ENDPOINT = "/create/type" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, 
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/union" REQ_SCHEMA_STR = """{"type":"record","name":"create_union_request","fields":[{"name":"table_name","type":"string"},{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"input_column_names","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"output_column_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_union_response","fields":[{"name":"table_name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("table_names", "array", [("string")]), ("input_column_names", "array", [("array", [("string")])]), ("output_column_names", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string")] ) ENDPOINT = "/create/union" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/user/external" REQ_SCHEMA_STR = """{"type":"record","name":"create_user_external_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_user_external_response","fields":[{"name":"name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("name", "string")] ) ENDPOINT = "/create/user/external" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/create/user/internal" REQ_SCHEMA_STR = 
"""{"type":"record","name":"create_user_internal_request","fields":[{"name":"name","type":"string"},{"name":"password","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"create_user_internal_response","fields":[{"name":"name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("name", "string"), ("password", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("name", "string")] ) ENDPOINT = "/create/user/internal" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/delete/proc" REQ_SCHEMA_STR = """{"type":"record","name":"delete_proc_request","fields":[{"name":"proc_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"delete_proc_response","fields":[{"name":"proc_name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("proc_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("proc_name", "string")] ) ENDPOINT = "/delete/proc" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/delete/records" REQ_SCHEMA_STR = """{"type":"record","name":"delete_records_request","fields":[{"name":"table_name","type":"string"},{"name":"expressions","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"delete_records_response","fields":[{"name":"count_deleted","type":"long"},{"name":"counts_deleted","type":{"type":"array","items":"long"}}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("expressions", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( 
"record", [("count_deleted", "long"), ("counts_deleted", "array", [("long")])] ) ENDPOINT = "/delete/records" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/delete/role" REQ_SCHEMA_STR = """{"type":"record","name":"delete_role_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"delete_role_response","fields":[{"name":"name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("name", "string")] ) ENDPOINT = "/delete/role" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/delete/user" REQ_SCHEMA_STR = """{"type":"record","name":"delete_user_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"delete_user_response","fields":[{"name":"name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("name", "string")] ) ENDPOINT = "/delete/user" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/execute/proc" REQ_SCHEMA_STR = 
"""{"type":"record","name":"execute_proc_request","fields":[{"name":"proc_name","type":"string"},{"name":"params","type":{"type":"map","values":"string"}},{"name":"bin_params","type":{"type":"map","values":"bytes"}},{"name":"input_table_names","type":{"type":"array","items":"string"}},{"name":"input_column_names","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"output_table_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"execute_proc_response","fields":[{"name":"run_id","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("proc_name", "string"), ("params", "map", [("string")]), ("bin_params", "map", [("bytes")]), ("input_table_names", "array", [("string")]), ("input_column_names", "map", [("array", [("string")])]), ("output_table_names", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("run_id", "string")] ) ENDPOINT = "/execute/proc" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/execute/sql" REQ_SCHEMA_STR = """{"type":"record","name":"execute_sql_request","fields":[{"name":"Query","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"execute_sql_response","fields":[{"name":"query_execution_plan","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("Query", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("query_execution_plan", "string")] ) ENDPOINT = "/execute/sql" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter" REQ_SCHEMA_STR = 
"""{"type":"record","name":"filter_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"expression","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("expression", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/byarea" REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_area_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"x_column_name","type":"string"},{"name":"x_vector","type":{"type":"array","items":"double"}},{"name":"y_column_name","type":"string"},{"name":"y_vector","type":{"type":"array","items":"double"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_area_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("x_column_name", "string"), ("x_vector", "array", [("double")]), ("y_column_name", "string"), ("y_vector", "array", [("double")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/byarea" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/byarea/geometry" REQ_SCHEMA_STR = 
"""{"type":"record","name":"filter_by_area_geometry_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"x_vector","type":{"type":"array","items":"double"}},{"name":"y_vector","type":{"type":"array","items":"double"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_area_geometry_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("x_vector", "array", [("double")]), ("y_vector", "array", [("double")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/byarea/geometry" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/bybox" REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_box_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"x_column_name","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"y_column_name","type":"string"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_box_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("x_column_name", "string"), ("min_x", "double"), ("max_x", "double"), ("y_column_name", "string"), ("min_y", "double"), ("max_y", "double"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/bybox" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, 
"REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/bybox/geometry" REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_box_geometry_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_box_geometry_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/bybox/geometry" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/bygeometry" REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_geometry_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"input_wkt","type":"string"},{"name":"operation","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_geometry_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("input_wkt", "string"), ("operation", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/bygeometry" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, 
"REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/bylist" REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_list_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_values_map","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_list_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_values_map", "map", [("array", [("string")])]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/bylist" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/byradius" REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_radius_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"x_column_name","type":"string"},{"name":"x_center","type":"double"},{"name":"y_column_name","type":"string"},{"name":"y_center","type":"double"},{"name":"radius","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_radius_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("x_column_name", "string"), ("x_center", "double"), ("y_column_name", "string"), ("y_center", "double"), ("radius", "double"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/byradius" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : 
RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/byradius/geometry" REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_radius_geometry_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"x_center","type":"double"},{"name":"y_center","type":"double"},{"name":"radius","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_radius_geometry_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("x_center", "double"), ("y_center", "double"), ("radius", "double"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/byradius/geometry" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/byrange" REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_range_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"lower_bound","type":"double"},{"name":"upper_bound","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_range_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("lower_bound", "double"), ("upper_bound", "double"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/byrange" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } 
name = "/filter/byseries" REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_series_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"track_id","type":"string"},{"name":"target_track_ids","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_series_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("track_id", "string"), ("target_track_ids", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/byseries" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/bystring" REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_string_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"expression","type":"string"},{"name":"mode","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_string_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("expression", "string"), ("mode", "string"), ("column_names", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/bystring" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/bytable" REQ_SCHEMA_STR = 
"""{"type":"record","name":"filter_by_table_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"source_table_name","type":"string"},{"name":"source_table_column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_table_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("source_table_name", "string"), ("source_table_column_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/bytable" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/filter/byvalue" REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_value_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"is_string","type":"boolean"},{"name":"value","type":"double"},{"name":"value_str","type":"string"},{"name":"column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_value_response","fields":[{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("is_string", "boolean"), ("value", "double"), ("value_str", "string"), ("column_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "long")] ) ENDPOINT = "/filter/byvalue" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/get/job" REQ_SCHEMA_STR = 
"""{"type":"record","name":"get_job_request","fields":[{"name":"job_id","type":"int"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"get_job_response","fields":[{"name":"endpoint","type":"string"},{"name":"job_status","type":"string"},{"name":"running","type":"boolean"},{"name":"progress","type":"int"},{"name":"successful","type":"boolean"},{"name":"response_encoding","type":"string"},{"name":"job_response","type":"bytes"},{"name":"job_response_str","type":"string"},{"name":"status_map","type":{"type":"map","values":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("job_id", "int"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("endpoint", "string"), ("job_status", "string"), ("running", "boolean"), ("progress", "int"), ("successful", "boolean"), ("response_encoding", "string"), ("job_response", "bytes"), ("job_response_str", "string"), ("status_map", "map", [("string")])] ) ENDPOINT = "/get/job" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/get/records" REQ_SCHEMA_STR = """{"type":"record","name":"get_records_request","fields":[{"name":"table_name","type":"string"},{"name":"offset","type":"long"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"get_records_response","fields":[{"name":"table_name","type":"string"},{"name":"type_name","type":"string"},{"name":"type_schema","type":"string"},{"name":"records_binary","type":{"type":"array","items":"bytes"}},{"name":"records_json","type":{"type":"array","items":"string"}},{"name":"total_number_of_records","type":"long"},{"name":"has_more_records","type":"boolean"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("offset", "long"), ("limit", "long"), 
("encoding", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("type_name", "string"), ("type_schema", "string"), ("records_binary", "array", [("bytes")]), ("records_json", "array", [("string")]), ("total_number_of_records", "long"), ("has_more_records", "boolean")] ) RSP_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("type_name", "string"), ("type_schema", "string"), ("records_binary", "object_array"), ("records_json", "array", [("string")]), ("total_number_of_records", "long"), ("has_more_records", "boolean")] ) ENDPOINT = "/get/records" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT, "ENDPOINT" : ENDPOINT } name = "/get/records/bycolumn" REQ_SCHEMA_STR = """{"type":"record","name":"get_records_by_column_request","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"offset","type":"long"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"get_records_by_column_response","fields":[{"name":"table_name","type":"string"},{"name":"response_schema_str","type":"string"},{"name":"binary_encoded_response","type":"bytes"},{"name":"json_encoded_response","type":"string"},{"name":"total_number_of_records","type":"long"},{"name":"has_more_records","type":"boolean"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")]), ("offset", "long"), ("limit", "long"), ("encoding", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "bytes"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), 
("has_more_records", "boolean")] ) RSP_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "object"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean")] ) ENDPOINT = "/get/records/bycolumn" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT, "ENDPOINT" : ENDPOINT } name = "/get/records/byseries" REQ_SCHEMA_STR = """{"type":"record","name":"get_records_by_series_request","fields":[{"name":"table_name","type":"string"},{"name":"world_table_name","type":"string"},{"name":"offset","type":"int"},{"name":"limit","type":"int"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"get_records_by_series_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"type_names","type":{"type":"array","items":"string"}},{"name":"type_schemas","type":{"type":"array","items":"string"}},{"name":"list_records_binary","type":{"type":"array","items":{"type":"array","items":"bytes"}}},{"name":"list_records_json","type":{"type":"array","items":{"type":"array","items":"string"}}}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("world_table_name", "string"), ("offset", "int"), ("limit", "int"), ("encoding", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("type_names", "array", [("string")]), ("type_schemas", "array", [("string")]), ("list_records_binary", "array", [("array", [("bytes")])]), ("list_records_json", "array", [("array", [("string")])])] ) RSP_SCHEMA_CEXT = Schema( "record", [("table_names", "array", [("string")]), ("type_names", "array", [("string")]), ("type_schemas", "array", [("string")]), 
("list_records_binary", "array", [("object_array")]), ("list_records_json", "array", [("array", [("string")])])] ) ENDPOINT = "/get/records/byseries" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT, "ENDPOINT" : ENDPOINT } name = "/get/records/fromcollection" REQ_SCHEMA_STR = """{"type":"record","name":"get_records_from_collection_request","fields":[{"name":"table_name","type":"string"},{"name":"offset","type":"long"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"get_records_from_collection_response","fields":[{"name":"table_name","type":"string"},{"name":"type_names","type":{"type":"array","items":"string"}},{"name":"records_binary","type":{"type":"array","items":"bytes"}},{"name":"records_json","type":{"type":"array","items":"string"}},{"name":"record_ids","type":{"type":"array","items":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("offset", "long"), ("limit", "long"), ("encoding", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("type_names", "array", [("string")]), ("records_binary", "array", [("bytes")]), ("records_json", "array", [("string")]), ("record_ids", "array", [("string")])] ) RSP_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("type_names", "array", [("string")]), ("records_binary", "object_array"), ("records_json", "array", [("string")]), ("record_ids", "array", [("string")])] ) ENDPOINT = "/get/records/fromcollection" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT, "ENDPOINT" : ENDPOINT } name = "/grant/permission/system" REQ_SCHEMA_STR = 
"""{"type":"record","name":"grant_permission_system_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"grant_permission_system_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string")] ) ENDPOINT = "/grant/permission/system" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/grant/permission/table" REQ_SCHEMA_STR = """{"type":"record","name":"grant_permission_table_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"table_name","type":"string"},{"name":"filter_expression","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"grant_permission_table_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"table_name","type":"string"},{"name":"filter_expression","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("table_name", "string"), ("filter_expression", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("table_name", "string"), ("filter_expression", "string")] ) ENDPOINT = "/grant/permission/table" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/grant/role" REQ_SCHEMA_STR = 
"""{"type":"record","name":"grant_role_request","fields":[{"name":"role","type":"string"},{"name":"member","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"grant_role_response","fields":[{"name":"role","type":"string"},{"name":"member","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("role", "string"), ("member", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("role", "string"), ("member", "string")] ) ENDPOINT = "/grant/role" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/has/proc" REQ_SCHEMA_STR = """{"type":"record","name":"has_proc_request","fields":[{"name":"proc_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"has_proc_response","fields":[{"name":"proc_name","type":"string"},{"name":"proc_exists","type":"boolean"}]}""" REQ_SCHEMA = Schema( "record", [("proc_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("proc_name", "string"), ("proc_exists", "boolean")] ) ENDPOINT = "/has/proc" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/has/table" REQ_SCHEMA_STR = """{"type":"record","name":"has_table_request","fields":[{"name":"table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"has_table_response","fields":[{"name":"table_name","type":"string"},{"name":"table_exists","type":"boolean"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("table_exists", "boolean")] ) 
# --- Autogenerated endpoint schema registry (continuation) ---
# NOTE(review): this chunk begins and ends mid-statement; it is the interior
# of the autogenerated method that populates self.gpudb_schemas.  Each entry
# maps an endpoint path (used as the dict key, `name`) to:
#   "REQ_SCHEMA_STR"/"RSP_SCHEMA_STR" - the raw Avro JSON schema strings for
#                                       the request/response records,
#   "REQ_SCHEMA"/"RSP_SCHEMA"         - the corresponding pre-built Schema
#                                       objects (nested tuples mirror the
#                                       Avro array/map/record structure),
#   "REQ_SCHEMA_CEXT"                 - present only for /insert/records and
#                                       /update/records: a request-schema
#                                       variant using "object_array" in place
#                                       of "array of bytes", presumably for
#                                       the C-extension encoding path
#                                       (TODO confirm against the protocol
#                                       package),
#   "ENDPOINT"                        - the endpoint path again, for request
#                                       dispatch.
# NOTE(review): do NOT "fix" apparent misspellings inside the schema strings
# (e.g. "stastistics_map" in /show/statistics, camelCase "jobIds" in
# /show/system/timing) - they must match the server-side Avro schemas
# byte-for-byte or decoding will fail.
ENDPOINT = "/has/table" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/has/type" REQ_SCHEMA_STR = """{"type":"record","name":"has_type_request","fields":[{"name":"type_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"has_type_response","fields":[{"name":"type_id","type":"string"},{"name":"type_exists","type":"boolean"}]}""" REQ_SCHEMA = Schema( "record", [("type_id", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("type_id", "string"), ("type_exists", "boolean")] ) ENDPOINT = "/has/type" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/insert/records" REQ_SCHEMA_STR = """{"type":"record","name":"insert_records_request","fields":[{"name":"table_name","type":"string"},{"name":"list","type":{"type":"array","items":"bytes"}},{"name":"list_str","type":{"type":"array","items":"string"}},{"name":"list_encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"insert_records_response","fields":[{"name":"record_ids","type":{"type":"array","items":"string"}},{"name":"count_inserted","type":"int"},{"name":"count_updated","type":"int"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("list", "array", [("bytes")]), ("list_str", "array", [("string")]), ("list_encoding", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("record_ids", "array", [("string")]), ("count_inserted", "int"), ("count_updated", "int")] ) REQ_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("list", "object_array"), ("list_str", "array", [("string")]), ("list_encoding", "string"), ("options", 
"map", [("string")])] ) ENDPOINT = "/insert/records" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "REQ_SCHEMA_CEXT" : REQ_SCHEMA_CEXT, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/insert/records/random" REQ_SCHEMA_STR = """{"type":"record","name":"insert_records_random_request","fields":[{"name":"table_name","type":"string"},{"name":"count","type":"long"},{"name":"options","type":{"type":"map","values":{"type":"map","values":"double"}}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"insert_records_random_response","fields":[{"name":"table_name","type":"string"},{"name":"count","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("count", "long"), ("options", "map", [("map", [("double")])])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("count", "long")] ) ENDPOINT = "/insert/records/random" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/insert/symbol" REQ_SCHEMA_STR = """{"type":"record","name":"insert_symbol_request","fields":[{"name":"symbol_id","type":"string"},{"name":"symbol_format","type":"string"},{"name":"symbol_data","type":"bytes"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"insert_symbol_response","fields":[{"name":"symbol_id","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("symbol_id", "string"), ("symbol_format", "string"), ("symbol_data", "bytes"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("symbol_id", "string")] ) ENDPOINT = "/insert/symbol" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/kill/proc" REQ_SCHEMA_STR = 
"""{"type":"record","name":"kill_proc_request","fields":[{"name":"run_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"kill_proc_response","fields":[{"name":"run_ids","type":{"type":"array","items":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("run_id", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("run_ids", "array", [("string")])] ) ENDPOINT = "/kill/proc" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/lock/table" REQ_SCHEMA_STR = """{"type":"record","name":"lock_table_request","fields":[{"name":"table_name","type":"string"},{"name":"lock_type","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"lock_table_response","fields":[{"name":"lock_type","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("lock_type", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("lock_type", "string")] ) ENDPOINT = "/lock/table" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/merge/records" REQ_SCHEMA_STR = """{"type":"record","name":"merge_records_request","fields":[{"name":"table_name","type":"string"},{"name":"source_table_names","type":{"type":"array","items":"string"}},{"name":"field_maps","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"merge_records_response","fields":[{"name":"table_name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("source_table_names", "array", [("string")]), ("field_maps", 
"array", [("map", [("string")])]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string")] ) ENDPOINT = "/merge/records" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/replace/tom" REQ_SCHEMA_STR = """{"type":"record","name":"admin_replace_tom_request","fields":[{"name":"old_rank_tom","type":"long"},{"name":"new_rank_tom","type":"long"}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"admin_replace_tom_response","fields":[{"name":"old_rank_tom","type":"long"},{"name":"new_rank_tom","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("old_rank_tom", "long"), ("new_rank_tom", "long")] ) RSP_SCHEMA = Schema( "record", [("old_rank_tom", "long"), ("new_rank_tom", "long")] ) ENDPOINT = "/replace/tom" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/revoke/permission/system" REQ_SCHEMA_STR = """{"type":"record","name":"revoke_permission_system_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"revoke_permission_system_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string")] ) ENDPOINT = "/revoke/permission/system" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/revoke/permission/table" REQ_SCHEMA_STR = 
"""{"type":"record","name":"revoke_permission_table_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"revoke_permission_table_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"table_name","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("table_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("table_name", "string")] ) ENDPOINT = "/revoke/permission/table" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/revoke/role" REQ_SCHEMA_STR = """{"type":"record","name":"revoke_role_request","fields":[{"name":"role","type":"string"},{"name":"member","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"revoke_role_response","fields":[{"name":"role","type":"string"},{"name":"member","type":"string"}]}""" REQ_SCHEMA = Schema( "record", [("role", "string"), ("member", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("role", "string"), ("member", "string")] ) ENDPOINT = "/revoke/role" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/proc" REQ_SCHEMA_STR = """{"type":"record","name":"show_proc_request","fields":[{"name":"proc_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = 
"""{"type":"record","name":"show_proc_response","fields":[{"name":"proc_names","type":{"type":"array","items":"string"}},{"name":"execution_modes","type":{"type":"array","items":"string"}},{"name":"files","type":{"type":"array","items":{"type":"map","values":"bytes"}}},{"name":"commands","type":{"type":"array","items":"string"}},{"name":"args","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"array","items":{"type":"map","values":"string"}}}]}""" REQ_SCHEMA = Schema( "record", [("proc_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("proc_names", "array", [("string")]), ("execution_modes", "array", [("string")]), ("files", "array", [("map", [("bytes")])]), ("commands", "array", [("string")]), ("args", "array", [("array", [("string")])]), ("options", "array", [("map", [("string")])])] ) ENDPOINT = "/show/proc" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/proc/status" REQ_SCHEMA_STR = """{"type":"record","name":"show_proc_status_request","fields":[{"name":"run_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = 
"""{"type":"record","name":"show_proc_status_response","fields":[{"name":"proc_names","type":{"type":"map","values":"string"}},{"name":"params","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"bin_params","type":{"type":"map","values":{"type":"map","values":"bytes"}}},{"name":"input_table_names","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"input_column_names","type":{"type":"map","values":{"type":"map","values":{"type":"array","items":"string"}}}},{"name":"output_table_names","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"overall_statuses","type":{"type":"map","values":"string"}},{"name":"statuses","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"messages","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"results","type":{"type":"map","values":{"type":"map","values":{"type":"map","values":"string"}}}},{"name":"bin_results","type":{"type":"map","values":{"type":"map","values":{"type":"map","values":"bytes"}}}},{"name":"timings","type":{"type":"map","values":{"type":"map","values":{"type":"map","values":"long"}}}}]}""" REQ_SCHEMA = Schema( "record", [("run_id", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("proc_names", "map", [("string")]), ("params", "map", [("map", [("string")])]), ("bin_params", "map", [("map", [("bytes")])]), ("input_table_names", "map", [("array", [("string")])]), ("input_column_names", "map", [("map", [("array", [("string")])])]), ("output_table_names", "map", [("array", [("string")])]), ("options", "map", [("map", [("string")])]), ("overall_statuses", "map", [("string")]), ("statuses", "map", [("map", [("string")])]), ("messages", "map", [("map", [("string")])]), ("results", "map", [("map", [("map", [("string")])])]), ("bin_results", "map", [("map", [("map", [("bytes")])])]), ("timings", "map", 
[("map", [("map", [("long")])])])] ) ENDPOINT = "/show/proc/status" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/security" REQ_SCHEMA_STR = """{"type":"record","name":"show_security_request","fields":[{"name":"names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"show_security_response","fields":[{"name":"types","type":{"type":"map","values":"string"}},{"name":"roles","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"permissions","type":{"type":"map","values":{"type":"array","items":{"type":"map","values":"string"}}}}]}""" REQ_SCHEMA = Schema( "record", [("names", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("types", "map", [("string")]), ("roles", "map", [("array", [("string")])]), ("permissions", "map", [("array", [("map", [("string")])])])] ) ENDPOINT = "/show/security" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/statistics" REQ_SCHEMA_STR = """{"type":"record","name":"show_statistics_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"show_statistics_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"stastistics_map","type":{"type":"array","items":{"type":"array","items":{"type":"map","values":"string"}}}}]}""" REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("stastistics_map", "array", 
[("array", [("map", [("string")])])])] ) ENDPOINT = "/show/statistics" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/system/properties" REQ_SCHEMA_STR = """{"type":"record","name":"show_system_properties_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"show_system_properties_response","fields":[{"name":"property_map","type":{"type":"map","values":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("property_map", "map", [("string")])] ) ENDPOINT = "/show/system/properties" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/system/status" REQ_SCHEMA_STR = """{"type":"record","name":"show_system_status_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"show_system_status_response","fields":[{"name":"status_map","type":{"type":"map","values":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("status_map", "map", [("string")])] ) ENDPOINT = "/show/system/status" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/system/timing" REQ_SCHEMA_STR = """{"type":"record","name":"show_system_timing_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = 
"""{"type":"record","name":"show_system_timing_response","fields":[{"name":"endpoints","type":{"type":"array","items":"string"}},{"name":"time_in_ms","type":{"type":"array","items":"float"}},{"name":"jobIds","type":{"type":"array","items":"string"}}]}""" REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("endpoints", "array", [("string")]), ("time_in_ms", "array", [("float")]), ("jobIds", "array", [("string")])] ) ENDPOINT = "/show/system/timing" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/table" REQ_SCHEMA_STR = """{"type":"record","name":"show_table_request","fields":[{"name":"table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"show_table_response","fields":[{"name":"table_name","type":"string"},{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"table_descriptions","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"type_ids","type":{"type":"array","items":"string"}},{"name":"type_schemas","type":{"type":"array","items":"string"}},{"name":"type_labels","type":{"type":"array","items":"string"}},{"name":"properties","type":{"type":"array","items":{"type":"map","values":{"type":"array","items":"string"}}}},{"name":"additional_info","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"sizes","type":{"type":"array","items":"long"}},{"name":"full_sizes","type":{"type":"array","items":"long"}},{"name":"join_sizes","type":{"type":"array","items":"double"}},{"name":"total_size","type":"long"},{"name":"total_full_size","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("table_names", "array", [("string")]), 
("table_descriptions", "array", [("array", [("string")])]), ("type_ids", "array", [("string")]), ("type_schemas", "array", [("string")]), ("type_labels", "array", [("string")]), ("properties", "array", [("map", [("array", [("string")])])]), ("additional_info", "array", [("map", [("string")])]), ("sizes", "array", [("long")]), ("full_sizes", "array", [("long")]), ("join_sizes", "array", [("double")]), ("total_size", "long"), ("total_full_size", "long")] ) ENDPOINT = "/show/table" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/table/metadata" REQ_SCHEMA_STR = """{"type":"record","name":"show_table_metadata_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"show_table_metadata_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"metadata_maps","type":{"type":"array","items":{"type":"map","values":"string"}}}]}""" REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("metadata_maps", "array", [("map", [("string")])])] ) ENDPOINT = "/show/table/metadata" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/tables/bytype" REQ_SCHEMA_STR = """{"type":"record","name":"show_tables_by_type_request","fields":[{"name":"type_id","type":"string"},{"name":"label","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"show_tables_by_type_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}}]}""" 
REQ_SCHEMA = Schema( "record", [("type_id", "string"), ("label", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")])] ) ENDPOINT = "/show/tables/bytype" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/triggers" REQ_SCHEMA_STR = """{"type":"record","name":"show_triggers_request","fields":[{"name":"trigger_ids","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"show_triggers_response","fields":[{"name":"trigger_map","type":{"type":"map","values":{"type":"map","values":"string"}}}]}""" REQ_SCHEMA = Schema( "record", [("trigger_ids", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("trigger_map", "map", [("map", [("string")])])] ) ENDPOINT = "/show/triggers" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/show/types" REQ_SCHEMA_STR = """{"type":"record","name":"show_types_request","fields":[{"name":"type_id","type":"string"},{"name":"label","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"show_types_response","fields":[{"name":"type_ids","type":{"type":"array","items":"string"}},{"name":"type_schemas","type":{"type":"array","items":"string"}},{"name":"labels","type":{"type":"array","items":"string"}},{"name":"properties","type":{"type":"array","items":{"type":"map","values":{"type":"array","items":"string"}}}}]}""" REQ_SCHEMA = Schema( "record", [("type_id", "string"), ("label", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("type_ids", "array", [("string")]), 
("type_schemas", "array", [("string")]), ("labels", "array", [("string")]), ("properties", "array", [("map", [("array", [("string")])])])] ) ENDPOINT = "/show/types" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/update/records" REQ_SCHEMA_STR = """{"type":"record","name":"update_records_request","fields":[{"name":"table_name","type":"string"},{"name":"expressions","type":{"type":"array","items":"string"}},{"name":"new_values_maps","type":{"type":"array","items":{"type":"map","values":["string","null"]}}},{"name":"records_to_insert","type":{"type":"array","items":"bytes"}},{"name":"records_to_insert_str","type":{"type":"array","items":"string"}},{"name":"record_encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"update_records_response","fields":[{"name":"count_updated","type":"long"},{"name":"counts_updated","type":{"type":"array","items":"long"}},{"name":"count_inserted","type":"long"},{"name":"counts_inserted","type":{"type":"array","items":"long"}}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("expressions", "array", [("string")]), ("new_values_maps", "array", [("map", [("nullable", [("string")])])]), ("records_to_insert", "array", [("bytes")]), ("records_to_insert_str", "array", [("string")]), ("record_encoding", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count_updated", "long"), ("counts_updated", "array", [("long")]), ("count_inserted", "long"), ("counts_inserted", "array", [("long")])] ) REQ_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("expressions", "array", [("string")]), ("new_values_maps", "array", [("map", [("nullable", [("string")])])]), ("records_to_insert", "object_array"), ("records_to_insert_str", "array", [("string")]), ("record_encoding", "string"), 
("options", "map", [("string")])] ) ENDPOINT = "/update/records" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "REQ_SCHEMA_CEXT" : REQ_SCHEMA_CEXT, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/update/records/byseries" REQ_SCHEMA_STR = """{"type":"record","name":"update_records_by_series_request","fields":[{"name":"table_name","type":"string"},{"name":"world_table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"reserved","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"update_records_by_series_response","fields":[{"name":"count","type":"int"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("world_table_name", "string"), ("view_name", "string"), ("reserved", "array", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("count", "int")] ) ENDPOINT = "/update/records/byseries" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/visualize/image" REQ_SCHEMA_STR = 
"""{"type":"record","name":"visualize_image_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"world_table_names","type":{"type":"array","items":"string"}},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"geometry_column_name","type":"string"},{"name":"track_ids","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"bg_color","type":"long"},{"name":"style_options","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_response","fields":[{"name":"width","type":"double"},{"name":"height","type":"double"},{"name":"bg_color","type":"long"},{"name":"image_data","type":"bytes"}]}""" REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("world_table_names", "array", [("string")]), ("x_column_name", "string"), ("y_column_name", "string"), ("geometry_column_name", "string"), ("track_ids", "array", [("array", [("string")])]), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("projection", "string"), ("bg_color", "long"), ("style_options", "map", [("array", [("string")])]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("width", "double"), ("height", "double"), ("bg_color", "long"), ("image_data", "bytes")] ) ENDPOINT = "/visualize/image" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/visualize/image/chart" REQ_SCHEMA_STR = 
"""{"type":"record","name":"visualize_image_chart_request","fields":[{"name":"table_name","type":"string"},{"name":"x_column_names","type":{"type":"array","items":"string"}},{"name":"y_column_names","type":{"type":"array","items":"string"}},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"bg_color","type":"string"},{"name":"style_options","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_chart_response","fields":[{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"bg_color","type":"string"},{"name":"image_data","type":"bytes"},{"name":"axes_info","type":{"type":"map","values":{"type":"array","items":"string"}}}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("x_column_names", "array", [("string")]), ("y_column_names", "array", [("string")]), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("bg_color", "string"), ("style_options", "map", [("array", [("string")])]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("bg_color", "string"), ("image_data", "bytes"), ("axes_info", "map", [("array", [("string")])])] ) ENDPOINT = "/visualize/image/chart" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/visualize/image/classbreak" REQ_SCHEMA_STR = 
"""{"type":"record","name":"visualize_image_classbreak_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"world_table_names","type":{"type":"array","items":"string"}},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"geometry_column_name","type":"string"},{"name":"track_ids","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"cb_attr","type":"string"},{"name":"cb_vals","type":{"type":"array","items":"string"}},{"name":"cb_pointcolor_attr","type":"string"},{"name":"cb_pointcolor_vals","type":{"type":"array","items":"string"}},{"name":"cb_pointsize_attr","type":"string"},{"name":"cb_pointsize_vals","type":{"type":"array","items":"string"}},{"name":"cb_pointshape_attr","type":"string"},{"name":"cb_pointshape_vals","type":{"type":"array","items":"string"}},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"bg_color","type":"long"},{"name":"style_options","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_classbreak_response","fields":[{"name":"width","type":"double"},{"name":"height","type":"double"},{"name":"bg_color","type":"long"},{"name":"image_data","type":"bytes"}]}""" REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("world_table_names", "array", [("string")]), ("x_column_name", "string"), ("y_column_name", "string"), ("geometry_column_name", "string"), ("track_ids", "array", [("array", [("string")])]), ("cb_attr", "string"), ("cb_vals", "array", [("string")]), ("cb_pointcolor_attr", "string"), ("cb_pointcolor_vals", "array", [("string")]), ("cb_pointsize_attr", "string"), ("cb_pointsize_vals", "array", 
[("string")]), ("cb_pointshape_attr", "string"), ("cb_pointshape_vals", "array", [("string")]), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("projection", "string"), ("bg_color", "long"), ("style_options", "map", [("array", [("string")])]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("width", "double"), ("height", "double"), ("bg_color", "long"), ("image_data", "bytes")] ) ENDPOINT = "/visualize/image/classbreak" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/visualize/image/contour" REQ_SCHEMA_STR = """{"type":"record","name":"visualize_image_contour_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"value_column_name","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"style_options","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_contour_response","fields":[{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"bg_color","type":"long"},{"name":"image_data","type":"bytes"},{"name":"grid_data","type":"bytes"},{"name":"fill_n0","type":"double"},{"name":"fill_nn","type":"double"},{"name":"min_level","type":"double"},{"name":"max_level","type":"double"},{"name":"samples_used","type":"long"}]}""" REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("x_column_name", "string"), ("y_column_name", "string"), ("value_column_name", "string"), ("min_x", "double"), 
("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("projection", "string"), ("style_options", "map", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("width", "int"), ("height", "int"), ("bg_color", "long"), ("image_data", "bytes"), ("grid_data", "bytes"), ("fill_n0", "double"), ("fill_nn", "double"), ("min_level", "double"), ("max_level", "double"), ("samples_used", "long")] ) ENDPOINT = "/visualize/image/contour" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/visualize/image/heatmap" REQ_SCHEMA_STR = """{"type":"record","name":"visualize_image_heatmap_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"value_column_name","type":"string"},{"name":"geometry_column_name","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"style_options","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_heatmap_response","fields":[{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"bg_color","type":"long"},{"name":"image_data","type":"bytes"}]}""" REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("x_column_name", "string"), ("y_column_name", "string"), ("value_column_name", "string"), ("geometry_column_name", "string"), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("projection", "string"), ("style_options", 
"map", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("width", "int"), ("height", "int"), ("bg_color", "long"), ("image_data", "bytes")] ) ENDPOINT = "/visualize/image/heatmap" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/visualize/image/labels" REQ_SCHEMA_STR = """{"type":"record","name":"visualize_image_labels_request","fields":[{"name":"table_name","type":"string"},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"x_offset","type":"string"},{"name":"y_offset","type":"string"},{"name":"text_string","type":"string"},{"name":"font","type":"string"},{"name":"text_color","type":"string"},{"name":"text_angle","type":"string"},{"name":"text_scale","type":"string"},{"name":"draw_box","type":"string"},{"name":"draw_leader","type":"string"},{"name":"line_width","type":"string"},{"name":"line_color","type":"string"},{"name":"fill_color","type":"string"},{"name":"leader_x_column_name","type":"string"},{"name":"leader_y_column_name","type":"string"},{"name":"filter","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_labels_response","fields":[{"name":"width","type":"double"},{"name":"height","type":"double"},{"name":"bg_color","type":"long"},{"name":"image_data","type":"bytes"}]}""" REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("x_column_name", "string"), ("y_column_name", "string"), ("x_offset", "string"), ("y_offset", "string"), ("text_string", "string"), ("font", "string"), ("text_color", "string"), ("text_angle", "string"), 
("text_scale", "string"), ("draw_box", "string"), ("draw_leader", "string"), ("line_width", "string"), ("line_color", "string"), ("fill_color", "string"), ("leader_x_column_name", "string"), ("leader_y_column_name", "string"), ("filter", "string"), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("projection", "string"), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("width", "double"), ("height", "double"), ("bg_color", "long"), ("image_data", "bytes")] ) ENDPOINT = "/visualize/image/labels" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/visualize/video" REQ_SCHEMA_STR = """{"type":"record","name":"visualize_video_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"world_table_names","type":{"type":"array","items":"string"}},{"name":"track_ids","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"geometry_column_name","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"bg_color","type":"long"},{"name":"time_intervals","type":{"type":"array","items":{"type":"array","items":"double"}}},{"name":"video_style","type":"string"},{"name":"session_key","type":"string"},{"name":"style_options","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = 
"""{"type":"record","name":"visualize_video_response","fields":[{"name":"width","type":"double"},{"name":"height","type":"double"},{"name":"bg_color","type":"long"},{"name":"num_frames","type":"int"},{"name":"session_key","type":"string"},{"name":"data","type":{"type":"array","items":"bytes"}}]}""" REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("world_table_names", "array", [("string")]), ("track_ids", "array", [("array", [("string")])]), ("x_column_name", "string"), ("y_column_name", "string"), ("geometry_column_name", "string"), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("projection", "string"), ("bg_color", "long"), ("time_intervals", "array", [("array", [("double")])]), ("video_style", "string"), ("session_key", "string"), ("style_options", "map", [("array", [("string")])]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("width", "double"), ("height", "double"), ("bg_color", "long"), ("num_frames", "int"), ("session_key", "string"), ("data", "array", [("bytes")])] ) ENDPOINT = "/visualize/video" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT } name = "/visualize/video/heatmap" REQ_SCHEMA_STR = 
"""{"type":"record","name":"visualize_video_heatmap_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"time_intervals","type":{"type":"array","items":{"type":"array","items":"double"}}},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"video_style","type":"string"},{"name":"session_key","type":"string"},{"name":"style_options","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}""" RSP_SCHEMA_STR = """{"type":"record","name":"visualize_video_heatmap_response","fields":[{"name":"width","type":"double"},{"name":"height","type":"double"},{"name":"bg_color","type":"long"},{"name":"num_frames","type":"int"},{"name":"session_key","type":"string"},{"name":"data","type":{"type":"array","items":"bytes"}}]}""" REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("x_column_name", "string"), ("y_column_name", "string"), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("time_intervals", "array", [("array", [("double")])]), ("width", "int"), ("height", "int"), ("projection", "string"), ("video_style", "string"), ("session_key", "string"), ("style_options", "map", [("string")]), ("options", "map", [("string")])] ) RSP_SCHEMA = Schema( "record", [("width", "double"), ("height", "double"), ("bg_color", "long"), ("num_frames", "int"), ("session_key", "string"), ("data", "array", [("bytes")])] ) ENDPOINT = "/visualize/video/heatmap" self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR, "RSP_SCHEMA_STR" : RSP_SCHEMA_STR, "REQ_SCHEMA" : REQ_SCHEMA, "RSP_SCHEMA" : RSP_SCHEMA, "ENDPOINT" : ENDPOINT }
# end load_gpudb_schemas
def load_gpudb_func_to_endpoint_map( self ):
    """Saves a mapping of rest endpoint function names to endpoints in a
    dictionary.
    """
    # A single dict literal replaces the long run of per-key assignments;
    # insertion order and final contents are identical.
    self.gpudb_func_to_endpoint_map = {
        "admin_add_ranks"               : "/admin/add/ranks",
        "admin_alter_configuration"     : "/admin/alter/configuration",
        "admin_alter_jobs"              : "/admin/alter/jobs",
        "admin_alter_shards"            : "/admin/alter/shards",
        "admin_offline"                 : "/admin/offline",
        "admin_rebalance"               : "/admin/rebalance",
        "admin_remove_ranks"            : "/admin/remove/ranks",
        "admin_show_alerts"             : "/admin/show/alerts",
        "admin_show_configuration"      : "/admin/show/configuration",
        "admin_show_jobs"               : "/admin/show/jobs",
        "admin_show_shards"             : "/admin/show/shards",
        "admin_shutdown"                : "/admin/shutdown",
        "admin_verify_db"               : "/admin/verifydb",
        "aggregate_convex_hull"         : "/aggregate/convexhull",
        "aggregate_group_by"            : "/aggregate/groupby",
        "aggregate_histogram"           : "/aggregate/histogram",
        "aggregate_k_means"             : "/aggregate/kmeans",
        "aggregate_min_max"             : "/aggregate/minmax",
        "aggregate_min_max_geometry"    : "/aggregate/minmax/geometry",
        "aggregate_statistics"          : "/aggregate/statistics",
        "aggregate_statistics_by_range" : "/aggregate/statistics/byrange",
        "aggregate_unique"              : "/aggregate/unique",
        "aggregate_unpivot"             : "/aggregate/unpivot",
        "alter_system_properties"       : "/alter/system/properties",
        "alter_table"                   : "/alter/table",
        "alter_table_metadata"          : "/alter/table/metadata",
        "alter_user"                    : "/alter/user",
        "append_records"                : "/append/records",
        "clear_statistics"              : "/clear/statistics",
        "clear_table"                   : "/clear/table",
        "clear_table_monitor"           : "/clear/tablemonitor",
        "clear_trigger"                 : "/clear/trigger",
        "collect_statistics"            : "/collect/statistics",
        "create_job"                    : "/create/job",
        "create_join_table"             : "/create/jointable",
        "create_materialized_view"      : "/create/materializedview",
        "create_proc"                   : "/create/proc",
        "create_projection"             : "/create/projection",
        "create_role"                   : "/create/role",
        "create_table"                  : "/create/table",
        "create_table_monitor"          : "/create/tablemonitor",
        "create_trigger_by_area"        : "/create/trigger/byarea",
        "create_trigger_by_range"       : "/create/trigger/byrange",
        "create_type"                   : "/create/type",
        "create_union"                  : "/create/union",
        "create_user_external"          : "/create/user/external",
        "create_user_internal"          : "/create/user/internal",
        "delete_proc"                   : "/delete/proc",
        "delete_records"                : "/delete/records",
        "delete_role"                   : "/delete/role",
        "delete_user"                   : "/delete/user",
        "execute_proc"                  : "/execute/proc",
        "execute_sql"                   : "/execute/sql",
        "filter"                        : "/filter",
        "filter_by_area"                : "/filter/byarea",
        "filter_by_area_geometry"       : "/filter/byarea/geometry",
        "filter_by_box"                 : "/filter/bybox",
        "filter_by_box_geometry"        : "/filter/bybox/geometry",
        "filter_by_geometry"            : "/filter/bygeometry",
        "filter_by_list"                : "/filter/bylist",
        "filter_by_radius"              : "/filter/byradius",
        "filter_by_radius_geometry"     : "/filter/byradius/geometry",
        "filter_by_range"               : "/filter/byrange",
        "filter_by_series"              : "/filter/byseries",
        "filter_by_string"              : "/filter/bystring",
        "filter_by_table"               : "/filter/bytable",
        "filter_by_value"               : "/filter/byvalue",
        "get_job"                       : "/get/job",
        "get_records"                   : "/get/records",
        "get_records_by_column"         : "/get/records/bycolumn",
        "get_records_by_series"         : "/get/records/byseries",
        "get_records_from_collection"   : "/get/records/fromcollection",
        "grant_permission_system"       : "/grant/permission/system",
        "grant_permission_table"        : "/grant/permission/table",
        "grant_role"                    : "/grant/role",
        "has_proc"                      : "/has/proc",
        "has_table"                     : "/has/table",
        "has_type"                      : "/has/type",
        "insert_records"                : "/insert/records",
        "insert_records_random"         : "/insert/records/random",
        "insert_symbol"                 : "/insert/symbol",
        "kill_proc"                     : "/kill/proc",
        "lock_table"                    : "/lock/table",
        "merge_records"                 : "/merge/records",
        "admin_replace_tom"             : "/replace/tom",
        "revoke_permission_system"      : "/revoke/permission/system",
        "revoke_permission_table"       : "/revoke/permission/table",
        "revoke_role"                   : "/revoke/role",
        "show_proc"                     : "/show/proc",
        "show_proc_status"              : "/show/proc/status",
        "show_security"                 : "/show/security",
        "show_statistics"               : "/show/statistics",
        "show_system_properties"        : "/show/system/properties",
        "show_system_status"            : "/show/system/status",
        "show_system_timing"            : "/show/system/timing",
        "show_table"                    : "/show/table",
        "show_table_metadata"           : "/show/table/metadata",
        "show_tables_by_type"           : "/show/tables/bytype",
        "show_triggers"                 : "/show/triggers",
        "show_types"                    : "/show/types",
        "update_records"                : "/update/records",
        "update_records_by_series"      : "/update/records/byseries",
        "visualize_image"               : "/visualize/image",
        "visualize_image_chart"         : "/visualize/image/chart",
        "visualize_image_classbreak"    : "/visualize/image/classbreak",
        "visualize_image_contour"       : "/visualize/image/contour",
        "visualize_image_heatmap"       : "/visualize/image/heatmap",
        "visualize_image_labels"        : "/visualize/image/labels",
        "visualize_video"               : "/visualize/video",
        "visualize_video_heatmap"       : "/visualize/video/heatmap",
    }
# end load_gpudb_func_to_endpoint_map # begin admin_alter_jobs
def admin_alter_jobs( self, job_ids = None, action = None, options = None ):
    """Perform the requested action on a list of one or more job(s). Based
    on the type of job and the current state of execution, the action may
    not be successfully executed. The final result of the attempted actions
    for each specified job is returned in the status array of the response.
    See `Job Manager <../../../gpudbAdmin/job_manager.html>`_ for more
    information.

    Parameters:

        job_ids (list of ints)
            Jobs to be modified.  The user can provide a single element
            (which will be automatically promoted to a list internally) or
            a list.

        action (str)
            Action to be performed on the jobs specified by job_ids.
            Allowed values are:

            * cancel

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        job_ids (list of ints)
            Jobs on which the action was performed.

        action (str)
            Action requested on the jobs.

        status (list of str)
            Status of the requested action for each job.
    """
    # Use a None sentinel instead of a mutable {} default so every call
    # gets a fresh dict (a literal default would be one shared object).
    options = {} if options is None else options

    # Promote a scalar job id to a one-element list; None becomes [].
    job_ids = job_ids if isinstance( job_ids, list ) else ( [] if (job_ids is None) else [ job_ids ] )
    assert isinstance( action, (basestring)), "admin_alter_jobs(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__
    assert isinstance( options, (dict)), "admin_alter_jobs(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/admin/alter/jobs" )

    obj = {}
    obj['job_ids'] = job_ids
    obj['action'] = action
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/admin/alter/jobs' )

    return AttrDict( response )
# end admin_alter_jobs # begin admin_offline
def admin_offline( self, offline = None, options = None ):
    """Take the system offline. When the system is offline, no user
    operations can be performed with the exception of a system shutdown.

    Parameters:

        offline (bool)
            Set to true if desired state is offline.
            Allowed values are:

            * true
            * false

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).
            Allowed keys are:

            * **flush_to_disk** --
              Flush to disk when going offline
              Allowed values are:

              * true
              * false

    Returns:
        A dict with the following entries--

        is_offline (bool)
            Returns true if the system is offline, or false otherwise.
    """
    # Use a None sentinel instead of a mutable {} default so every call
    # gets a fresh dict (a literal default would be one shared object).
    options = {} if options is None else options

    assert isinstance( offline, (bool)), "admin_offline(): Argument 'offline' must be (one) of type(s) '(bool)'; given %s" % type( offline ).__name__
    assert isinstance( options, (dict)), "admin_offline(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/admin/offline" )

    obj = {}
    obj['offline'] = offline
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/admin/offline' )

    return AttrDict( response )
# end admin_offline # begin admin_show_alerts
def admin_show_alerts( self, num_alerts = None, options = None ):
    """Retrieves a list of the most recent alerts generated.  The number
    of alerts to retrieve is specified in this request.  Returns lists of
    alert data, earliest to latest.

    Parameters:

        num_alerts (int)
            Number of most recent alerts to request. The response will
            return input parameter *num_alerts* alerts, or less if there
            are less in the system.  A value of 0 returns all stored
            alerts.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        timestamps (list of str)
            System alert timestamps, sorted from earliest to latest.  Each
            entry corresponds with the entries at the same index in output
            parameters *types* and *params*.

        types (list of str)
            System alert types, sorted from earliest to latest.

        params (list of dicts of str to str)
            Parameters for each alert, sorted from earliest to latest.
    """
    # Use a None sentinel instead of a mutable {} default so every call
    # gets a fresh dict (a literal default would be one shared object).
    options = {} if options is None else options

    assert isinstance( num_alerts, (int, long, float)), "admin_show_alerts(): Argument 'num_alerts' must be (one) of type(s) '(int, long, float)'; given %s" % type( num_alerts ).__name__
    assert isinstance( options, (dict)), "admin_show_alerts(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/admin/show/alerts" )

    obj = {}
    obj['num_alerts'] = num_alerts
    obj['options'] = self.__sanitize_dicts( options )

    # NOTE: this endpoint posts to the host manager, unlike most others.
    response = self.__post_to_hm_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/admin/show/alerts' )

    return AttrDict( response )
# end admin_show_alerts # begin admin_show_jobs
def admin_show_jobs( self, options = None ):
    """Get a list of the current jobs in GPUdb.

    Parameters:

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).
            Allowed keys are:

            * show_details

    Returns:
        A dict with the following entries--

        job_id (list of ints)

        status (list of str)

        endpoint_name (list of str)

        time_received (list of longs)

        auth_id (list of str)

        user_data (list of str)
    """
    # Use a None sentinel instead of a mutable {} default so every call
    # gets a fresh dict (a literal default would be one shared object).
    options = {} if options is None else options

    assert isinstance( options, (dict)), "admin_show_jobs(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/admin/show/jobs" )

    obj = {}
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/admin/show/jobs' )

    return AttrDict( response )
# end admin_show_jobs # begin admin_show_shards
def admin_show_shards( self, options = None ):
    """Show the mapping of shards to the corresponding rank and tom.  The
    response message contains list of 16384 (total number of shards in the
    system) Rank and TOM numbers corresponding to each shard.

    Parameters:

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        version (long)
            Current shard array version number.

        rank (list of ints)
            Array of ranks indexed by the shard number.

        tom (list of ints)
            Array of toms to which the corresponding shard belongs.
    """
    # Use a None sentinel instead of a mutable {} default so every call
    # gets a fresh dict (a literal default would be one shared object).
    options = {} if options is None else options

    assert isinstance( options, (dict)), "admin_show_shards(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/admin/show/shards" )

    obj = {}
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/admin/show/shards' )

    return AttrDict( response )
# end admin_show_shards # begin admin_shutdown
def admin_shutdown( self, exit_type = None, authorization = None, options = None ):
    """Exits the database server application.

    Parameters:

        exit_type (str)
            Reserved for future use. User can pass an empty string.

        authorization (str)
            No longer used. User can pass an empty string.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        exit_status (str)
            'OK' upon (right before) successful exit.
    """
    # Use a None sentinel instead of a mutable {} default so every call
    # gets a fresh dict (a literal default would be one shared object).
    options = {} if options is None else options

    assert isinstance( exit_type, (basestring)), "admin_shutdown(): Argument 'exit_type' must be (one) of type(s) '(basestring)'; given %s" % type( exit_type ).__name__
    assert isinstance( authorization, (basestring)), "admin_shutdown(): Argument 'authorization' must be (one) of type(s) '(basestring)'; given %s" % type( authorization ).__name__
    assert isinstance( options, (dict)), "admin_shutdown(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/admin/shutdown" )

    obj = {}
    obj['exit_type'] = exit_type
    obj['authorization'] = authorization
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/admin/shutdown' )

    return AttrDict( response )
# end admin_shutdown # begin admin_verify_db
def admin_verify_db( self, options = None ):
    """Verify database is in a consistent state.  When inconsistencies or
    errors are found, the verified_ok flag in the response is set to false
    and the list of errors found is provided in the error_list.

    Parameters:

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).
            Allowed keys are:

            * rebuild_on_error
            * verify_persist

    Returns:
        A dict with the following entries--

        verified_ok (bool)
            True if no errors were found, false otherwise.  The default
            value is False.

        error_list (list of str)
            List of errors found while validating the database internal
            state.  The default value is an empty list ( [] ).
    """
    # Use a None sentinel instead of a mutable {} default so every call
    # gets a fresh dict (a literal default would be one shared object).
    options = {} if options is None else options

    assert isinstance( options, (dict)), "admin_verify_db(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/admin/verifydb" )

    obj = {}
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/admin/verifydb' )

    return AttrDict( response )
# end admin_verify_db # begin aggregate_convex_hull
def aggregate_convex_hull( self, table_name = None, x_column_name = None, y_column_name = None, options = None ):
    """Calculates and returns the convex hull for the values in a table
    specified by input parameter *table_name*.

    Parameters:

        table_name (str)
            Name of table on which the operation will be performed. Must
            be an existing table.  It cannot be a collection.

        x_column_name (str)
            Name of the column containing the x coordinates of the points
            for the operation being performed.

        y_column_name (str)
            Name of the column containing the y coordinates of the points
            for the operation being performed.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        x_vector (list of floats)
            Array of x coordinates of the resulting convex set.

        y_vector (list of floats)
            Array of y coordinates of the resulting convex set.

        count (int)
            Count of the number of points in the convex set.

        is_valid (bool)
    """
    # Use a None sentinel instead of a mutable {} default so every call
    # gets a fresh dict (a literal default would be one shared object).
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "aggregate_convex_hull(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( x_column_name, (basestring)), "aggregate_convex_hull(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
    assert isinstance( y_column_name, (basestring)), "aggregate_convex_hull(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
    assert isinstance( options, (dict)), "aggregate_convex_hull(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/aggregate/convexhull" )

    obj = {}
    obj['table_name'] = table_name
    obj['x_column_name'] = x_column_name
    obj['y_column_name'] = y_column_name
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/aggregate/convexhull' )

    return AttrDict( response )
# end aggregate_convex_hull # begin aggregate_group_by
[docs] def aggregate_group_by( self, table_name = None, column_names = None, offset = None, limit = 1000, encoding = 'binary', options = {} ): """Calculates unique combinations (groups) of values for the given columns in a given table/view/collection and computes aggregates on each unique combination. This is somewhat analogous to an SQL-style SELECT...GROUP BY. For aggregation details and examples, see `Aggregation <../../../concepts/aggregation.html>`_. For limitations, see `Aggregation Limitations <../../../concepts/aggregation.html#limitations>`_. Any column(s) can be grouped on, and all column types except unrestricted-length strings may be used for computing applicable aggregates; columns marked as `store-only <../../../concepts/types.html#data-handling>`_ are unable to be used in grouping or aggregation. The results can be paged via the input parameter *offset* and input parameter *limit* parameters. For example, to get 10 groups with the largest counts the inputs would be: limit=10, options={"sort_order":"descending", "sort_by":"value"}. Input parameter *options* can be used to customize behavior of this call e.g. filtering or sorting the results. To group by columns 'x' and 'y' and compute the number of objects within each group, use: column_names=['x','y','count(*)']. To also compute the sum of 'z' over each group, use: column_names=['x','y','count(*)','sum(z)']. Available `aggregation functions <../../../concepts/expressions.html#aggregate-expressions>`_ are: count(*), sum, min, max, avg, mean, stddev, stddev_pop, stddev_samp, var, var_pop, var_samp, arg_min, arg_max and count_distinct. Available grouping functions are `Rollup <../../../concepts/rollup.html>`_, `Cube <../../../concepts/cube.html>`_, and `Grouping Sets <../../../concepts/grouping_sets.html>`_ This service also provides support for `Pivot <../../../concepts/pivot.html>`_ operations. 
Filtering on aggregates is supported via expressions using `aggregation functions <../../../concepts/expressions.html#aggregate-expressions>`_ supplied to *having*. The response is returned as a dynamic schema. For details see: `dynamic schemas documentation <../../../api/index.html#dynamic-schemas>`_. If a *result_table* name is specified in the input parameter *options*, the results are stored in a new table with that name--no results are returned in the response. Both the table name and resulting column names must adhere to `standard naming conventions <../../../concepts/tables.html#table>`_; column/aggregation expressions will need to be aliased. If the source table's `shard key <../../../concepts/tables.html#shard-keys>`_ is used as the grouping column(s) and all result records are selected (input parameter *offset* is 0 and input parameter *limit* is -9999), the result table will be sharded, in all other cases it will be replicated. Sorting will properly function only if the result table is replicated or if there is only one processing node and should not be relied upon in other cases. Not available when any of the values of input parameter *column_names* is an unrestricted-length string. Parameters: table_name (str) Name of the table on which the operation will be performed. Must be an existing table/view/collection. column_names (list of str) List of one or more column names, expressions, and aggregate expressions. The user can provide a single element (which will be automatically promoted to a list internally) or a list. offset (long) A positive integer indicating the number of initial results to skip (this can be useful for paging through the results). The minimum allowed value is 0. The maximum allowed value is MAX_INT. limit (long) A positive integer indicating the maximum number of results to be returned Or END_OF_SET (-9999) to indicate that the max number of results should be returned. The default value is 1000. 
encoding (str) Specifies the encoding for returned records. Allowed values are: * **binary** -- Indicates that the returned records should be binary encoded. * **json** -- Indicates that the returned records should be json encoded. The default value is 'binary'. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **collection_name** -- Name of a collection which is to contain the table specified in *result_table*. If the collection provided is non-existent, the collection will be automatically created. If empty, then the table will be a top-level table. Additionally this option is invalid if input parameter *table_name* is a collection. * **expression** -- Filter expression to apply to the table prior to computing the aggregate group by. * **having** -- Filter expression to apply to the aggregated results. * **sort_order** -- String indicating how the returned values should be sorted - ascending or descending. Allowed values are: * **ascending** -- Indicates that the returned values should be sorted in ascending order. * **descending** -- Indicates that the returned values should be sorted in descending order. The default value is 'ascending'. * **sort_by** -- String determining how the results are sorted. Allowed values are: * **key** -- Indicates that the returned values should be sorted by key, which corresponds to the grouping columns. If you have multiple grouping columns (and are sorting by key), it will first sort the first grouping column, then the second grouping column, etc. * **value** -- Indicates that the returned values should be sorted by value, which corresponds to the aggregates. If you have multiple aggregates (and are sorting by value), it will first sort by the first aggregate, then the second aggregate, etc. The default value is 'value'. * **result_table** -- The name of the table used to store the results. Has the same naming restrictions as `tables <../../../concepts/tables.html>`_. 
Column names (group-by and aggregate fields) need to be given aliases e.g. ["FChar256 as fchar256", "sum(FDouble) as sfd"]. If present, no results are returned in the response. This option is not available if one of the grouping attributes is an unrestricted string (i.e.; not charN) type. * **result_table_persist** -- If *true*, then the result table specified in *result_table* will be persisted and will not expire unless a *ttl* is specified. If *false*, then the result table will be an in-memory table and will expire unless a *ttl* is specified otherwise. Allowed values are: * true * false The default value is 'false'. * **result_table_force_replicated** -- Force the result table to be replicated (ignores any sharding). Must be used in combination with the *result_table* option. Allowed values are: * true * false The default value is 'false'. * **result_table_generate_pk** -- If 'true' then set a primary key for the result table. Must be used in combination with the *result_table* option. Allowed values are: * true * false The default value is 'false'. * **ttl** -- Sets the `TTL <../../../concepts/ttl.html>`_ of the table specified in *result_table*. * **chunk_size** -- Indicates the chunk size to be used for the result table. Must be used in combination with the *result_table* option. * **create_indexes** -- Comma-separated list of columns on which to create indexes on the result table. Must be used in combination with the *result_table* option. * **view_id** -- view this result table is part of. The default value is ''. * **materialize_on_gpu** -- If *true* then the columns of the groupby result table will be cached on the GPU. Must be used in combination with the *result_table* option. Allowed values are: * true * false The default value is 'false'. * **pivot** -- pivot column * **pivot_values** -- The value list provided will become the column headers in the output. Should be the values from the pivot_column. 
* **grouping_sets** -- Customize the grouping attribute sets to compute the aggregates. These sets can include ROLLUP or CUBE operators. The attribute sets should be enclosed in parentheses and can include composite attributes. All attributes specified in the grouping sets must be present in the groupby attributes.
""" assert isinstance( table_name, (basestring)), "aggregate_group_by(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__ column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] ) assert isinstance( offset, (int, long, float)), "aggregate_group_by(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__ assert isinstance( limit, (int, long, float)), "aggregate_group_by(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__ assert isinstance( encoding, (basestring)), "aggregate_group_by(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__ assert isinstance( options, (dict)), "aggregate_group_by(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/aggregate/groupby" ) obj = {} obj['table_name'] = table_name obj['column_names'] = column_names obj['offset'] = offset obj['limit'] = limit obj['encoding'] = encoding obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/aggregate/groupby' ) if not _Util.is_ok( response ): return AttrDict( response ) # Create the record type and save it in the response, if applicable if encoding == "binary": record_type = RecordType.from_dynamic_schema( response["response_schema_str"], response["binary_encoded_response"] ) response["record_type"] = record_type else: response["record_type"] = None return AttrDict( response )
# end aggregate_group_by # begin aggregate_group_by_and_decode
def aggregate_group_by_and_decode( self, table_name = None, column_names = None,
                                   offset = None, limit = 1000, encoding = 'binary',
                                   options = None, record_type = None,
                                   force_primitive_return_types = True,
                                   get_column_major = True ):
    """Calculates unique combinations (groups) of values for the given
    columns in a given table/view/collection, computes aggregates on each
    unique combination (somewhat analogous to an SQL-style
    SELECT...GROUP BY), and decodes the returned records.

    For aggregation details and examples, see `Aggregation
    <../../../concepts/aggregation.html>`_.  For limitations, see
    `Aggregation Limitations
    <../../../concepts/aggregation.html#limitations>`_.

    Parameters:

        table_name (str)
            Name of the table on which the operation will be performed.
            Must be an existing table/view/collection.

        column_names (list of str)
            List of one or more column names, expressions, and aggregate
            expressions.  A single string is automatically promoted to a
            one-element list.

        offset (long)
            A positive integer indicating the number of initial results
            to skip (useful for paging).  The minimum allowed value is 0.

        limit (long)
            A positive integer indicating the maximum number of results
            to return, or END_OF_SET (-9999) to return all results.  The
            default value is 1000.

        encoding (str)
            Specifies the encoding for returned records; 'binary' or
            'json'.  The default value is 'binary'.

        options (dict of str to str)
            Optional parameters (e.g. 'expression', 'having',
            'sort_order', 'sort_by', 'result_table', 'pivot',
            'grouping_sets', ...); see the endpoint documentation for the
            full list of allowed keys.  The default value of None is
            treated as an empty dict.

        record_type (:class:`RecordType` or None)
            The record type expected in the results, or None to determine
            the appropriate type automatically.  If known, providing this
            may improve performance in binary mode.  Not used in JSON
            mode.  The default value is None.

        force_primitive_return_types (bool)
            If True, `OrderedDict` objects are returned, with string
            sub-type column values converted back to strings (incurring a
            speed penalty); if False, :class:`Record` objects are
            returned, which keep native or custom structs for string
            sub-types.  Default value is True.

        get_column_major (bool)
            Indicates if the decoded records will be transposed to be
            column-major or returned as is (row-major).  Default value is
            True.

    Returns:
        A dict with the following entries--

        response_schema_str (str)
            Avro schema of the response payload.

        total_number_of_records (long)
            Total/Filtered number of records.

        has_more_records (bool)
            Too many records.  Returned a partial set.

        records (list of :class:`Record`)
            A list of :class:`Record` objects which contain the decoded
            records.
    """
    # Use a None default instead of a mutable '{}' default so a single
    # shared dict is never accidentally mutated across calls
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "aggregate_group_by_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
    assert isinstance( offset, (int, long, float)), "aggregate_group_by_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
    assert isinstance( limit, (int, long, float)), "aggregate_group_by_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
    assert isinstance( encoding, (basestring)), "aggregate_group_by_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "aggregate_group_by_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
    # 'is None' (identity) is the correct singleton comparison, not '== None'
    assert ( (record_type is None) or isinstance(record_type, RecordType) ), "aggregate_group_by_and_decode: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
    assert isinstance(force_primitive_return_types, bool), "aggregate_group_by_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__
    assert isinstance(get_column_major, bool), "aggregate_group_by_and_decode: Argument 'get_column_major' must be bool; given %s" % type( get_column_major ).__name__

    (REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/aggregate/groupby", get_rsp_cext = True )

    # Force JSON encoding if client encoding is json and method encoding
    # is binary (checking for binary so that we do not accidentally override
    # the GeoJSON encoding)
    if ( (self.encoding == "JSON") and (encoding == "binary") ):
        encoding = "json"

    obj = {}
    obj['table_name'] = table_name
    obj['column_names'] = column_names
    obj['offset'] = offset
    obj['limit'] = limit
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response, raw_response = self.__post_then_get_cext_raw( REQ_SCHEMA, RSP_SCHEMA_CEXT, obj, '/aggregate/groupby' )

    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Decode the data
    if (encoding == 'binary'):
        # Derive the record type from the dynamic schema unless the caller
        # supplied one
        record_type = record_type if record_type else RecordType.from_dynamic_schema( response["response_schema_str"], raw_response, response["binary_encoded_response"] )
        records = record_type.decode_dynamic_records( raw_response, response["binary_encoded_response"] )
        if force_primitive_return_types:
            records = _Util.convert_cext_records_to_ordered_dicts( records )

        # Transpose the data to column-major, if requested by the user
        if get_column_major:
            records = GPUdbRecord.transpose_data_to_col_major( records )
        response["records"] = records
    else:
        records = json.loads( response["json_encoded_response"] )
        if get_column_major:
            # Get column-major data
            records = GPUdbRecord.decode_dynamic_json_data_column_major( records, response["response_schema_str"] )
        else:
            # Get row-major data
            records = GPUdbRecord.decode_dynamic_json_data_row_major( records, response["response_schema_str"] )
        response["records"] = records
    # end if

    # The raw encoded payloads have been decoded into 'records'; drop them
    del response["binary_encoded_response"]
    del response["json_encoded_response"]

    return AttrDict( response )
# end aggregate_group_by_and_decode # begin aggregate_histogram
def aggregate_histogram( self, table_name = None, column_name = None, start = None,
                         end = None, interval = None, options = None ):
    """Performs a histogram calculation given a table, a column, and an
    interval function.  The input parameter *interval* is used to produce
    bins of that size and the result, computed over the records falling
    within each bin, is returned.  For each bin, the start value is
    inclusive, but the end value is exclusive--except for the very last
    bin for which the end value is also inclusive.  The value returned
    for each bin is the number of records in it, except when a column
    name is provided as a *value_column*, in which case the sum of the
    values corresponding to the *value_column* is used as the result
    instead.  The total number of bins requested cannot exceed 10,000.

    NOTE: The Kinetica instance being accessed must be running a CUDA
    (GPU-based) build to service a request that specifies a
    *value_column* option.

    Parameters:

        table_name (str)
            Name of the table on which the operation will be performed.
            Must be an existing table or collection.

        column_name (str)
            Name of a column or an expression of one or more column names
            over which the histogram will be calculated.

        start (float)
            Lower end value of the histogram interval, inclusive.

        end (float)
            Upper end value of the histogram interval, inclusive.

        interval (float)
            The size of each bin within the start and end parameters.

        options (dict of str to str)
            Optional parameters.  The default value of None is treated as
            an empty dict.  Allowed keys are:

            * **value_column** --
              The name of the column to use when calculating the bin
              values (values are summed).  The column must be a numerical
              type (int, double, long, float).

    Returns:
        A dict with the following entries--

        counts (list of floats)
            The array of calculated values that represents the histogram
            data points.

        start (float)
            Value of input parameter *start*.

        end (float)
            Value of input parameter *end*.
    """
    # Use a None default instead of a mutable '{}' default so a single
    # shared dict is never accidentally mutated across calls
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "aggregate_histogram(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( column_name, (basestring)), "aggregate_histogram(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( start, (int, long, float)), "aggregate_histogram(): Argument 'start' must be (one) of type(s) '(int, long, float)'; given %s" % type( start ).__name__
    assert isinstance( end, (int, long, float)), "aggregate_histogram(): Argument 'end' must be (one) of type(s) '(int, long, float)'; given %s" % type( end ).__name__
    assert isinstance( interval, (int, long, float)), "aggregate_histogram(): Argument 'interval' must be (one) of type(s) '(int, long, float)'; given %s" % type( interval ).__name__
    assert isinstance( options, (dict)), "aggregate_histogram(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/aggregate/histogram" )

    obj = {}
    obj['table_name'] = table_name
    obj['column_name'] = column_name
    obj['start'] = start
    obj['end'] = end
    obj['interval'] = interval
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/aggregate/histogram' )

    return AttrDict( response )
# end aggregate_histogram # begin aggregate_k_means
def aggregate_k_means( self, table_name = None, column_names = None, k = None,
                       tolerance = None, options = None ):
    """Runs the k-means algorithm - a heuristic algorithm that attempts
    to do k-means clustering.  An ideal k-means clustering algorithm
    selects k points such that the sum of the mean squared distances of
    each member of the set to the nearest of the k points is minimized.
    The k-means algorithm however does not necessarily produce such an
    ideal cluster.  It begins with a randomly selected set of k points
    and then refines the location of the points iteratively and settles
    to a local minimum.  Various parameters and options are provided to
    control the heuristic search.

    NOTE: The Kinetica instance being accessed must be running a CUDA
    (GPU-based) build to service this request.

    Parameters:

        table_name (str)
            Name of the table on which the operation will be performed.
            Must be an existing table or collection.

        column_names (list of str)
            List of column names on which the operation would be
            performed.  If n columns are provided then each of the k
            result points will have n dimensions corresponding to the n
            columns.  A single string is automatically promoted to a
            one-element list.

        k (int)
            The number of mean points to be determined by the algorithm.

        tolerance (float)
            Stop iterating when the distances between successive points
            is less than the given tolerance.

        options (dict of str to str)
            Optional parameters.  The default value of None is treated as
            an empty dict.  Allowed keys are:

            * **whiten** --
              When set to 1 each of the columns is first normalized by
              its stdv - default is not to whiten.

            * **max_iters** --
              Number of times to try to hit the tolerance limit before
              giving up - default is 10.

            * **num_tries** --
              Number of times to run the k-means algorithm with a
              different randomly selected starting points - helps avoid
              local minimum.  Default is 1.

    Returns:
        A dict with the following entries--

        means (list of lists of floats)
            The k-mean values found.

        counts (list of longs)
            The number of elements in the cluster closest the
            corresponding k-means values.

        rms_dists (list of floats)
            The root mean squared distance of the elements in the cluster
            for each of the k-means values.

        count (long)
            The total count of all the clusters - will be the size of the
            input table.

        rms_dist (float)
            The sum of all the rms_dists - the value the k-means
            algorithm is attempting to minimize.

        tolerance (float)
            The distance between the last two iterations of the algorithm
            before it quit.

        num_iters (int)
            The number of iterations the algorithm executed before it
            quit.
    """
    # Use a None default instead of a mutable '{}' default so a single
    # shared dict is never accidentally mutated across calls
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "aggregate_k_means(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
    assert isinstance( k, (int, long, float)), "aggregate_k_means(): Argument 'k' must be (one) of type(s) '(int, long, float)'; given %s" % type( k ).__name__
    assert isinstance( tolerance, (int, long, float)), "aggregate_k_means(): Argument 'tolerance' must be (one) of type(s) '(int, long, float)'; given %s" % type( tolerance ).__name__
    assert isinstance( options, (dict)), "aggregate_k_means(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/aggregate/kmeans" )

    obj = {}
    obj['table_name'] = table_name
    obj['column_names'] = column_names
    obj['k'] = k
    obj['tolerance'] = tolerance
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/aggregate/kmeans' )

    return AttrDict( response )
# end aggregate_k_means # begin aggregate_min_max
def aggregate_min_max( self, table_name = None, column_name = None, options = None ):
    """Calculates and returns the minimum and maximum values of a
    particular column in a table.

    Parameters:

        table_name (str)
            Name of the table on which the operation will be performed.
            Must be an existing table.

        column_name (str)
            Name of a column or an expression of one or more column on
            which the min-max will be calculated.

        options (dict of str to str)
            Optional parameters.  The default value of None is treated as
            an empty dict.

    Returns:
        A dict with the following entries--

        min (float)
            Minimum value of the input parameter *column_name*.

        max (float)
            Maximum value of the input parameter *column_name*.
    """
    # Use a None default instead of a mutable '{}' default so a single
    # shared dict is never accidentally mutated across calls
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "aggregate_min_max(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( column_name, (basestring)), "aggregate_min_max(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( options, (dict)), "aggregate_min_max(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/aggregate/minmax" )

    obj = {}
    obj['table_name'] = table_name
    obj['column_name'] = column_name
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/aggregate/minmax' )

    return AttrDict( response )
# end aggregate_min_max # begin aggregate_min_max_geometry
def aggregate_min_max_geometry( self, table_name = None, column_name = None, options = None ):
    """Calculates and returns the minimum and maximum x- and
    y-coordinates of a particular geospatial geometry column in a table.

    Parameters:

        table_name (str)
            Name of the table on which the operation will be performed.
            Must be an existing table.

        column_name (str)
            Name of a geospatial geometry column on which the min-max
            will be calculated.

        options (dict of str to str)
            Optional parameters.  The default value of None is treated as
            an empty dict.

    Returns:
        A dict with the following entries--

        min_x (float)
            Minimum x-coordinate value of the input parameter
            *column_name*.

        max_x (float)
            Maximum x-coordinate value of the input parameter
            *column_name*.

        min_y (float)
            Minimum y-coordinate value of the input parameter
            *column_name*.

        max_y (float)
            Maximum y-coordinate value of the input parameter
            *column_name*.
    """
    # Use a None default instead of a mutable '{}' default so a single
    # shared dict is never accidentally mutated across calls
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "aggregate_min_max_geometry(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( column_name, (basestring)), "aggregate_min_max_geometry(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( options, (dict)), "aggregate_min_max_geometry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/aggregate/minmax/geometry" )

    obj = {}
    obj['table_name'] = table_name
    obj['column_name'] = column_name
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/aggregate/minmax/geometry' )

    return AttrDict( response )
# end aggregate_min_max_geometry # begin aggregate_statistics
def aggregate_statistics( self, table_name = None, column_name = None, stats = None,
                          options = None ):
    """Calculates the requested statistics of the given column(s) in a
    given table.

    The available statistics are *count* (number of total objects),
    *mean*, *stdv* (standard deviation), *variance*, *skew*, *kurtosis*,
    *sum*, *min*, *max*, *weighted_average*, *cardinality* (unique
    count), *estimated_cardinality*, *percentile* and *percentile_rank*.

    Estimated cardinality is calculated by using the hyperloglog
    approximation technique.  Percentiles and percentile ranks are
    approximate and are calculated using the t-digest algorithm.  They
    must include the desired *percentile*/*percentile_rank*.  To compute
    multiple percentiles each value must be specified separately (i.e.
    'percentile(75.0),percentile(99.0),percentile_rank(1234.56),percentile_rank(-5)').

    A second, comma-separated value can be added to the *percentile*
    statistic to calculate percentile resolution, e.g., a 50th percentile
    with 200 resolution would be 'percentile(50,200)'.

    The weighted average statistic requires a *weight_column_name* to be
    specified in input parameter *options*.  The weighted average is then
    defined as the sum of the products of input parameter *column_name*
    times the *weight_column_name* values divided by the sum of the
    *weight_column_name* values.

    Additional columns can be used in the calculation of statistics via
    the *additional_column_names* option.  Values in these columns will
    be included in the overall aggregate calculation--individual
    aggregates will not be calculated per additional column.

    The response includes a list of key/value pairs of each statistic
    requested and its corresponding value.

    Parameters:

        table_name (str)
            Name of the table on which the statistics operation will be
            performed.

        column_name (str)
            Name of the primary column for which the statistics are to be
            calculated.

        stats (str)
            Comma separated list of the statistics to calculate, e.g.
            "sum,mean".  See the endpoint documentation for the full list
            of allowed values (count, mean, stdv, variance, skew,
            kurtosis, sum, min, max, weighted_average, cardinality,
            estimated_cardinality, percentile, percentile_rank).

        options (dict of str to str)
            Optional parameters.  The default value of None is treated as
            an empty dict.  Allowed keys are:

            * **additional_column_names** --
              A list of comma separated column names over which
              statistics can be accumulated along with the primary
              column.  All columns listed and input parameter
              *column_name* must be of the same type.  Must not include
              the column specified in input parameter *column_name* and
              no column can be listed twice.

            * **weight_column_name** --
              Name of column used as weighting attribute for the weighted
              average statistic.

    Returns:
        A dict with the following entries--

        stats (dict of str to floats)
            (statistic name, double value) pairs of the requested
            statistics, including the total count by default.
    """
    # Use a None default instead of a mutable '{}' default so a single
    # shared dict is never accidentally mutated across calls
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "aggregate_statistics(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( column_name, (basestring)), "aggregate_statistics(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( stats, (basestring)), "aggregate_statistics(): Argument 'stats' must be (one) of type(s) '(basestring)'; given %s" % type( stats ).__name__
    assert isinstance( options, (dict)), "aggregate_statistics(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/aggregate/statistics" )

    obj = {}
    obj['table_name'] = table_name
    obj['column_name'] = column_name
    obj['stats'] = stats
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/aggregate/statistics' )

    return AttrDict( response )
# end aggregate_statistics # begin aggregate_statistics_by_range
def aggregate_statistics_by_range( self, table_name = None,
                                   select_expression = '',
                                   column_name = None,
                                   value_column_name = None, stats = None,
                                   start = None, end = None,
                                   interval = None, options = None ):
    """Divides the given set into bins and calculates statistics of the
    values of a value-column in each bin.

    The bins are based on the values of a given binning-column.  The
    statistics that may be requested are mean, stdv (standard deviation),
    variance, skew, kurtosis, sum, min, max, first, last and weighted
    average.  In addition to the requested statistics the count of total
    samples in each bin is returned (the histogram of the binning column).

    The weighted average statistic requires a weight_column to be specified
    in *options*; it is defined as the sum of the products of the value
    column times the weight column divided by the sum of the weight column.

    There are two binning methods.  In the first (numeric binning-columns) a
    min, max and interval are specified; the number of bins, nbins, is the
    integer upper bound of (max-min)/interval.  Values in
    [min+n*interval, min+(n+1)*interval) go to bin n for n in 0..nbin-2, and
    the final bin is [min+(nbin-1)*interval, max].  In the second method,
    the *options* key bin_values specifies a list of binning column values;
    values matching the nth member of the list are placed in the nth bin
    (the binning column must then be of type string or int).

    NOTE: The Kinetica instance being accessed must be running a CUDA
    (GPU-based) build to service this request.

    Parameters:

        table_name (str)
            Name of the table on which the ranged-statistics operation will
            be performed.

        select_expression (str)
            For a non-empty expression statistics are calculated for those
            records for which the expression is true.  The default value is
            ''.

        column_name (str)
            Name of the binning-column used to divide the set samples into
            bins.

        value_column_name (str)
            Name of the value-column for which statistics are to be
            computed.

        stats (str)
            A string of comma separated list of the statistics to calculate,
            e.g. 'sum,mean'.

        start (float)
            The lower bound of the binning-column.

        end (float)
            The upper bound of the binning-column.

        interval (float)
            The interval of a bin.  Set members fall into bin i if the
            binning-column falls in the range
            [start+interval*i, start+interval*(i+1)).

        options (dict of str to str)
            Map of optional parameters.  The default value is None, treated
            as an empty dict.  Allowed keys are:

            * **additional_column_names** --
              A list of comma separated value-column names over which
              statistics can be accumulated along with the primary
              value_column.

            * **bin_values** --
              A list of comma separated binning-column values.  Values that
              match the nth bin_values value are placed in the nth bin.

            * **weight_column_name** --
              Name of the column used as weighting column for the
              weighted_average statistic.

            * **order_column_name** --
              Name of the column used for candlestick charting techniques.

    Returns:
        A dict with the following entries--

        stats (dict of str to lists of floats)
            A map with a key for each statistic in the stats input parameter
            having a value that is a vector of the corresponding
            value-column bin statistics.  In addition the key count has a
            value that is a histogram of the binning-column.
    """
    # Avoid the shared mutable default-argument pitfall.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "aggregate_statistics_by_range(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( select_expression, (basestring)), "aggregate_statistics_by_range(): Argument 'select_expression' must be (one) of type(s) '(basestring)'; given %s" % type( select_expression ).__name__
    assert isinstance( column_name, (basestring)), "aggregate_statistics_by_range(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( value_column_name, (basestring)), "aggregate_statistics_by_range(): Argument 'value_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( value_column_name ).__name__
    assert isinstance( stats, (basestring)), "aggregate_statistics_by_range(): Argument 'stats' must be (one) of type(s) '(basestring)'; given %s" % type( stats ).__name__
    assert isinstance( start, (int, long, float)), "aggregate_statistics_by_range(): Argument 'start' must be (one) of type(s) '(int, long, float)'; given %s" % type( start ).__name__
    assert isinstance( end, (int, long, float)), "aggregate_statistics_by_range(): Argument 'end' must be (one) of type(s) '(int, long, float)'; given %s" % type( end ).__name__
    assert isinstance( interval, (int, long, float)), "aggregate_statistics_by_range(): Argument 'interval' must be (one) of type(s) '(int, long, float)'; given %s" % type( interval ).__name__
    assert isinstance( options, (dict)), "aggregate_statistics_by_range(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/aggregate/statistics/byrange" )

    # Build the request payload for the /aggregate/statistics/byrange
    # endpoint.
    obj = {}
    obj['table_name'] = table_name
    obj['select_expression'] = select_expression
    obj['column_name'] = column_name
    obj['value_column_name'] = value_column_name
    obj['stats'] = stats
    obj['start'] = start
    obj['end'] = end
    obj['interval'] = interval
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/aggregate/statistics/byrange' )

    return AttrDict( response )
# end aggregate_statistics_by_range


# begin aggregate_unique
def aggregate_unique( self, table_name = None, column_name = None,
                      offset = None, limit = 10000, encoding = 'binary',
                      options = None ):
    """Returns all the unique values from a particular column (specified by
    *column_name*) of a particular table or collection (specified by
    *table_name*).

    If *column_name* is a numeric column the values will be in
    *binary_encoded_response*; otherwise, for a string column, the values
    will be in *json_encoded_response*.  The results can be paged via the
    *offset* and *limit* parameters.

    Columns marked as store-only are unable to be used with this function.

    To get the first 10 unique values sorted in descending order *options*
    would be: {"limit":"10","sort_order":"descending"}.

    The response is returned as a dynamic schema.

    If a *result_table* name is specified in *options*, the results are
    stored in a new table with that name--no results are returned in the
    response.  Both the table name and resulting column name must adhere to
    standard naming conventions; any column expression will need to be
    aliased.  If the source table's shard key is used as *column_name*, the
    result table will be sharded, in all other cases it will be replicated.
    Sorting will properly function only if the result table is replicated or
    if there is only one processing node and should not be relied upon in
    other cases.  Not available if *table_name* is a collection or when
    *column_name* is an unrestricted-length string.

    Parameters:

        table_name (str)
            Name of an existing table/collection on which the operation will
            be performed.

        column_name (str)
            Name of the column or an expression containing one or more
            column names on which the unique function would be applied.

        offset (long)
            A positive integer indicating the number of initial results to
            skip (useful for paging).  The minimum allowed value is 0.  The
            maximum allowed value is MAX_INT.

        limit (long)
            A positive integer indicating the maximum number of results to
            be returned, or END_OF_SET (-9999) to indicate that the max
            number of results should be returned.  The default value is
            10000.

        encoding (str)
            Specifies the encoding for returned records.  Allowed values
            are 'binary' and 'json'.  The default value is 'binary'.

        options (dict of str to str)
            Optional parameters.  The default value is None, treated as an
            empty dict.  Allowed keys are:

            * **collection_name** --
              Name of a collection which is to contain the table specified
              in *result_table*; created automatically if non-existent.
              Invalid if *table_name* is a collection.

            * **expression** --
              Optional filter expression to apply to the table.

            * **sort_order** --
              'ascending' (default) or 'descending'.

            * **result_table** --
              The name of the table used to store the results.  If present,
              no results are returned in the response.

            * **result_table_persist** --
              'true' or 'false' (default); whether the result table is
              persisted.

            * **result_table_force_replicated** --
              'true' or 'false' (default); force the result table to be
              replicated.  Must be used with *result_table*.

            * **result_table_generate_pk** --
              'true' or 'false' (default); set a primary key for the result
              table.  Must be used with *result_table*.

            * **ttl** --
              Sets the TTL of the table specified in *result_table*.

            * **chunk_size** --
              Chunk size to be used for the result table.  Must be used with
              *result_table*.

            * **view_id** --
              View this result table is part of.  The default value is ''.

    Returns:
        A dict with the following entries--

        table_name (str)
            The same table name as was passed in the parameter list.

        response_schema_str (str)
            Avro schema of *binary_encoded_response* or
            *json_encoded_response*.

        binary_encoded_response (str)
            Avro binary encoded response.

        json_encoded_response (str)
            Avro JSON encoded response.

        has_more_records (bool)
            Too many records.  Returned a partial set.

        record_type (:class:`RecordType` or None)
            A :class:`RecordType` object with which the user can decode the
            binary data via :meth:`GPUdbRecord.decode_binary_data`.  If JSON
            encoding is used, then None.
    """
    # Avoid the shared mutable default-argument pitfall.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "aggregate_unique(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( column_name, (basestring)), "aggregate_unique(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( offset, (int, long, float)), "aggregate_unique(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
    assert isinstance( limit, (int, long, float)), "aggregate_unique(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
    assert isinstance( encoding, (basestring)), "aggregate_unique(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "aggregate_unique(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/aggregate/unique" )

    # Build the request payload for the /aggregate/unique endpoint.
    obj = {}
    obj['table_name'] = table_name
    obj['column_name'] = column_name
    obj['offset'] = offset
    obj['limit'] = limit
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/aggregate/unique' )

    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Create the record type and save it in the response, if applicable
    # (only binary responses carry a decodable dynamic schema).
    if encoding == "binary":
        record_type = RecordType.from_dynamic_schema( response["response_schema_str"], response["binary_encoded_response"] )
        response["record_type"] = record_type
    else:
        response["record_type"] = None

    return AttrDict( response )
# end aggregate_unique


# begin aggregate_unique_and_decode
def aggregate_unique_and_decode( self, table_name = None,
                                 column_name = None, offset = None,
                                 limit = 10000, encoding = 'binary',
                                 options = None, record_type = None,
                                 force_primitive_return_types = True,
                                 get_column_major = True ):
    """Returns all the unique values from a particular column (specified by
    *column_name*) of a particular table or collection (specified by
    *table_name*), decoding the records before returning them.

    The results can be paged via the *offset* and *limit* parameters.
    Columns marked as store-only are unable to be used with this function.

    To get the first 10 unique values sorted in descending order *options*
    would be: {"limit":"10","sort_order":"descending"}.

    The response is returned as a dynamic schema.

    If a *result_table* name is specified in *options*, the results are
    stored in a new table with that name--no results are returned in the
    response.  If the source table's shard key is used as *column_name*, the
    result table will be sharded, in all other cases it will be replicated.
    Sorting will properly function only if the result table is replicated or
    if there is only one processing node and should not be relied upon in
    other cases.  Not available if *table_name* is a collection or when
    *column_name* is an unrestricted-length string.

    Parameters:

        table_name (str)
            Name of an existing table/collection on which the operation will
            be performed.

        column_name (str)
            Name of the column or an expression containing one or more
            column names on which the unique function would be applied.

        offset (long)
            A positive integer indicating the number of initial results to
            skip.  The minimum allowed value is 0.  The maximum allowed
            value is MAX_INT.

        limit (long)
            A positive integer indicating the maximum number of results to
            be returned, or END_OF_SET (-9999) for the max number of
            results.  The default value is 10000.

        encoding (str)
            Specifies the encoding for returned records; 'binary' (default)
            or 'json'.

        options (dict of str to str)
            Optional parameters.  The default value is None, treated as an
            empty dict.  Allowed keys are the same as for
            :meth:`.aggregate_unique`: *collection_name*, *expression*,
            *sort_order* ('ascending'/'descending'), *result_table*,
            *result_table_persist*, *result_table_force_replicated*,
            *result_table_generate_pk*, *ttl*, *chunk_size*, *view_id*.

        record_type (:class:`RecordType` or None)
            The record type expected in the results, or None to determine
            the appropriate type automatically.  If known, providing this
            may improve performance in binary mode.  Not used in JSON mode.
            The default value is None.

        force_primitive_return_types (bool)
            If True (default), `OrderedDict` objects will be returned, with
            string sub-type column values converted back to strings.  If
            False, :class:`Record` objects will be returned with native or
            custom structs; no conversion to string takes place (faster).

        get_column_major (bool)
            Indicates if the decoded records will be transposed to be
            column-major or returned as is (row-major).  Default value is
            True.

    Returns:
        A dict with the following entries--

        table_name (str)
            The same table name as was passed in the parameter list.

        response_schema_str (str)
            Avro schema of the encoded response.

        has_more_records (bool)
            Too many records.  Returned a partial set.

        records (list of :class:`Record`)
            A list of :class:`Record` objects which contain the decoded
            records.
    """
    # Avoid the shared mutable default-argument pitfall.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "aggregate_unique_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( column_name, (basestring)), "aggregate_unique_and_decode(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( offset, (int, long, float)), "aggregate_unique_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
    assert isinstance( limit, (int, long, float)), "aggregate_unique_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
    assert isinstance( encoding, (basestring)), "aggregate_unique_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "aggregate_unique_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
    # Identity comparison with None (was '== None').
    assert ( (record_type is None) or isinstance(record_type, RecordType) ), "aggregate_unique_and_decode: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
    assert isinstance(force_primitive_return_types, bool), "aggregate_unique_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__
    assert isinstance(get_column_major, bool), "aggregate_unique_and_decode: Argument 'get_column_major' must be bool; given %s" % type( get_column_major ).__name__

    (REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/aggregate/unique", get_rsp_cext = True )

    # Force JSON encoding if client encoding is json and method encoding
    # is binary (checking for binary so that we do not accidentally override
    # the GeoJSON encoding)
    if ( (self.encoding == "JSON") and (encoding == "binary") ):
        encoding = "json"

    obj = {}
    obj['table_name'] = table_name
    obj['column_name'] = column_name
    obj['offset'] = offset
    obj['limit'] = limit
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response, raw_response = self.__post_then_get_cext_raw( REQ_SCHEMA, RSP_SCHEMA_CEXT, obj, '/aggregate/unique' )

    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Decode the data
    if (encoding == 'binary'):
        # Derive the record type from the dynamic schema unless the caller
        # supplied one.
        record_type = record_type if record_type else RecordType.from_dynamic_schema( response["response_schema_str"], raw_response, response["binary_encoded_response"] )
        records = record_type.decode_dynamic_records( raw_response, response["binary_encoded_response"] )
        if force_primitive_return_types:
            records = _Util.convert_cext_records_to_ordered_dicts( records )

        # Transpose the data to column-major, if requested by the user
        if get_column_major:
            records = GPUdbRecord.transpose_data_to_col_major( records )
        response["records"] = records
    else:
        records = json.loads( response["json_encoded_response"] )
        if get_column_major:
            # Get column-major data
            records = GPUdbRecord.decode_dynamic_json_data_column_major( records, response["response_schema_str"] )
        else:
            # Get row-major data
            records = GPUdbRecord.decode_dynamic_json_data_row_major( records, response["response_schema_str"] )
        response["records"] = records
    # end if

    # The encoded payloads have been decoded into 'records'; drop them.
    del response["binary_encoded_response"]
    del response["json_encoded_response"]

    return AttrDict( response )
# end aggregate_unique_and_decode


# begin aggregate_unpivot
def aggregate_unpivot( self, table_name = None, column_names = None,
                       variable_column_name = '', value_column_name = '',
                       pivoted_columns = None, encoding = 'binary',
                       options = None ):
    """Rotate the column values into rows values.

    For unpivot details and examples, see the Unpivot concepts
    documentation; for limitations, see Unpivot Limitations.

    Unpivot is used to normalize tables that are built for cross tabular
    reporting purposes.  The unpivot operator rotates the column values for
    all the pivoted columns.  A variable column, value column and all
    columns from the source table except the unpivot columns are projected
    into the result table.  The variable column and value columns in the
    result table indicate the pivoted column name and values respectively.

    The response is returned as a dynamic schema.

    Parameters:

        table_name (str)
            Name of the table on which the operation will be performed.
            Must be an existing table/view.

        column_names (list of str)
            List of column names or expressions.  A wildcard '*' can be
            used to include all the non-pivoted columns from the source
            table.  A single element is automatically promoted to a list.

        variable_column_name (str)
            Specifies the variable/parameter column name.  The default
            value is ''.

        value_column_name (str)
            Specifies the value column name.  The default value is ''.

        pivoted_columns (list of str)
            List of one or more values typically the column names of the
            input table.  All the columns in the source table must have the
            same data type.  A single element is automatically promoted to
            a list.

        encoding (str)
            Specifies the encoding for returned records; 'binary' (default)
            or 'json'.

        options (dict of str to str)
            Optional parameters.  The default value is None, treated as an
            empty dict.  Allowed keys are:

            * **collection_name** --
              Name of a collection which is to contain the table specified
              in *result_table*; created automatically if non-existent.  If
              empty, the table will be a top-level table.

            * **result_table** --
              The name of the table used to store the results.  If present,
              no results are returned in the response.

            * **result_table_persist** --
              'true' or 'false' (default); whether the result table is
              persisted.

            * **expression** --
              Filter expression to apply to the table prior to unpivot
              processing.

            * **order_by** --
              Comma-separated list of the columns to be sorted by, e.g.
              'timestamp asc, x desc'.  The default value is ''.

            * **chunk_size** --
              Chunk size to be used for the result table.  Must be used with
              *result_table*.

            * **limit** --
              The number of records to keep.  The default value is ''.

            * **ttl** --
              Sets the TTL of the table specified in *result_table*.

            * **view_id** --
              View this result table is part of.  The default value is ''.

            * **materialize_on_gpu** --
              'true' or 'false' (default); if 'true' the output columns will
              be cached on the GPU.

            * **create_indexes** --
              Comma-separated list of columns on which to create indexes on
              the table specified in *result_table*.

            * **result_table_force_replicated** --
              'true' or 'false' (default); force the result table to be
              replicated.  Must be used with *result_table*.

    Returns:
        A dict with the following entries--

        table_name (str)
            Typically shows the result-table name if provided in the
            request (ignore otherwise).

        response_schema_str (str)
            Avro schema of *binary_encoded_response* or
            *json_encoded_response*.

        binary_encoded_response (str)
            Avro binary encoded response.

        json_encoded_response (str)
            Avro JSON encoded response.

        total_number_of_records (long)
            Total/filtered number of records.

        has_more_records (bool)
            Too many records.  Returned a partial set.

        record_type (:class:`RecordType` or None)
            A :class:`RecordType` object with which the user can decode the
            binary data via :meth:`GPUdbRecord.decode_binary_data`.  If JSON
            encoding is used, then None.
    """
    # Avoid the shared mutable default-argument pitfall.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "aggregate_unpivot(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    # Promote scalar arguments to single-element lists (None becomes []).
    column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
    assert isinstance( variable_column_name, (basestring)), "aggregate_unpivot(): Argument 'variable_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( variable_column_name ).__name__
    assert isinstance( value_column_name, (basestring)), "aggregate_unpivot(): Argument 'value_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( value_column_name ).__name__
    pivoted_columns = pivoted_columns if isinstance( pivoted_columns, list ) else ( [] if (pivoted_columns is None) else [ pivoted_columns ] )
    assert isinstance( encoding, (basestring)), "aggregate_unpivot(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "aggregate_unpivot(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/aggregate/unpivot" )

    # Build the request payload for the /aggregate/unpivot endpoint.
    obj = {}
    obj['table_name'] = table_name
    obj['column_names'] = column_names
    obj['variable_column_name'] = variable_column_name
    obj['value_column_name'] = value_column_name
    obj['pivoted_columns'] = pivoted_columns
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/aggregate/unpivot' )

    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Create the record type and save it in the response, if applicable
    # (only binary responses carry a decodable dynamic schema).
    if encoding == "binary":
        record_type = RecordType.from_dynamic_schema( response["response_schema_str"], response["binary_encoded_response"] )
        response["record_type"] = record_type
    else:
        response["record_type"] = None

    return AttrDict( response )
# end aggregate_unpivot


# begin aggregate_unpivot_and_decode
[docs] def aggregate_unpivot_and_decode( self, table_name = None, column_names = None, variable_column_name = '', value_column_name = '', pivoted_columns = None, encoding = 'binary', options = {}, record_type = None, force_primitive_return_types = True, get_column_major = True ): """Rotate the column values into rows values. For unpivot details and examples, see `Unpivot <../../../concepts/unpivot.html>`_. For limitations, see `Unpivot Limitations <../../../concepts/unpivot.html#limitations>`_. Unpivot is used to normalize tables that are built for cross tabular reporting purposes. The unpivot operator rotates the column values for all the pivoted columns. A variable column, value column and all columns from the source table except the unpivot columns are projected into the result table. The variable column and value columns in the result table indicate the pivoted column name and values respectively. The response is returned as a dynamic schema. For details see: `dynamic schemas documentation <../../../api/index.html#dynamic-schemas>`_. Parameters: table_name (str) Name of the table on which the operation will be performed. Must be an existing table/view. column_names (list of str) List of column names or expressions. A wildcard '*' can be used to include all the non-pivoted columns from the source table. The user can provide a single element (which will be automatically promoted to a list internally) or a list. variable_column_name (str) Specifies the variable/parameter column name. The default value is ''. value_column_name (str) Specifies the value column name. The default value is ''. pivoted_columns (list of str) List of one or more values typically the column names of the input table. All the columns in the source table must have the same data type. The user can provide a single element (which will be automatically promoted to a list internally) or a list. encoding (str) Specifies the encoding for returned records. 
Allowed values are: * **binary** -- Indicates that the returned records should be binary encoded. * **json** -- Indicates that the returned records should be json encoded. The default value is 'binary'. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **collection_name** -- Name of a collection which is to contain the table specified in *result_table*. If the collection provided is non-existent, the collection will be automatically created. If empty, then the table will be a top-level table. * **result_table** -- The name of the table used to store the results. Has the same naming restrictions as `tables <../../../concepts/tables.html>`_. If present, no results are returned in the response. * **result_table_persist** -- If *true*, then the result table specified in *result_table* will be persisted and will not expire unless a *ttl* is specified. If *false*, then the result table will be an in-memory table and will expire unless a *ttl* is specified otherwise. Allowed values are: * true * false The default value is 'false'. * **expression** -- Filter expression to apply to the table prior to unpivot processing. * **order_by** -- Comma-separated list of the columns to be sorted by; e.g. 'timestamp asc, x desc'. The columns specified must be present in input table. If any alias is given for any column name, the alias must be used, rather than the original column name. The default value is ''. * **chunk_size** -- Indicates the chunk size to be used for the result table. Must be used in combination with the *result_table* option. * **limit** -- The number of records to keep. The default value is ''. * **ttl** -- Sets the `TTL <../../../concepts/ttl.html>`_ of the table specified in *result_table*. * **view_id** -- view this result table is part of. The default value is ''. * **materialize_on_gpu** -- If *true* then the output columns will be cached on the GPU. 
Allowed values are: * true * false The default value is 'false'. * **create_indexes** -- Comma-separated list of columns on which to create indexes on the table specified in *result_table*. The columns specified must be present in output column names. If any alias is given for any column name, the alias must be used, rather than the original column name. * **result_table_force_replicated** -- Force the result table to be replicated (ignores any sharding). Must be used in combination with the *result_table* option. Allowed values are: * true * false The default value is 'false'. record_type (:class:`RecordType` or None) The record type expected in the results, or None to determinethe appropriate type automatically. If known, providing thismay improve performance in binary mode. Not used in JSON mode.The default value is None. force_primitive_return_types (bool) If `True`, then `OrderedDict` objects will be returned, where string sub-type columns will have their values converted back to strings; for example, the Python `datetime` structs, used for datetime type columns would have their values returned as strings. If `False`, then :class:`Record` objects will be returned, which for string sub-types, will return native or custom structs; no conversion to string takes place. String conversions, when returning `OrderedDicts`, incur a speed penalty, and it is strongly recommended to use the :class:`Record` object option instead. If `True`, but none of the returned columns require a conversion, then the original :class:`Record` objects will be returned. Default value is True. get_column_major (bool) Indicates if the decoded records will be transposed to be column-major or returned as is (row-major). Default value is True. Returns: A dict with the following entries-- table_name (str) Typically shows the result-table name if provided in the request (Ignore otherwise). 
response_schema_str (str) Avro schema of output parameter *binary_encoded_response* or output parameter *json_encoded_response*. total_number_of_records (long) Total/Filtered number of records. has_more_records (bool) Too many records. Returned a partial set. records (list of :class:`Record`) A list of :class:`Record` objects which contain the decoded records. """ assert isinstance( table_name, (basestring)), "aggregate_unpivot_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__ column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] ) assert isinstance( variable_column_name, (basestring)), "aggregate_unpivot_and_decode(): Argument 'variable_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( variable_column_name ).__name__ assert isinstance( value_column_name, (basestring)), "aggregate_unpivot_and_decode(): Argument 'value_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( value_column_name ).__name__ pivoted_columns = pivoted_columns if isinstance( pivoted_columns, list ) else ( [] if (pivoted_columns is None) else [ pivoted_columns ] ) assert isinstance( encoding, (basestring)), "aggregate_unpivot_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__ assert isinstance( options, (dict)), "aggregate_unpivot_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ assert ( (record_type == None) or isinstance(record_type, RecordType) ), "aggregate_unpivot_and_decode: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__ assert isinstance(force_primitive_return_types, bool), "aggregate_unpivot_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__ assert 
isinstance(get_column_major, bool), "aggregate_unpivot_and_decode: Argument 'get_column_major' must be bool; given %s" % type( get_column_major ).__name__ (REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/aggregate/unpivot", get_rsp_cext = True ) # Force JSON encoding if client encoding is json and method encoding # is binary (checking for binary so that we do not accidentally override # the GeoJSON encoding) if ( (self.encoding == "JSON") and (encoding == "binary") ): encoding = "json" obj = {} obj['table_name'] = table_name obj['column_names'] = column_names obj['variable_column_name'] = variable_column_name obj['value_column_name'] = value_column_name obj['pivoted_columns'] = pivoted_columns obj['encoding'] = encoding obj['options'] = self.__sanitize_dicts( options ) response, raw_response = self.__post_then_get_cext_raw( REQ_SCHEMA, RSP_SCHEMA_CEXT, obj, '/aggregate/unpivot' ) if not _Util.is_ok( response ): return AttrDict( response ) # Decode the data if (encoding == 'binary'): record_type = record_type if record_type else RecordType.from_dynamic_schema( response["response_schema_str"], raw_response, response["binary_encoded_response"] ) records = record_type.decode_dynamic_records( raw_response, response["binary_encoded_response"] ) if force_primitive_return_types: records = _Util.convert_cext_records_to_ordered_dicts( records ) # Transpose the data to column-major, if requested by the user if get_column_major: records = GPUdbRecord.transpose_data_to_col_major( records ) response["records"] = records else: records = json.loads( response["json_encoded_response"] ) if get_column_major: # Get column-major data records = GPUdbRecord.decode_dynamic_json_data_column_major( records, response["response_schema_str"] ) else: # Get row-major data records = GPUdbRecord.decode_dynamic_json_data_row_major( records, response["response_schema_str"] ) response["records"] = records # end if del response["binary_encoded_response"] del response["json_encoded_response"] 
return AttrDict( response )
# end aggregate_unpivot_and_decode # begin alter_system_properties
[docs] def alter_system_properties( self, property_updates_map = None, options = {} ): """The :meth:`.alter_system_properties` endpoint is primarily used to simplify the testing of the system and is not expected to be used during normal execution. Commands are given through the input parameter *property_updates_map* whose keys are commands and values are strings representing integer values (for example '8000') or boolean values ('true' or 'false'). Parameters: property_updates_map (dict of str to str) Map containing the properties of the system to be updated. Error if empty. Allowed keys are: * **sm_omp_threads** -- Set the number of OpenMP threads that will be used to service filter & aggregation requests against collections to the specified integer value. * **kernel_omp_threads** -- Set the number of kernel OpenMP threads to the specified integer value. * **concurrent_kernel_execution** -- Enables concurrent kernel execution if the value is *true* and disables it if the value is *false*. Allowed values are: * true * false * **chunk_size** -- Sets the chunk size of all new sets to the specified integer value. * **execution_mode** -- Sets the execution_mode for kernel executions to the specified string value. Possible values are host, device, default (engine decides) or an integer value that indicates max chunk size to exec on host * **flush_to_disk** -- Flushes any changes to any tables to the persistent store. These changes include updates to the vector store, object store, and text search store, Value string is ignored * **clear_cache** -- Clears cached results. Useful to allow repeated timing of endpoints. Value string is ignored * **communicator_test** -- Invoke the communicator test and report timing results. Value string is is a comma separated list of <key>=<value> expressions. 
Expressions are: num_transactions=<num> where num is the number of request reply transactions to invoke per test; message_size=<bytes> where bytes is the size of the messages to send in bytes; check_values=<enabled> where if enabled is true the value of the messages received are verified. * **set_message_timers_enabled** -- Enables the communicator test to collect additional timing statistics when the value string is *true*. Disables the collection when the value string is *false* Allowed values are: * true * false * **bulk_add_test** -- Invoke the bulk add test and report timing results. Value string is ignored. * **network_speed** -- Invoke the network speed test and report timing results. Value string is a semicolon-separated list of <key>=<value> expressions. Valid expressions are: seconds=<time> where time is the time in seconds to run the test; data_size=<size> where size is the size in bytes of the block to be transferred; threads=<number of threads>; to_ranks=<space-separated list of ranks> where the list of ranks is the ranks that rank 0 will send data to and get data from. If to_ranks is unspecified then all worker ranks are used. * **request_timeout** -- Number of minutes after which filtering (e.g., :meth:`.filter`) and aggregating (e.g., :meth:`.aggregate_group_by`) queries will timeout. The default value is '20'. * **max_get_records_size** -- The maximum number of records the database will serve for a given data retrieval call. The default value is '20000'. * **memory_allocation_limit_mb** -- Set the memory allocation limit for all rank processes in megabytes, 0 means no limit. Overrides any individual rank memory allocation limits. The default value is '0'. * **enable_audit** -- Enable or disable auditing. * **audit_headers** -- Enable or disable auditing of request headers. * **audit_body** -- Enable or disable auditing of request bodies. * **audit_data** -- Enable or disable auditing of request data. 
* **enable_job_manager** -- Enable JobManager to enforce processing of requests in the order received. * **chunk_cache_enabled** -- Enable chunk level query caching. Flushes the chunk cache when value is false * **chunk_cache_size** -- Size of the chunk cache in bytes. The default value is '10000000'. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- updated_properties_map (dict of str to str) map of values updated, For speed tests a map of values measured to the measurement """ assert isinstance( property_updates_map, (dict)), "alter_system_properties(): Argument 'property_updates_map' must be (one) of type(s) '(dict)'; given %s" % type( property_updates_map ).__name__ assert isinstance( options, (dict)), "alter_system_properties(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/alter/system/properties" ) obj = {} obj['property_updates_map'] = self.__sanitize_dicts( property_updates_map ) obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/alter/system/properties' ) return AttrDict( response )
# end alter_system_properties # begin alter_table
[docs] def alter_table( self, table_name = None, action = None, value = None, options = {} ): """Apply various modifications to a table, view, or collection. The available modifications include the following: Create or delete an `index <../../../concepts/indexes.html#column-index>`_ on a particular column. This can speed up certain operations when using expressions containing equality or relational operators on indexed columns. This only applies to tables. Set the `time-to-live (TTL) <../../../concepts/ttl.html>`_. This can be applied to tables, views, or collections. When applied to collections, every contained table & view that is not protected will have its TTL set to the given value. Set the global access mode (i.e. locking) for a table. This setting trumps any role-based access controls that may be in place; e.g., a user with write access to a table marked read-only will not be able to insert records into it. The mode can be set to read-only, write-only, read/write, and no access. Change the `protection <../../../concepts/protection.html>`_ mode to prevent or allow automatic expiration. This can be applied to tables, views, and collections. Allow homogeneous tables within a collection. Manage a table's columns--a column can be added, removed, or have its `type and properties <../../../concepts/types.html>`_ modified. Set or unset `compression <../../../concepts/compression.html>`_ for a column. Parameters: table_name (str) Table on which the operation will be performed. Must be an existing table, view, or collection. action (str) Modification operation to be applied Allowed values are: * **allow_homogeneous_tables** -- Sets whether homogeneous tables are allowed in the given collection. This action is only valid if input parameter *table_name* is a collection. The input parameter *value* must be either 'true' or 'false'. 
* **create_index** -- Creates an `index <../../../concepts/indexes.html#column-index>`_ on the column name specified in input parameter *value*. If this column is already indexed, an error will be returned. * **delete_index** -- Deletes an existing `index <../../../concepts/indexes.html#column-index>`_ on the column name specified in input parameter *value*. If this column does not have indexing turned on, an error will be returned. * **move_to_collection** -- Moves a table into a collection input parameter *value*. * **protected** -- Sets whether the given input parameter *table_name* should be `protected <../../../concepts/protection.html>`_ or not. The input parameter *value* must be either 'true' or 'false'. * **rename_table** -- Renames a table, view or collection to input parameter *value*. Has the same naming restrictions as `tables <../../../concepts/tables.html>`_. * **ttl** -- Sets the `time-to-live <../../../concepts/ttl.html>`_ in minutes of the table, view, or collection specified in input parameter *table_name*. * **memory_ttl** -- Sets the time-to-live in minutes for the individual chunks of the columns of the table, view, or collection specified in input parameter *table_name* to free their memory if unused longer than the given time. Specify an empty string to restore the global memory_ttl setting and a value of '-1' for an infinite timeout. * **add_column** -- Adds the column specified in input parameter *value* to the table specified in input parameter *table_name*. Use *column_type* and *column_properties* in input parameter *options* to set the column's type and properties, respectively. * **change_column** -- Changes type and properties of the column specified in input parameter *value*. Use *column_type* and *column_properties* in input parameter *options* to set the column's type and properties, respectively. Note that primary key and/or shard key columns cannot be changed. 
All unchanging column properties must be listed for the change to take place, e.g., to add dictionary encoding to an existing 'char4' column, both 'char4' and 'dict' must be specified in the input parameter *options* map. * **set_column_compression** -- Modifies the `compression <../../../concepts/compression.html>`_ setting on the column specified in input parameter *value*. * **delete_column** -- Deletes the column specified in input parameter *value* from the table specified in input parameter *table_name*. * **create_foreign_key** -- Creates a `foreign key <../../../concepts/tables.html#foreign-key>`_ using the format '(source_column_name [, ...]) references target_table_name(primary_key_column_name [, ...]) [as foreign_key_name]'. * **delete_foreign_key** -- Deletes a `foreign key <../../../concepts/tables.html#foreign-key>`_. The input parameter *value* should be the foreign_key_name specified when creating the key or the complete string used to define it. * **set_global_access_mode** -- Sets the global access mode (i.e. locking) for the table specified in input parameter *table_name*. Specify the access mode in input parameter *value*. Valid modes are 'no_access', 'read_only', 'write_only' and 'read_write'. * **refresh** -- Replays all the table creation commands required to create this `materialized view <../../../concepts/materialized_views.html>`_. * **set_refresh_method** -- Sets the method by which this `materialized view <../../../concepts/materialized_views.html>`_ is refreshed - one of 'manual', 'periodic', 'on_change'. * **set_refresh_start_time** -- Sets the time to start periodic refreshes of this `materialized view <../../../concepts/materialized_views.html>`_ to datetime string with format 'YYYY-MM-DD HH:MM:SS'. Subsequent refreshes occur at the specified time + N * the refresh period. * **set_refresh_period** -- Sets the time interval in seconds at which to refresh this `materialized view <../../../concepts/materialized_views.html>`_. 
Also, sets the refresh method to periodic if not alreay set. * **remove_text_search_attributes** -- remove text_search attribute from all columns, if exists. value (str) The value of the modification. May be a column name, 'true' or 'false', a TTL, or the global access mode depending on input parameter *action*. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **column_default_value** -- When adding a column, set a default value for existing records. For nullable columns, the default value will be null, regardless of data type. * **column_properties** -- When adding or changing a column, set the column properties (strings, separated by a comma: data, store_only, text_search, char8, int8 etc). * **column_type** -- When adding or changing a column, set the column type (strings, separated by a comma: int, double, string, null etc). * **compression_type** -- When setting column compression (*set_column_compression* for input parameter *action*), compression type to use: *none* (to use no compression) or a valid compression type. Allowed values are: * none * snappy * lz4 * lz4hc The default value is 'snappy'. * **copy_values_from_column** -- please see add_column_expression instead. * **rename_column** -- When changing a column, specify new column name. * **validate_change_column** -- When changing a column, validate the change before applying it. If *true*, then validate all values. A value too large (or too long) for the new type will prevent any change. If *false*, then when a value is too large or long, it will be truncated. Allowed values are: * **true** -- true * **false** -- false The default value is 'true'. * **update_last_access_time** -- Indicates whether need to update the last_access_time. Allowed values are: * true * false The default value is 'true'. * **add_column_expression** -- expression for new column's values (optional with add_column). Any valid expressions including existing columns. 
Returns: A dict with the following entries-- table_name (str) Table on which the operation was performed. action (str) Modification operation that was performed. value (str) The value of the modification that was performed. type_id (str) return the type_id (when changing a table, a new type may be created) type_definition (str) return the type_definition (when changing a table, a new type may be created) properties (dict of str to lists of str) return the type properties (when changing a table, a new type may be created) label (str) return the type label (when changing a table, a new type may be created) """ assert isinstance( table_name, (basestring)), "alter_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__ assert isinstance( action, (basestring)), "alter_table(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__ assert isinstance( value, (basestring)), "alter_table(): Argument 'value' must be (one) of type(s) '(basestring)'; given %s" % type( value ).__name__ assert isinstance( options, (dict)), "alter_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/alter/table" ) obj = {} obj['table_name'] = table_name obj['action'] = action obj['value'] = value obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/alter/table' ) return AttrDict( response )
# end alter_table # begin alter_table_metadata
[docs] def alter_table_metadata( self, table_names = None, metadata_map = None, options = {} ): """Updates (adds or changes) metadata for tables. The metadata key and values must both be strings. This is an easy way to annotate whole tables rather than single records within tables. Some examples of metadata are owner of the table, table creation timestamp etc. Parameters: table_names (list of str) Names of the tables whose metadata will be updated. All specified tables must exist, or an error will be returned. The user can provide a single element (which will be automatically promoted to a list internally) or a list. metadata_map (dict of str to str) A map which contains the metadata of the tables that are to be updated. Note that only one map is provided for all the tables; so the change will be applied to every table. If the provided map is empty, then all existing metadata for the table(s) will be cleared. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- table_names (list of str) Value of input parameter *table_names*. metadata_map (dict of str to str) Value of input parameter *metadata_map*. """ table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] ) assert isinstance( metadata_map, (dict)), "alter_table_metadata(): Argument 'metadata_map' must be (one) of type(s) '(dict)'; given %s" % type( metadata_map ).__name__ assert isinstance( options, (dict)), "alter_table_metadata(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/alter/table/metadata" ) obj = {} obj['table_names'] = table_names obj['metadata_map'] = self.__sanitize_dicts( metadata_map ) obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/alter/table/metadata' ) return AttrDict( response )
# end alter_table_metadata # begin alter_user
[docs] def alter_user( self, name = None, action = None, value = None, options = {} ): """Alters a user. Parameters: name (str) Name of the user to be altered. Must be an existing user. action (str) Modification operation to be applied to the user. Allowed values are: * **set_password** -- Sets the password of the user. The user must be an internal user. value (str) The value of the modification, depending on input parameter *action*. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- name (str) Value of input parameter *name*. """ assert isinstance( name, (basestring)), "alter_user(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__ assert isinstance( action, (basestring)), "alter_user(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__ assert isinstance( value, (basestring)), "alter_user(): Argument 'value' must be (one) of type(s) '(basestring)'; given %s" % type( value ).__name__ assert isinstance( options, (dict)), "alter_user(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/alter/user" ) obj = {} obj['name'] = name obj['action'] = action obj['value'] = value obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/alter/user' ) return AttrDict( response )
# end alter_user # begin append_records
[docs] def append_records( self, table_name = None, source_table_name = None, field_map = None, options = {} ): """Append (or insert) all records from a source table (specified by input parameter *source_table_name*) to a particular target table (specified by input parameter *table_name*). The field map (specified by input parameter *field_map*) holds the user specified map of target table column names with their mapped source column names. Parameters: table_name (str) The table name for the records to be appended. Must be an existing table. source_table_name (str) The source table name to get records from. Must be an existing table name. field_map (dict of str to str) Contains the mapping of column names from the target table (specified by input parameter *table_name*) as the keys, and corresponding column names or expressions (e.g., 'col_name+1') from the source table (specified by input parameter *source_table_name*). Must be existing column names in source table and target table, and their types must be matched. For details on using expressions, see `Expressions <../../../concepts/expressions.html>`_. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **offset** -- A positive integer indicating the number of initial results to skip from source table (specified by input parameter *source_table_name*). Default is 0. The minimum allowed value is 0. The maximum allowed value is MAX_INT. The default value is '0'. * **limit** -- A positive integer indicating the maximum number of results to be returned from source table (specified by input parameter *source_table_name*). Or END_OF_SET (-9999) to indicate that the max number of results should be returned. The default value is '-9999'. * **expression** -- Optional filter expression to apply to the source table (specified by input parameter *source_table_name*). Empty by default. The default value is ''. 
* **order_by** -- Comma-separated list of the columns and expressions to be sorted by from the source table (specified by input parameter *source_table_name*); e.g. 'timestamp asc, x desc'. The *order_by* columns do not have to be present in input parameter *field_map*. The default value is ''. * **update_on_existing_pk** -- Specifies the record collision policy for inserting the source table records (specified by input parameter *source_table_name*) into the target table (specified by input parameter *table_name*) table with a `primary key <../../../concepts/tables.html#primary-keys>`_. If set to *true*, any existing target table record with primary key values that match those of a source table record being inserted will be replaced by that new record. If set to *false*, any existing target table record with primary key values that match those of a source table record being inserted will remain unchanged and the new record discarded. If the specified table does not have a primary key, then this option is ignored. Allowed values are: * true * false The default value is 'false'. * **truncate_strings** -- If set to *true*, it allows inserting unrestricted length strings into charN string columns by truncating the unrestricted length strings to fit. Allowed values are: * true * false The default value is 'false'. 
Returns: A dict with the following entries-- table_name (str) """ assert isinstance( table_name, (basestring)), "append_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__ assert isinstance( source_table_name, (basestring)), "append_records(): Argument 'source_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( source_table_name ).__name__ assert isinstance( field_map, (dict)), "append_records(): Argument 'field_map' must be (one) of type(s) '(dict)'; given %s" % type( field_map ).__name__ assert isinstance( options, (dict)), "append_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/append/records" ) obj = {} obj['table_name'] = table_name obj['source_table_name'] = source_table_name obj['field_map'] = self.__sanitize_dicts( field_map ) obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/append/records' ) return AttrDict( response )
# end append_records # begin clear_table
def clear_table( self, table_name = '', authorization = '', options = None ):
    """Clears (drops) one or all tables in the database cluster.  The
    operation is synchronous, meaning that the table will be cleared
    before the function returns.  The response payload returns the status
    of the operation along with the name of the table that was cleared.

    Parameters:

        table_name (str)
            Name of the table to be cleared.  Must be an existing table.
            Empty string clears all available tables, though this behavior
            may be prevented by default via gpudb.conf parameter
            'disable_clear_all'.  The default value is ''.

        authorization (str)
            No longer used.  User can pass an empty string.  The default
            value is ''.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).
            Allowed keys are:

            * **no_error_if_not_exists** --
              If *true* and if the table specified in input parameter
              *table_name* does not exist no error is returned.  If *false*
              and if the table specified in input parameter *table_name*
              does not exist then an error is returned.  Allowed values
              are: 'true', 'false'.  The default value is 'false'.

    Returns:
        A dict with the following entries--

        table_name (str)
            Value of input parameter *table_name* for a given table, or
            'ALL CLEARED' in case of clearing all tables.
    """
    # Fix: the default for *options* was a shared mutable {}; use a None
    # sentinel so every call gets its own dict (callers passing a dict,
    # or nothing, see identical behavior).
    if options is None:
        options = {}

    assert isinstance( table_name, (basestring)), "clear_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( authorization, (basestring)), "clear_table(): Argument 'authorization' must be (one) of type(s) '(basestring)'; given %s" % type( authorization ).__name__
    assert isinstance( options, (dict)), "clear_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Look up the request/response Avro schemas for this endpoint.
    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/clear/table" )

    obj = {}
    obj['table_name'] = table_name
    obj['authorization'] = authorization
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/clear/table' )

    return AttrDict( response )
# end clear_table


# begin clear_table_monitor
def clear_table_monitor( self, topic_id = None, options = {} ):
    """Deactivates a table monitor previously created with
    :meth:`.create_table_monitor`.

    Parameters:

        topic_id (str)
            The topic ID returned by :meth:`.create_table_monitor`.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        topic_id (str)
            Value of input parameter *topic_id*.
    """
    assert isinstance( topic_id, (basestring)), "clear_table_monitor(): Argument 'topic_id' must be (one) of type(s) '(basestring)'; given %s" % type( topic_id ).__name__
    assert isinstance( options, (dict)), "clear_table_monitor(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Resolve the request/response Avro schemas for this endpoint.
    req_schema, rsp_schema = self.__get_schemas( "/clear/tablemonitor" )

    # Build the request payload in one literal rather than field-by-field.
    payload = {
        'topic_id': topic_id,
        'options':  self.__sanitize_dicts( options ),
    }

    resp = self.__post_then_get_cext( req_schema, rsp_schema, payload,
                                      '/clear/tablemonitor' )
    return AttrDict( resp )
# end clear_table_monitor


# begin clear_trigger
def clear_trigger( self, trigger_id = None, options = {} ):
    """Clears or cancels the trigger identified by the specified handle.
    The output returns the handle of the trigger cleared as well as
    indicating success or failure of the trigger deactivation.

    Parameters:

        trigger_id (str)
            ID for the trigger to be deactivated.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        trigger_id (str)
            Value of input parameter *trigger_id*.
    """
    assert isinstance( trigger_id, (basestring)), "clear_trigger(): Argument 'trigger_id' must be (one) of type(s) '(basestring)'; given %s" % type( trigger_id ).__name__
    assert isinstance( options, (dict)), "clear_trigger(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Fetch the endpoint's request/response Avro schemas.
    req_schema, rsp_schema = self.__get_schemas( "/clear/trigger" )

    payload = {
        'trigger_id': trigger_id,
        'options':    self.__sanitize_dicts( options ),
    }

    return AttrDict( self.__post_then_get_cext( req_schema, rsp_schema,
                                                payload, '/clear/trigger' ) )
# end clear_trigger


# begin create_job
def create_job( self, endpoint = None, request_encoding = 'binary', data = None, data_str = None, options = None ):
    """Create a job which will run asynchronously.  The response returns a
    job ID, which can be used to query the status and result of the job.
    The status and the result of the job upon completion can be requested
    by :meth:`.get_job`.

    Parameters:

        endpoint (str)
            Indicates which endpoint to execute, e.g. '/alter/table'.

        request_encoding (str)
            The encoding of the request payload for the job.
            Allowed values are: 'binary', 'json', 'snappy'.
            The default value is 'binary'.

        data (str)
            Binary-encoded payload for the job to be run asynchronously.
            Must contain the relevant input parameters for the endpoint
            indicated in input parameter *endpoint*.  If this parameter is
            used, then input parameter *request_encoding* must be *binary*
            or *snappy*.

        data_str (str)
            JSON-encoded payload for the job to be run asynchronously.
            Must contain the relevant input parameters for the endpoint
            indicated in input parameter *endpoint*.  If this parameter is
            used, then input parameter *request_encoding* must be *json*.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        job_id (int)
            An identifier for the job created by this call.
    """
    # Fix: only one of *data*/*data_str* is ever meaningful (per the
    # docstring), yet the autogenerated code asserted BOTH were strings
    # while both defaulted to None -- so every legal call that omitted one
    # payload raised an AssertionError.  Treat an omitted payload as the
    # empty string.  NOTE(review): assumes the Avro request schema types
    # these fields as plain strings, so '' is the natural "absent" value
    # -- confirm against the /create/job schema.
    data = '' if data is None else data
    data_str = '' if data_str is None else data_str
    # Fix the shared-mutable-default pitfall for *options*.
    if options is None:
        options = {}

    assert isinstance( endpoint, (basestring)), "create_job(): Argument 'endpoint' must be (one) of type(s) '(basestring)'; given %s" % type( endpoint ).__name__
    assert isinstance( request_encoding, (basestring)), "create_job(): Argument 'request_encoding' must be (one) of type(s) '(basestring)'; given %s" % type( request_encoding ).__name__
    assert isinstance( data, (basestring)), "create_job(): Argument 'data' must be (one) of type(s) '(basestring)'; given %s" % type( data ).__name__
    assert isinstance( data_str, (basestring)), "create_job(): Argument 'data_str' must be (one) of type(s) '(basestring)'; given %s" % type( data_str ).__name__
    assert isinstance( options, (dict)), "create_job(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Look up the request/response Avro schemas for this endpoint.
    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/create/job" )

    obj = {}
    obj['endpoint'] = endpoint
    obj['request_encoding'] = request_encoding
    obj['data'] = data
    obj['data_str'] = data_str
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/create/job' )

    return AttrDict( response )
# end create_job


# begin create_join_table
def create_join_table( self, join_table_name = None, table_names = None, column_names = None, expressions = None, options = None ):
    """Creates a table that is the result of a SQL JOIN.  For join details
    and examples see: `Joins <../../../concepts/joins.html>`_.  For
    limitations, see `Join Limitations and Cautions
    <../../../concepts/joins.html#limitations-cautions>`_.

    Parameters:

        join_table_name (str)
            Name of the join table to be created.  Has the same naming
            restrictions as `tables <../../../concepts/tables.html>`_.

        table_names (list of str)
            The list of table names composing the join.  Corresponds to a
            SQL statement FROM clause.  A single element is automatically
            promoted to a list internally.

        column_names (list of str)
            List of member table columns or column expressions to be
            included in the join.  Columns can be prefixed with
            'table_id.column_name' and aliased via 'column_name as alias';
            '*' and 'table_id.*' wild cards are accepted when names are
            unique.  A single element is automatically promoted to a list.

        expressions (list of str)
            An optional list of expressions to combine and filter the
            joined tables (SQL WHERE clause).  See `expressions
            <../../../concepts/expressions.html>`_.  The default value is
            an empty list ( [] ).  A single element is automatically
            promoted to a list.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).
            Allowed keys include *collection_name*, *max_query_dimensions*,
            *optimize_lookups*, *refresh_method* ('manual', 'on_query',
            'on_insert'; default 'manual'), *refresh* ('no_refresh',
            'refresh', 'full_refresh'; default 'no_refresh'), *ttl*,
            *view_id*, *no_count*, *chunk_size*, and
            *allow_right_primary_key_join*.

    Returns:
        A dict with the following entries--

        join_table_name (str)
            Value of input parameter *join_table_name*.

        count (long)
            The number of records in the join table filtered by the given
            select expression.
    """
    # Fix: *expressions* and *options* used shared mutable defaults
    # ([] and {}).  None sentinels preserve the documented behavior while
    # giving each call fresh objects; the promotion logic below already
    # maps None to [].
    if options is None:
        options = {}

    assert isinstance( join_table_name, (basestring)), "create_join_table(): Argument 'join_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( join_table_name ).__name__
    table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
    column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
    expressions = expressions if isinstance( expressions, list ) else ( [] if (expressions is None) else [ expressions ] )
    assert isinstance( options, (dict)), "create_join_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Look up the request/response Avro schemas for this endpoint.
    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/create/jointable" )

    obj = {}
    obj['join_table_name'] = join_table_name
    obj['table_names'] = table_names
    obj['column_names'] = column_names
    obj['expressions'] = expressions
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/create/jointable' )

    return AttrDict( response )
# end create_join_table


# begin create_materialized_view
def create_materialized_view( self, table_name = None, options = {} ):
    """Initiates the process of creating a materialized view, reserving
    the view's name to prevent other views or tables from being created
    with that name.  For details and examples, see `Materialized Views
    <../../../concepts/materialized_views.html>`_.

    The response contains output parameter *view_id*, which is used to tag
    each subsequent operation (projection, union, aggregation, filter, or
    join) that will compose the view.

    Parameters:

        table_name (str)
            Name of the table to be created that is the top-level table of
            the materialized view.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).
            Allowed keys include *collection_name*, *ttl*, *persist*
            ('true'/'false'; default 'false'), *refresh_method* ('manual',
            'on_query', 'on_change', 'periodic'; default 'manual'),
            *refresh_period*, and *refresh_start_time*.

    Returns:
        A dict with the following entries--

        table_name (str)
            Value of input parameter *table_name*.

        view_id (str)
            Value of view_id.
    """
    assert isinstance( table_name, (basestring)), "create_materialized_view(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( options, (dict)), "create_materialized_view(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Resolve the endpoint's request/response Avro schemas.
    req_schema, rsp_schema = self.__get_schemas( "/create/materializedview" )

    payload = {
        'table_name': table_name,
        'options':    self.__sanitize_dicts( options ),
    }

    resp = self.__post_then_get_cext( req_schema, rsp_schema, payload,
                                      '/create/materializedview' )
    return AttrDict( resp )
# end create_materialized_view


# begin create_proc
def create_proc( self, proc_name = None, execution_mode = 'distributed', files = None, command = '', args = None, options = None ):
    """Creates an instance (proc) of the user-defined function (UDF)
    specified by the given command, options, and files, and makes it
    available for execution.  For details on UDFs, see: `User-Defined
    Functions <../../../concepts/udf.html>`_

    Parameters:

        proc_name (str)
            Name of the proc to be created.  Must not be the name of a
            currently existing proc.

        execution_mode (str)
            The execution mode of the proc.  Allowed values are:

            * **distributed** -- Input table data will be divided into data
              segments distributed across all nodes in the cluster, and the
              proc command will be invoked once per data segment in
              parallel.
            * **nondistributed** -- The proc command will be invoked only
              once per execution, with no access to input or output table
              data.

            The default value is 'distributed'.

        files (dict of str to str)
            A map of the files that make up the proc.  Keys are file names
            (may include subdirectory names, e.g. 'subdir/file', but must
            not resolve above the proc's root); values are the binary
            contents of the files.  The default value is an empty dict
            ( {} ).

        command (str)
            The command (excluding arguments) invoked when the proc is
            executed, resolved from the directory containing *files*.  If
            not specified, and exactly one file is provided in *files*,
            that file will be invoked.  The default value is ''.

        args (list of str)
            An array of command-line arguments passed to *command* when the
            proc is executed.  The default value is an empty list ( [] ).
            A single element is automatically promoted to a list.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).
            Allowed keys are:

            * **max_concurrency_per_node** -- The maximum number of
              concurrent instances of the proc executed per node; 0 allows
              unlimited concurrency.  The default value is '0'.

    Returns:
        A dict with the following entries--

        proc_name (str)
            Value of input parameter *proc_name*.
    """
    # Fix: *files*, *args*, and *options* used shared mutable defaults
    # ({} / []).  None sentinels give each call fresh objects while
    # preserving the documented defaults; *args* is already normalized by
    # the list-promotion line below.
    if files is None:
        files = {}
    if options is None:
        options = {}

    assert isinstance( proc_name, (basestring)), "create_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__
    assert isinstance( execution_mode, (basestring)), "create_proc(): Argument 'execution_mode' must be (one) of type(s) '(basestring)'; given %s" % type( execution_mode ).__name__
    assert isinstance( files, (dict)), "create_proc(): Argument 'files' must be (one) of type(s) '(dict)'; given %s" % type( files ).__name__
    assert isinstance( command, (basestring)), "create_proc(): Argument 'command' must be (one) of type(s) '(basestring)'; given %s" % type( command ).__name__
    args = args if isinstance( args, list ) else ( [] if (args is None) else [ args ] )
    assert isinstance( options, (dict)), "create_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Look up the request/response Avro schemas for this endpoint.
    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/create/proc" )

    obj = {}
    obj['proc_name'] = proc_name
    obj['execution_mode'] = execution_mode
    obj['files'] = self.__sanitize_dicts( files )
    obj['command'] = command
    obj['args'] = args
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/create/proc' )

    return AttrDict( response )
# end create_proc


# begin create_projection
def create_projection( self, table_name = None, projection_name = None, column_names = None, options = {} ):
    """Creates a new `projection <../../../concepts/projections.html>`_ of
    an existing table -- a subset of the columns (potentially including
    derived columns) of a table.  For details and examples, see
    `Projections <../../../concepts/projections.html>`_; for limitations,
    see `Projection Limitations and Cautions
    <../../../concepts/projections.html#limitations-and-cautions>`_.

    `Window functions <../../../concepts/window.html>`_ are available
    through this endpoint as well as :meth:`.get_records_by_column`.
    A projection can be created with a different `shard key
    <../../../concepts/tables.html#shard-keys>`_ than the source table via
    the *shard_key* option, regardless of how the source table is sharded.

    Parameters:

        table_name (str)
            Name of the existing table on which the projection is to be
            applied.

        projection_name (str)
            Name of the projection to be created.  Has the same naming
            restrictions as `tables <../../../concepts/tables.html>`_.

        column_names (list of str)
            List of columns from input parameter *table_name* to be
            included in the projection.  Can include derived columns and
            aliases ('column_name as alias').  A single element is
            automatically promoted to a list internally.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).
            Allowed keys include *collection_name*, *expression*,
            *is_replicated*, *limit*, *order_by*, *materialize_on_gpu*,
            *chunk_size*, *create_indexes*, *ttl*, *shard_key*, *persist*,
            *preserve_dict_encoding*, and *view_id*.

    Returns:
        A dict with the following entries--

        projection_name (str)
            Value of input parameter *projection_name*.
    """
    assert isinstance( table_name, (basestring)), "create_projection(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( projection_name, (basestring)), "create_projection(): Argument 'projection_name' must be (one) of type(s) '(basestring)'; given %s" % type( projection_name ).__name__

    # Promote a scalar column name to a one-element list; None becomes [].
    if not isinstance( column_names, list ):
        column_names = [] if column_names is None else [ column_names ]

    assert isinstance( options, (dict)), "create_projection(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Resolve the endpoint's request/response Avro schemas.
    req_schema, rsp_schema = self.__get_schemas( "/create/projection" )

    payload = {
        'table_name':      table_name,
        'projection_name': projection_name,
        'column_names':    column_names,
        'options':         self.__sanitize_dicts( options ),
    }

    resp = self.__post_then_get_cext( req_schema, rsp_schema, payload,
                                      '/create/projection' )
    return AttrDict( resp )
# end create_projection


# begin create_role
def create_role( self, name = None, options = {} ):
    """Creates a new role.

    Parameters:

        name (str)
            Name of the role to be created.  Must contain only lowercase
            letters, digits, and underscores, and cannot begin with a
            digit.  Must not be the same name as an existing user or role.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        name (str)
            Value of input parameter *name*.
    """
    assert isinstance( name, (basestring)), "create_role(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
    assert isinstance( options, (dict)), "create_role(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Resolve the endpoint's request/response Avro schemas.
    req_schema, rsp_schema = self.__get_schemas( "/create/role" )

    payload = {
        'name':    name,
        'options': self.__sanitize_dicts( options ),
    }

    return AttrDict( self.__post_then_get_cext( req_schema, rsp_schema,
                                                payload, '/create/role' ) )
# end create_role


# begin create_table
def create_table( self, table_name = None, type_id = None, options = {} ):
    """Creates a new table or collection.  If a new table is being
    created, its type is given by input parameter *type_id*, which must be
    the ID of a currently registered type (i.e. one created via
    :meth:`.create_type`).  The table will be created inside a collection
    if the option *collection_name* is specified; a non-existent
    collection is created automatically.

    To create a new collection, specify the name of the collection in
    input parameter *table_name* and set the *is_collection* option to
    *true*; input parameter *type_id* will be ignored.

    Parameters:

        table_name (str)
            Name of the table to be created.  Error for requests with
            existing table of the same name and type id may be suppressed
            by using the *no_error_if_exists* option.  See `Tables
            <../../../concepts/tables.html>`_ for naming restrictions.

        type_id (str)
            ID of a currently registered type.  All objects added to the
            newly created table will be of this type.  Ignored if
            *is_collection* is *true*.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).
            Allowed keys include *no_error_if_exists*, *collection_name*,
            *is_collection*, *disallow_homogeneous_tables*,
            *is_replicated*, *foreign_keys*, *foreign_shard_key*, *ttl*,
            *chunk_size*, and *is_result_table*.

    Returns:
        A dict with the following entries--

        table_name (str)
            Value of input parameter *table_name*.

        type_id (str)
            Value of input parameter *type_id*.

        is_collection (bool)
            Indicates if the created entity is a collection.
    """
    assert isinstance( table_name, (basestring)), "create_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( type_id, (basestring)), "create_table(): Argument 'type_id' must be (one) of type(s) '(basestring)'; given %s" % type( type_id ).__name__
    assert isinstance( options, (dict)), "create_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Resolve the endpoint's request/response Avro schemas.
    req_schema, rsp_schema = self.__get_schemas( "/create/table" )

    payload = {
        'table_name': table_name,
        'type_id':    type_id,
        'options':    self.__sanitize_dicts( options ),
    }

    resp = self.__post_then_get_cext( req_schema, rsp_schema, payload,
                                      '/create/table' )
    return AttrDict( resp )
# end create_table


# begin create_table_monitor
def create_table_monitor( self, table_name = None, options = {} ):
    """Creates a monitor that watches for new records inserted into a
    particular table (identified by input parameter *table_name*) and
    forwards copies to subscribers via ZMQ.  After this call completes,
    subscribe to the returned output parameter *topic_id* on the ZMQ table
    monitor port (default 9002).  Each time an insert operation on the
    table completes, a multipart message is published for that topic; the
    first part contains only the topic ID, and each subsequent part
    contains one binary-encoded Avro object that was inserted.  The
    monitor runs (regardless of subscribers) until deactivated with
    :meth:`.clear_table_monitor`.

    Parameters:

        table_name (str)
            Name of the table to monitor.  Must not refer to a collection.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        topic_id (str)
            The ZMQ topic ID to subscribe to for inserted records.

        table_name (str)
            Value of input parameter *table_name*.

        type_schema (str)
            JSON Avro schema of the table, for use in decoding published
            records.
    """
    assert isinstance( table_name, (basestring)), "create_table_monitor(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( options, (dict)), "create_table_monitor(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Resolve the endpoint's request/response Avro schemas.
    req_schema, rsp_schema = self.__get_schemas( "/create/tablemonitor" )

    payload = {
        'table_name': table_name,
        'options':    self.__sanitize_dicts( options ),
    }

    return AttrDict( self.__post_then_get_cext( req_schema, rsp_schema,
                                                payload, '/create/tablemonitor' ) )
# end create_table_monitor


# begin create_trigger_by_area
def create_trigger_by_area( self, request_id = None, table_names = None, x_column_name = None, x_vector = None, y_column_name = None, y_vector = None, options = {} ):
    """Sets up an area trigger mechanism for two column_names for one or
    more tables -- essentially the two-dimensional version of
    :meth:`.create_trigger_by_range`.  Once the trigger has been
    activated, any record added to the listed tables(s) via
    :meth:`.insert_records` with the chosen columns' values falling within
    the specified region will trip the trigger.  All such records will be
    queued at the trigger port (by default '9001' but able to be retrieved
    via :meth:`.show_system_status`) for any listening client to collect.
    Active triggers can be cancelled by using the :meth:`.clear_trigger`
    endpoint or by clearing all relevant tables.

    The output returns the trigger handle as well as indicating success or
    failure of the trigger activation.

    Parameters:

        request_id (str)
            User-created ID for the trigger.  The ID can be alphanumeric,
            contain symbols, and must contain at least one character.

        table_names (list of str)
            Names of the tables on which the trigger will be activated and
            maintained.  A single element is automatically promoted to a
            list internally.

        x_column_name (str)
            Name of a numeric column on which the trigger is activated.
            Usually 'x' for geospatial data points.

        x_vector (list of floats)
            The respective coordinate values for the region on which the
            trigger is activated; usually the x-coordinates of a
            geospatial region.  A single element is automatically promoted
            to a list.

        y_column_name (str)
            Name of a second numeric column on which the trigger is
            activated.  Usually 'y' for geospatial data points.

        y_vector (list of floats)
            The respective coordinate values for the region on which the
            trigger is activated; usually the y-coordinates of a
            geospatial region.  Must be the same length as xvals.  A
            single element is automatically promoted to a list.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        trigger_id (str)
            Value of input parameter *request_id*.
    """
    def _listify( value ):
        # Promote a scalar to a one-element list; None becomes [].
        if isinstance( value, list ):
            return value
        return [] if value is None else [ value ]

    assert isinstance( request_id, (basestring)), "create_trigger_by_area(): Argument 'request_id' must be (one) of type(s) '(basestring)'; given %s" % type( request_id ).__name__
    table_names = _listify( table_names )
    assert isinstance( x_column_name, (basestring)), "create_trigger_by_area(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
    x_vector = _listify( x_vector )
    assert isinstance( y_column_name, (basestring)), "create_trigger_by_area(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
    y_vector = _listify( y_vector )
    assert isinstance( options, (dict)), "create_trigger_by_area(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    # Resolve the endpoint's request/response Avro schemas.
    req_schema, rsp_schema = self.__get_schemas( "/create/trigger/byarea" )

    payload = {
        'request_id':    request_id,
        'table_names':   table_names,
        'x_column_name': x_column_name,
        'x_vector':      x_vector,
        'y_column_name': y_column_name,
        'y_vector':      y_vector,
        'options':       self.__sanitize_dicts( options ),
    }

    resp = self.__post_then_get_cext( req_schema, rsp_schema, payload,
                                      '/create/trigger/byarea' )
    return AttrDict( resp )
# end create_trigger_by_area


# begin create_trigger_by_range
def create_trigger_by_range( self, request_id = None, table_names = None, column_name = None, min = None, max = None, options = None ):
    """Sets up a simple range trigger for a column_name for one or more
    tables.  Once the trigger has been activated, any record added to the
    listed tables(s) via :meth:`.insert_records` with the chosen
    column_name's value falling within the specified range will trip the
    trigger.  All such records will be queued at the trigger port (by
    default '9001' but able to be retrieved via
    :meth:`.show_system_status`) for any listening client to collect.
    Active triggers can be cancelled by using the :meth:`.clear_trigger`
    endpoint or by clearing all relevant tables.

    The output returns the trigger handle as well as indicating success or
    failure of the trigger activation.

    Parameters:

        request_id (str)
            User-created ID for the trigger.  The ID can be alphanumeric,
            contain symbols, and must contain at least one character.

        table_names (list of str)
            Tables on which the trigger will be active.  The user can
            provide a single element (which will be automatically promoted
            to a list internally) or a list.

        column_name (str)
            Name of a numeric column_name on which the trigger is activated.

        min (float)
            The lower bound (inclusive) for the trigger range.

        max (float)
            The upper bound (inclusive) for the trigger range.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        trigger_id (str)
            Value of input parameter *request_id*.
    """
    # NOTE: the parameter names 'min' and 'max' shadow the builtins; they
    # are part of the generated public interface and cannot be renamed.
    # Avoid a shared mutable default dict; treat None as "no options".
    options = options if ( options is not None ) else {}

    assert isinstance( request_id, (basestring)), "create_trigger_by_range(): Argument 'request_id' must be (one) of type(s) '(basestring)'; given %s" % type( request_id ).__name__
    table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
    assert isinstance( column_name, (basestring)), "create_trigger_by_range(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( min, (int, long, float)), "create_trigger_by_range(): Argument 'min' must be (one) of type(s) '(int, long, float)'; given %s" % type( min ).__name__
    assert isinstance( max, (int, long, float)), "create_trigger_by_range(): Argument 'max' must be (one) of type(s) '(int, long, float)'; given %s" % type( max ).__name__
    assert isinstance( options, (dict)), "create_trigger_by_range(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/create/trigger/byrange" )

    obj = {}
    obj['request_id'] = request_id
    obj['table_names'] = table_names
    obj['column_name'] = column_name
    obj['min'] = min
    obj['max'] = max
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/create/trigger/byrange' )

    return AttrDict( response )
# end create_trigger_by_range # begin create_type
def create_type( self, type_definition = None, label = None, properties = None, options = None ):
    """Creates a new type describing the layout or schema of a table.  The
    type definition is a JSON string describing the fields (i.e. columns)
    of the type.  Each field consists of a name and a data type.  Supported
    data types are: double, float, int, long, string, and bytes.  In
    addition one or more properties can be specified for each column which
    customize the memory usage and query availability of that column.  Note
    that some properties are mutually exclusive--i.e. they cannot be
    specified for any given column simultaneously.  One example of mutually
    exclusive properties are *data* and *store_only*.

    A single `primary key <../../../concepts/tables.html#primary-keys>`_
    and/or single `shard key <../../../concepts/tables.html#shard-keys>`_
    can be set across one or more columns.  If a primary key is specified,
    then a uniqueness constraint is enforced, in that only a single object
    can exist with a given primary key.  When :meth:`inserting
    <.insert_records>` data into a table with a primary key, depending on
    the parameters in the request, incoming objects with primary key values
    that match existing objects will either overwrite (i.e. update) the
    existing object or will be skipped and not added into the set.

    Example of a type definition with some of the parameters::

            {"type":"record",
            "name":"point",
            "fields":[{"name":"msg_id","type":"string"},
                            {"name":"x","type":"double"},
                            {"name":"y","type":"double"},
                            {"name":"TIMESTAMP","type":"double"},
                            {"name":"source","type":"string"},
                            {"name":"group_id","type":"string"},
                            {"name":"OBJECT_ID","type":"string"}]
            }

    Properties::

            {"group_id":["store_only"],
            "msg_id":["store_only","text_search"]
            }

    Parameters:

        type_definition (str)
            a JSON string describing the columns of the type to be
            registered.

        label (str)
            A user-defined description string which can be used to
            differentiate between tables and types with otherwise identical
            schemas.

        properties (dict of str to lists of str)
            Each key-value pair specifies the properties to use for a given
            column where the key is the column name.  All keys used must be
            relevant column names for the given table.  Specifying any
            property overrides the default properties for that column
            (which is based on the column's data type).  Allowed values
            include: *data*, *text_search*, *store_only*, *disk_optimized*,
            *timestamp*, *decimal*, *date*, *time*, *datetime*, *char1*
            through *char256*, *int8*, *int16*, *ipv4*, *wkt*,
            *primary_key*, *shard_key*, *nullable*, *dict*, and
            *init_with_now*; see the /create/type endpoint documentation
            for the full semantics of each.  The default value is an empty
            dict ( {} ).

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ).

    Returns:
        A dict with the following entries--

        type_id (str)
            An identifier representing the created type.  This type_id can
            be used in subsequent calls to :meth:`create a table
            <.create_table>`

        type_definition (str)
            Value of input parameter *type_definition*.

        label (str)
            Value of input parameter *label*.

        properties (dict of str to lists of str)
            Value of input parameter *properties*.
    """
    # Avoid shared mutable default dicts; treat None as "empty".
    properties = properties if ( properties is not None ) else {}
    options = options if ( options is not None ) else {}

    assert isinstance( type_definition, (basestring)), "create_type(): Argument 'type_definition' must be (one) of type(s) '(basestring)'; given %s" % type( type_definition ).__name__
    assert isinstance( label, (basestring)), "create_type(): Argument 'label' must be (one) of type(s) '(basestring)'; given %s" % type( label ).__name__
    assert isinstance( properties, (dict)), "create_type(): Argument 'properties' must be (one) of type(s) '(dict)'; given %s" % type( properties ).__name__
    assert isinstance( options, (dict)), "create_type(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/create/type" )

    obj = {}
    obj['type_definition'] = type_definition
    obj['label'] = label
    obj['properties'] = self.__sanitize_dicts( properties )
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/create/type' )

    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Create a record type for this type and save it so that subsequent
    # operations on tables of this type can decode records locally.
    record_type = RecordType.from_type_schema( response["label"], response["type_definition"], response["properties"] )
    self.save_known_type( response["type_id"], record_type)

    return AttrDict( response )
# end create_type # begin create_union
def create_union( self, table_name = None, table_names = None, input_column_names = None, output_column_names = None, options = None ):
    """Merges data from one or more tables with comparable data types into
    a new table.

    The following merges are supported:

    UNION (DISTINCT/ALL) - For data set union details and examples, see
    `Union <../../../concepts/unions.html>`_.  For limitations, see `Union
    Limitations and Cautions
    <../../../concepts/unions.html#limitations-and-cautions>`_.

    INTERSECT (DISTINCT/ALL) - For data set intersection details and
    examples, see `Intersect <../../../concepts/intersect.html>`_.  For
    limitations, see `Intersect Limitations
    <../../../concepts/intersect.html#limitations>`_.

    EXCEPT (DISTINCT/ALL) - For data set subtraction details and examples,
    see `Except <../../../concepts/except.html>`_.  For limitations, see
    `Except Limitations <../../../concepts/except.html#limitations>`_.

    MERGE VIEWS - For a given set of `filtered views
    <../../../concepts/filtered_views.html>`_ on a single table, creates a
    single filtered view containing all of the unique records across all of
    the given filtered data sets.

    Non-charN 'string' and 'bytes' column types cannot be merged, nor can
    columns marked as `store-only
    <../../../concepts/types.html#data-handling>`_.

    Parameters:

        table_name (str)
            Name of the table to be created.  Has the same naming
            restrictions as `tables <../../../concepts/tables.html>`_.

        table_names (list of str)
            The list of table names to merge.  Must contain the names of
            one or more existing tables.  The user can provide a single
            element (which will be automatically promoted to a list
            internally) or a list.

        input_column_names (list of lists of str)
            The list of columns from each of the corresponding input
            tables.  The user can provide a single element (which will be
            automatically promoted to a list internally) or a list.

        output_column_names (list of str)
            The list of names of the columns to be stored in the output
            table.  The user can provide a single element (which will be
            automatically promoted to a list internally) or a list.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ).  Allowed keys are: *collection_name*,
            *materialize_on_gpu*, *mode* (one of *union_all* [the default],
            *union*, *union_distinct*, *except*, *except_all*, *intersect*,
            *intersect_all*, or *merge_views*), *chunk_size*,
            *create_indexes*, *ttl*, *persist*, *view_id*, and
            *force_replicated*; see the /create/union endpoint
            documentation for the full semantics of each.

    Returns:
        A dict with the following entries--

        table_name (str)
            Value of input parameter *table_name*.
    """
    # Avoid a shared mutable default dict; treat None as "no options",
    # mirroring the None-to-[] promotion used for the list parameters below.
    options = options if ( options is not None ) else {}

    assert isinstance( table_name, (basestring)), "create_union(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
    input_column_names = input_column_names if isinstance( input_column_names, list ) else ( [] if (input_column_names is None) else [ input_column_names ] )
    output_column_names = output_column_names if isinstance( output_column_names, list ) else ( [] if (output_column_names is None) else [ output_column_names ] )
    assert isinstance( options, (dict)), "create_union(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/create/union" )

    obj = {}
    obj['table_name'] = table_name
    obj['table_names'] = table_names
    obj['input_column_names'] = input_column_names
    obj['output_column_names'] = output_column_names
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/create/union' )

    return AttrDict( response )
# end create_union # begin create_user_external
def create_user_external( self, name = None, options = None ):
    """Creates a new external user (a user whose credentials are managed
    by an external LDAP).

    Parameters:

        name (str)
            Name of the user to be created.  Must exactly match the user's
            name in the external LDAP, prefixed with a @.  Must not be the
            same name as an existing user.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ).

    Returns:
        A dict with the following entries--

        name (str)
            Value of input parameter *name*.
    """
    # Avoid a shared mutable default dict; treat None as "no options".
    options = options if ( options is not None ) else {}

    assert isinstance( name, (basestring)), "create_user_external(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
    assert isinstance( options, (dict)), "create_user_external(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/create/user/external" )

    obj = {}
    obj['name'] = name
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/create/user/external' )

    return AttrDict( response )
# end create_user_external # begin create_user_internal
def create_user_internal( self, name = None, password = None, options = None ):
    """Creates a new internal user (a user whose credentials are managed
    by the database system).

    Parameters:

        name (str)
            Name of the user to be created.  Must contain only lowercase
            letters, digits, and underscores, and cannot begin with a
            digit.  Must not be the same name as an existing user or role.

        password (str)
            Initial password of the user to be created.  May be an empty
            string for no password.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ).

    Returns:
        A dict with the following entries--

        name (str)
            Value of input parameter *name*.
    """
    # Avoid a shared mutable default dict; treat None as "no options".
    options = options if ( options is not None ) else {}

    assert isinstance( name, (basestring)), "create_user_internal(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
    assert isinstance( password, (basestring)), "create_user_internal(): Argument 'password' must be (one) of type(s) '(basestring)'; given %s" % type( password ).__name__
    assert isinstance( options, (dict)), "create_user_internal(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/create/user/internal" )

    obj = {}
    obj['name'] = name
    obj['password'] = password
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/create/user/internal' )

    return AttrDict( response )
# end create_user_internal # begin delete_proc
def delete_proc( self, proc_name = None, options = None ):
    """Deletes a proc.  Any currently running instances of the proc will
    be killed.

    Parameters:

        proc_name (str)
            Name of the proc to be deleted.  Must be the name of a
            currently existing proc.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ).

    Returns:
        A dict with the following entries--

        proc_name (str)
            Value of input parameter *proc_name*.
    """
    # Avoid a shared mutable default dict; treat None as "no options".
    options = options if ( options is not None ) else {}

    assert isinstance( proc_name, (basestring)), "delete_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__
    assert isinstance( options, (dict)), "delete_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/delete/proc" )

    obj = {}
    obj['proc_name'] = proc_name
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/delete/proc' )

    return AttrDict( response )
# end delete_proc # begin delete_records
def delete_records( self, table_name = None, expressions = None, options = None ):
    """Deletes record(s) matching the provided criteria from the given
    table.  The record selection criteria can either be one or more input
    parameter *expressions* (matching multiple records), a single record
    identified by *record_id* options, or all records when using
    *delete_all_records*.  Note that the three selection criteria are
    mutually exclusive.  This operation cannot be run on a collection or a
    view.  The operation is synchronous meaning that a response will not be
    available until the request is completely processed and all the
    matching records are deleted.

    Parameters:

        table_name (str)
            Name of the table from which to delete records.  The set must
            be a currently existing table and not a collection or a view.

        expressions (list of str)
            A list of the actual predicates, one for each select; format
            should follow the guidelines provided `here
            <../../../concepts/expressions.html>`_.  Specifying one or more
            input parameter *expressions* is mutually exclusive to
            specifying *record_id* in the input parameter *options*.  The
            user can provide a single element (which will be automatically
            promoted to a list internally) or a list.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ).  Allowed keys are:

            * **global_expression** --
              An optional global expression to reduce the search space of
              the input parameter *expressions*.  The default value is ''.

            * **record_id** --
              A record ID identifying a single record, obtained at the time
              of :meth:`insertion of the record <.insert_records>` or by
              calling :meth:`.get_records_from_collection` with the
              *return_record_ids* option.  This option cannot be used to
              delete records from `replicated
              <../../../concepts/tables.html#replication>`_ tables.

            * **delete_all_records** --
              If set to *true*, all records in the table will be deleted.
              If set to *false*, then the option is effectively ignored.
              Allowed values are: true, false.  The default value is
              'false'.

    Returns:
        A dict with the following entries--

        count_deleted (long)
            Total number of records deleted across all expressions.

        counts_deleted (list of longs)
            Total number of records deleted per expression.
    """
    # Avoid a shared mutable default dict; treat None as "no options",
    # mirroring the None-to-[] promotion used for 'expressions' below.
    options = options if ( options is not None ) else {}

    assert isinstance( table_name, (basestring)), "delete_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    expressions = expressions if isinstance( expressions, list ) else ( [] if (expressions is None) else [ expressions ] )
    assert isinstance( options, (dict)), "delete_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/delete/records" )

    obj = {}
    obj['table_name'] = table_name
    obj['expressions'] = expressions
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/delete/records' )

    return AttrDict( response )
# end delete_records # begin delete_role
def delete_role( self, name = None, options = None ):
    """Deletes an existing role.

    Parameters:

        name (str)
            Name of the role to be deleted.  Must be an existing role.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ).

    Returns:
        A dict with the following entries--

        name (str)
            Value of input parameter *name*.
    """
    # Avoid a shared mutable default dict; treat None as "no options".
    options = options if ( options is not None ) else {}

    assert isinstance( name, (basestring)), "delete_role(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
    assert isinstance( options, (dict)), "delete_role(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/delete/role" )

    obj = {}
    obj['name'] = name
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/delete/role' )

    return AttrDict( response )
# end delete_role # begin delete_user
def delete_user( self, name = None, options = None ):
    """Deletes an existing user.

    Parameters:

        name (str)
            Name of the user to be deleted.  Must be an existing user.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ).

    Returns:
        A dict with the following entries--

        name (str)
            Value of input parameter *name*.
    """
    # Avoid a shared mutable default dict; treat None as "no options".
    options = options if ( options is not None ) else {}

    assert isinstance( name, (basestring)), "delete_user(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
    assert isinstance( options, (dict)), "delete_user(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/delete/user" )

    obj = {}
    obj['name'] = name
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/delete/user' )

    return AttrDict( response )
# end delete_user # begin execute_proc
def execute_proc( self, proc_name = None, params = None, bin_params = None,
                  input_table_names = None, input_column_names = None,
                  output_table_names = None, options = None ):
    """Executes a proc.  This endpoint is asynchronous and does not wait for
    the proc to complete before returning.

    Parameters:

        proc_name (str)
            Name of the proc to execute.  Must be the name of a currently
            existing proc.

        params (dict of str to str)
            A map of named string parameters to pass to the proc.  The
            default value is an empty dict ( {} ).

        bin_params (dict of str to str)
            A map of named binary parameters to pass to the proc.  The
            default value is an empty dict ( {} ).

        input_table_names (list of str)
            Names of existing tables containing data to be passed to the
            proc.  A single element is automatically promoted to a list.
            The default value is an empty list ( [] ).

        input_column_names (dict of str to lists of str)
            Map of table names from *input_table_names* to lists of column
            names to pass to the proc; omitted tables pass all columns.
            The default value is an empty dict ( {} ).

        output_table_names (list of str)
            Names of tables to which proc output will be written; missing
            tables are created with the schema of the corresponding input
            table (excluding primary/shard keys).  A single element is
            automatically promoted to a list.  The default value is an
            empty list ( [] ).

        options (dict of str to str)
            Optional parameters (e.g. *cache_input*, *use_cached_input*,
            *kifs_input_dirs*).  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        run_id (str)
            The run ID of the running proc instance; may be passed to
            :meth:`.show_proc_status` or :meth:`.kill_proc`.
    """
    # None sentinels replace the original mutable default arguments
    # ({} / []); each call gets its own fresh containers.
    params             = {} if params             is None else params
    bin_params         = {} if bin_params         is None else bin_params
    input_column_names = {} if input_column_names is None else input_column_names
    options            = {} if options            is None else options

    assert isinstance( proc_name, (basestring)), "execute_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__
    assert isinstance( params, (dict)), "execute_proc(): Argument 'params' must be (one) of type(s) '(dict)'; given %s" % type( params ).__name__
    assert isinstance( bin_params, (dict)), "execute_proc(): Argument 'bin_params' must be (one) of type(s) '(dict)'; given %s" % type( bin_params ).__name__
    # Promote a scalar to a one-element list; None becomes an empty list.
    input_table_names = input_table_names if isinstance( input_table_names, list ) else ( [] if (input_table_names is None) else [ input_table_names ] )
    assert isinstance( input_column_names, (dict)), "execute_proc(): Argument 'input_column_names' must be (one) of type(s) '(dict)'; given %s" % type( input_column_names ).__name__
    output_table_names = output_table_names if isinstance( output_table_names, list ) else ( [] if (output_table_names is None) else [ output_table_names ] )
    assert isinstance( options, (dict)), "execute_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/execute/proc" )

    obj = {}
    obj['proc_name'] = proc_name
    obj['params'] = self.__sanitize_dicts( params )
    obj['bin_params'] = self.__sanitize_dicts( bin_params )
    obj['input_table_names'] = input_table_names
    obj['input_column_names'] = self.__sanitize_dicts( input_column_names )
    obj['output_table_names'] = output_table_names
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/execute/proc' )

    return AttrDict( response )
# end execute_proc # begin filter
def filter( self, table_name = None, view_name = '', expression = None,
            options = None ):
    """Filters data based on the specified expression.  The results are
    stored in a result set with the given input parameter *view_name*.

    The response contains the number of points for which the expression
    evaluated to true, which is equivalent to the size of the result view.

    Parameters:

        table_name (str)
            Name of the table to filter.  May be the ID of a collection,
            table or a result set (for chaining queries).

        view_name (str)
            If provided, the name of the view containing the results.  The
            default value is ''.

        expression (str)
            The select expression to filter the specified table.

        options (dict of str to str)
            Optional parameters (*collection_name*, *view_id*, *ttl*).  The
            default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        count (long)
            The number of records that matched the given select expression.
    """
    # None sentinel instead of a mutable default argument.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( expression, (basestring)), "filter(): Argument 'expression' must be (one) of type(s) '(basestring)'; given %s" % type( expression ).__name__
    assert isinstance( options, (dict)), "filter(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter" )

    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['expression'] = expression
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter' )

    return AttrDict( response )
# end filter # begin filter_by_area
def filter_by_area( self, table_name = None, view_name = '',
                    x_column_name = None, x_vector = None,
                    y_column_name = None, y_vector = None, options = None ):
    """Calculates which objects from a table are within a named area of
    interest (NAI/polygon).  The operation is synchronous; the response
    provides the count of the resulting set, and a new view satisfying the
    restriction is created with the name input parameter *view_name*.

    Parameters:

        table_name (str)
            Name of the table to filter.  May be the name of a collection,
            a table or a view (when chaining queries).

        view_name (str)
            If provided, the name of the view containing the results.  The
            default value is ''.

        x_column_name (str)
            Name of the column containing the x values to be filtered.

        x_vector (list of floats)
            X coordinates of the polygon vertices.  A single element is
            automatically promoted to a list.

        y_column_name (str)
            Name of the column containing the y values to be filtered.

        y_vector (list of floats)
            Y coordinates of the polygon vertices.  A single element is
            automatically promoted to a list.

        options (dict of str to str)
            Optional parameters (*collection_name*).  The default value is
            an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        count (long)
            The number of records passing the area filter.
    """
    # None sentinel instead of a mutable default argument.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_area(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_area(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( x_column_name, (basestring)), "filter_by_area(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
    # Promote scalars to one-element lists; None becomes an empty list.
    x_vector = x_vector if isinstance( x_vector, list ) else ( [] if (x_vector is None) else [ x_vector ] )
    assert isinstance( y_column_name, (basestring)), "filter_by_area(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
    y_vector = y_vector if isinstance( y_vector, list ) else ( [] if (y_vector is None) else [ y_vector ] )
    assert isinstance( options, (dict)), "filter_by_area(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/byarea" )

    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['x_column_name'] = x_column_name
    obj['x_vector'] = x_vector
    obj['y_column_name'] = y_column_name
    obj['y_vector'] = y_vector
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/byarea' )

    return AttrDict( response )
# end filter_by_area # begin filter_by_area_geometry
def filter_by_area_geometry( self, table_name = None, view_name = '',
                             column_name = None, x_vector = None,
                             y_vector = None, options = None ):
    """Calculates which geospatial geometry objects from a table intersect
    a named area of interest (NAI/polygon).  The operation is synchronous;
    the response provides the count of the resulting set, and a new view
    satisfying the restriction is created with the name input parameter
    *view_name*.

    Parameters:

        table_name (str)
            Name of the table to filter.  May be the name of a collection,
            a table or a view (when chaining queries).

        view_name (str)
            If provided, the name of the view containing the results.  Must
            not be an already existing collection, table or view.  The
            default value is ''.

        column_name (str)
            Name of the geospatial geometry column to be filtered.

        x_vector (list of floats)
            X coordinates of the polygon vertices.  A single element is
            automatically promoted to a list.

        y_vector (list of floats)
            Y coordinates of the polygon vertices.  A single element is
            automatically promoted to a list.

        options (dict of str to str)
            Optional parameters (*collection_name*).  The default value is
            an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        count (long)
            The number of records passing the area filter.
    """
    # None sentinel instead of a mutable default argument.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_area_geometry(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_area_geometry(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( column_name, (basestring)), "filter_by_area_geometry(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    # Promote scalars to one-element lists; None becomes an empty list.
    x_vector = x_vector if isinstance( x_vector, list ) else ( [] if (x_vector is None) else [ x_vector ] )
    y_vector = y_vector if isinstance( y_vector, list ) else ( [] if (y_vector is None) else [ y_vector ] )
    assert isinstance( options, (dict)), "filter_by_area_geometry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/byarea/geometry" )

    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['column_name'] = column_name
    obj['x_vector'] = x_vector
    obj['y_vector'] = y_vector
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/byarea/geometry' )

    return AttrDict( response )
# end filter_by_area_geometry # begin filter_by_box
def filter_by_box( self, table_name = None, view_name = '',
                   x_column_name = None, min_x = None, max_x = None,
                   y_column_name = None, min_y = None, max_y = None,
                   options = None ):
    """Calculates how many objects within the given table lie in a
    rectangular box.  The operation is synchronous; the response provides
    the count of the resulting set, and a new result view is created when
    input parameter *view_name* is passed in.

    Parameters:

        table_name (str)
            Name of the table on which the bounding box operation will be
            performed.  Must be an existing table.

        view_name (str)
            Optional name of the result view created for the query.  The
            default value is ''.

        x_column_name (str)
            Name of the numeric column for the x bound.

        min_x (float)
            Lower x bound; must be <= *max_x*.

        max_x (float)
            Upper x bound; must be >= *min_x*.

        y_column_name (str)
            Name of the numeric column for the y bound.

        min_y (float)
            Lower y bound; must be <= *max_y*.

        max_y (float)
            Upper y bound; must be >= *min_y*.

        options (dict of str to str)
            Optional parameters (*collection_name*).  The default value is
            an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        count (long)
            The number of records passing the box filter.
    """
    # None sentinel instead of a mutable default argument.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_box(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_box(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( x_column_name, (basestring)), "filter_by_box(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
    assert isinstance( min_x, (int, long, float)), "filter_by_box(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
    assert isinstance( max_x, (int, long, float)), "filter_by_box(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
    assert isinstance( y_column_name, (basestring)), "filter_by_box(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
    assert isinstance( min_y, (int, long, float)), "filter_by_box(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
    assert isinstance( max_y, (int, long, float)), "filter_by_box(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
    assert isinstance( options, (dict)), "filter_by_box(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/bybox" )

    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['x_column_name'] = x_column_name
    obj['min_x'] = min_x
    obj['max_x'] = max_x
    obj['y_column_name'] = y_column_name
    obj['min_y'] = min_y
    obj['max_y'] = max_y
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/bybox' )

    return AttrDict( response )
# end filter_by_box # begin filter_by_box_geometry
def filter_by_box_geometry( self, table_name = None, view_name = '',
                            column_name = None, min_x = None, max_x = None,
                            min_y = None, max_y = None, options = None ):
    """Calculates which geospatial geometry objects from a table intersect
    a rectangular box.  The operation is synchronous; the response provides
    the count of the resulting set, and a new result view is created when
    input parameter *view_name* is passed in.

    Parameters:

        table_name (str)
            Name of the table on which the bounding box operation will be
            performed.  Must be an existing table.

        view_name (str)
            Optional name of the result view created for the query.  Must
            not be an already existing collection, table or view.  The
            default value is ''.

        column_name (str)
            Name of the geospatial geometry column to be filtered.

        min_x (float)
            Lower x bound of the box; must be <= *max_x*.

        max_x (float)
            Upper x bound of the box; must be >= *min_x*.

        min_y (float)
            Lower y bound of the box; must be <= *max_y*.

        max_y (float)
            Upper y bound of the box; must be >= *min_y*.

        options (dict of str to str)
            Optional parameters (*collection_name*).  The default value is
            an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        count (long)
            The number of records passing the box filter.
    """
    # None sentinel instead of a mutable default argument.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_box_geometry(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_box_geometry(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( column_name, (basestring)), "filter_by_box_geometry(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( min_x, (int, long, float)), "filter_by_box_geometry(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
    assert isinstance( max_x, (int, long, float)), "filter_by_box_geometry(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
    assert isinstance( min_y, (int, long, float)), "filter_by_box_geometry(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
    assert isinstance( max_y, (int, long, float)), "filter_by_box_geometry(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
    assert isinstance( options, (dict)), "filter_by_box_geometry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/bybox/geometry" )

    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['column_name'] = column_name
    obj['min_x'] = min_x
    obj['max_x'] = max_x
    obj['min_y'] = min_y
    obj['max_y'] = max_y
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/bybox/geometry' )

    return AttrDict( response )
# end filter_by_box_geometry # begin filter_by_geometry
def filter_by_geometry( self, table_name = None, view_name = '',
                        column_name = None, input_wkt = '',
                        operation = None, options = None ):
    """Applies a geometry filter against a geospatial geometry column in a
    given table, collection or view.  The filtering geometry is provided by
    input parameter *input_wkt*.

    Parameters:

        table_name (str)
            Name of the table on which the filter by geometry will be
            performed.  Must be an existing table, collection or view
            containing a geospatial geometry column.

        view_name (str)
            If provided, the name of the view containing the results.  The
            default value is ''.

        column_name (str)
            Name of the column to be used in the filter.  Must be a
            geospatial geometry column.

        input_wkt (str)
            A geometry in WKT format used to filter the objects in input
            parameter *table_name*.  The default value is ''.

        operation (str)
            The geometric filtering operation to perform.  Allowed values:
            'contains', 'crosses', 'disjoint', 'equals', 'intersects',
            'overlaps', 'touches', 'within'.

        options (dict of str to str)
            Optional parameters (*collection_name*).  The default value is
            an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        count (long)
            The number of records passing the geometry filter.
    """
    # None sentinel instead of a mutable default argument.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_geometry(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_geometry(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( column_name, (basestring)), "filter_by_geometry(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( input_wkt, (basestring)), "filter_by_geometry(): Argument 'input_wkt' must be (one) of type(s) '(basestring)'; given %s" % type( input_wkt ).__name__
    assert isinstance( operation, (basestring)), "filter_by_geometry(): Argument 'operation' must be (one) of type(s) '(basestring)'; given %s" % type( operation ).__name__
    assert isinstance( options, (dict)), "filter_by_geometry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/bygeometry" )

    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['column_name'] = column_name
    obj['input_wkt'] = input_wkt
    obj['operation'] = operation
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/bygeometry' )

    return AttrDict( response )
# end filter_by_geometry # begin filter_by_list
def filter_by_list( self, table_name = None, view_name = '',
                    column_values_map = None, options = None ):
    """Calculates which records from a table have values in the given list
    for the corresponding column.  The operation is synchronous; the
    response provides the count of the resulting set, and a new view is
    created if input parameter *view_name* is passed in.

    For example, with columns 'x' and 'y', the column map
    {"x":["10.1", "2.3"], "y":["0.0", "-31.5", "42.0"]} returns the count
    of all records whose x and y values match positionally across the
    respective lists (e.g. "x = 10.1 and y = 0.0"); non-corresponding
    combinations (e.g. "x = 10.1 and y = -31.5") are not matched.

    Parameters:

        table_name (str)
            Name of the table to filter.  May be the ID of a collection,
            table or a result set (for chaining queries).

        view_name (str)
            If provided, the name of the view containing the results.  The
            default value is ''.

        column_values_map (dict of str to lists of str)
            List of values for the corresponding column in the table.

        options (dict of str to str)
            Optional parameters (*collection_name*, *filter_mode* of
            'in_list' or 'not_in_list'; default 'in_list').  The default
            value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        count (long)
            The number of records passing the list filter.
    """
    # None sentinel instead of a mutable default argument.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_list(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_list(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( column_values_map, (dict)), "filter_by_list(): Argument 'column_values_map' must be (one) of type(s) '(dict)'; given %s" % type( column_values_map ).__name__
    assert isinstance( options, (dict)), "filter_by_list(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/bylist" )

    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['column_values_map'] = self.__sanitize_dicts( column_values_map )
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/bylist' )

    return AttrDict( response )
# end filter_by_list # begin filter_by_radius
def filter_by_radius( self, table_name = None, view_name = '',
                      x_column_name = None, x_center = None,
                      y_column_name = None, y_center = None,
                      radius = None, options = None ):
    """Calculates which objects from a table lie within a circle with the
    given radius and center point (i.e. circular NAI).  The operation is
    synchronous; the response provides the count of the resulting set, and
    a new view is created if input parameter *view_name* is passed in.

    For track data, all track points within the circle plus one point on
    either side of the circle (if the track goes beyond it) are included.

    Parameters:

        table_name (str)
            Name of the table on which the filter by radius operation will
            be performed.  Must be an existing table.

        view_name (str)
            If provided, the name of the view containing the results.  The
            default value is ''.

        x_column_name (str)
            Name of the column used for the x-coordinate (longitude) of the
            center.

        x_center (float)
            Longitude of the center; must be within [-180.0, 180.0].

        y_column_name (str)
            Name of the column used for the y-coordinate (latitude) of the
            center.

        y_center (float)
            Latitude of the center; must be within [-90.0, 90.0].

        radius (float)
            Radius of the search circle in meters (e.g. '42000' is 42 km);
            must be a non-zero positive value.

        options (dict of str to str)
            Optional parameters (*collection_name*).  The default value is
            an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        count (long)
            The number of records passing the radius filter.
    """
    # None sentinel instead of a mutable default argument.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_radius(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_radius(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( x_column_name, (basestring)), "filter_by_radius(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
    assert isinstance( x_center, (int, long, float)), "filter_by_radius(): Argument 'x_center' must be (one) of type(s) '(int, long, float)'; given %s" % type( x_center ).__name__
    assert isinstance( y_column_name, (basestring)), "filter_by_radius(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
    assert isinstance( y_center, (int, long, float)), "filter_by_radius(): Argument 'y_center' must be (one) of type(s) '(int, long, float)'; given %s" % type( y_center ).__name__
    assert isinstance( radius, (int, long, float)), "filter_by_radius(): Argument 'radius' must be (one) of type(s) '(int, long, float)'; given %s" % type( radius ).__name__
    assert isinstance( options, (dict)), "filter_by_radius(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/byradius" )

    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['x_column_name'] = x_column_name
    obj['x_center'] = x_center
    obj['y_column_name'] = y_column_name
    obj['y_center'] = y_center
    obj['radius'] = radius
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/byradius' )

    return AttrDict( response )
# end filter_by_radius # begin filter_by_radius_geometry
def filter_by_radius_geometry( self, table_name = None, view_name = '', column_name = None, x_center = None, y_center = None, radius = None, options = None ):
    """Calculates which geospatial geometry objects from a table intersect
    a circle with the given radius and center point (i.e. circular NAI).

    The operation is synchronous: the response is not returned until all
    matching objects are fully available.  The response payload provides the
    count of the resulting set; a new result view satisfying the circular
    NAI restriction is also created if *view_name* is passed in.

    Parameters:
        table_name (str)
            Name of the table on which the filter by radius operation will
            be performed.  Must be an existing table.
        view_name (str)
            If provided, the name of the view containing the results.  Must
            not be an already existing collection, table or view.  The
            default value is ''.
        column_name (str)
            Name of the geospatial geometry column to be filtered.
        x_center (float)
            Longitude of the center.  Must be within [-180.0, 180.0].
        y_center (float)
            Latitude of the center.  Must be within [-90.0, 90.0].
        radius (float)
            Radius of the search circle, in meters (e.g. '42000' means
            42 km).  Must be a non-zero positive value.
        options (dict of str to str)
            Optional parameters; the default is an empty dict.  Allowed
            keys: **collection_name** -- name of a collection which is to
            contain the newly created view (created automatically if
            non-existent; if empty, the view is top-level).

    Returns:
        A dict with the following entries--

        count (long)
            The number of records passing the radius filter.
    """
    # Fix for the mutable-default-argument pitfall: the generated code used
    # `options = {}` in the signature, which shares one dict object across
    # all calls; use a None sentinel and create a fresh dict per call.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_radius_geometry(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_radius_geometry(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( column_name, (basestring)), "filter_by_radius_geometry(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( x_center, (int, long, float)), "filter_by_radius_geometry(): Argument 'x_center' must be (one) of type(s) '(int, long, float)'; given %s" % type( x_center ).__name__
    assert isinstance( y_center, (int, long, float)), "filter_by_radius_geometry(): Argument 'y_center' must be (one) of type(s) '(int, long, float)'; given %s" % type( y_center ).__name__
    assert isinstance( radius, (int, long, float)), "filter_by_radius_geometry(): Argument 'radius' must be (one) of type(s) '(int, long, float)'; given %s" % type( radius ).__name__
    assert isinstance( options, (dict)), "filter_by_radius_geometry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/byradius/geometry" )

    # Build the request payload for the endpoint.
    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['column_name'] = column_name
    obj['x_center'] = x_center
    obj['y_center'] = y_center
    obj['radius'] = radius
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/byradius/geometry' )

    return AttrDict( response )
# end filter_by_radius_geometry # begin filter_by_range
def filter_by_range( self, table_name = None, view_name = '', column_name = None, lower_bound = None, upper_bound = None, options = None ):
    """Calculates which objects from a table have a column value within the
    given bounds (inclusive at both ends).

    An object from *table_name* is added to the view *view_name* if its
    column value is within [*lower_bound*, *upper_bound*].  The operation is
    synchronous; the response provides the count of matching objects.
    Although this can also be accomplished with the standard filter
    function, it is more efficient.  For track objects, the count reflects
    how many points fall within the given bounds (which may not include all
    the track points of any given track).

    Parameters:
        table_name (str)
            Name of the table on which the filter by range operation will
            be performed.  Must be an existing table.
        view_name (str)
            If provided, the name of the view containing the results.  Has
            the same naming restrictions as tables.  The default value is ''.
        column_name (str)
            Name of a column on which the operation would be applied.
        lower_bound (float)
            Value of the lower bound (inclusive).
        upper_bound (float)
            Value of the upper bound (inclusive).
        options (dict of str to str)
            Optional parameters; the default is an empty dict.  Allowed
            keys: **collection_name** -- name of a collection which is to
            contain the newly created view (created automatically if
            non-existent; if empty, the view is top-level).

    Returns:
        A dict with the following entries--

        count (long)
            The number of records passing the range filter.
    """
    # Avoid the shared mutable default dict of the original signature; a
    # None sentinel yields a fresh dict per call with identical behavior.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_range(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_range(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( column_name, (basestring)), "filter_by_range(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( lower_bound, (int, long, float)), "filter_by_range(): Argument 'lower_bound' must be (one) of type(s) '(int, long, float)'; given %s" % type( lower_bound ).__name__
    assert isinstance( upper_bound, (int, long, float)), "filter_by_range(): Argument 'upper_bound' must be (one) of type(s) '(int, long, float)'; given %s" % type( upper_bound ).__name__
    assert isinstance( options, (dict)), "filter_by_range(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/byrange" )

    # Build the request payload for the endpoint.
    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['column_name'] = column_name
    obj['lower_bound'] = lower_bound
    obj['upper_bound'] = upper_bound
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/byrange' )

    return AttrDict( response )
# end filter_by_range # begin filter_by_series
def filter_by_series( self, table_name = None, view_name = '', track_id = None, target_track_ids = None, options = None ):
    """Filters objects matching all points of the given track (works only
    on track type data).

    Finds all other points in the table that fall within specified
    spatial/temporal ranges of all points of the given track.  Optionally,
    another track can be specified to see if the two intersect (or come
    close within the specified ranges).  The spatial distance metric may be
    Euclidean (flat geometry) or Great Circle (spherical geometry).  The
    filtered points are stored in a newly created result set; the return
    value is the number of points in that view.  The operation is
    synchronous.

    Parameters:
        table_name (str)
            Name of the table on which the filter by track operation will
            be performed.  Must be a currently existing table with a track
            present.
        view_name (str)
            If provided, the name of the view containing the results.  Has
            the same naming restrictions as tables.  The default value is ''.
        track_id (str)
            The ID of the track which will act as the filtering points.
            Must be an existing track within the given table.
        target_track_ids (list of str)
            Up to one track ID to intersect with the "filter" track.  If
            provided, must be a valid track ID within the given set.  A
            single element is automatically promoted to a list.
        options (dict of str to str)
            Optional parameters; the default is an empty dict.  Allowed
            keys:

            * **collection_name** -- collection to contain the newly
              created view (created automatically if non-existent; if
              empty, the view is top-level).
            * **spatial_radius** -- positive number (as a string), radius
              in meters of the search area around each track point.
              Required.
            * **time_radius** -- positive number (as a string), maximum
              allowable time difference in seconds between a filtered
              object and the track's points.  Required.
            * **spatial_distance_metric** -- 'euclidean' or 'great_circle';
              default is 'euclidean'.

    Returns:
        A dict with the following entries--

        count (long)
            The number of records passing the series filter.
    """
    # Avoid the shared mutable default dict of the original signature; a
    # None sentinel yields a fresh dict per call with identical behavior.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_series(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_series(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( track_id, (basestring)), "filter_by_series(): Argument 'track_id' must be (one) of type(s) '(basestring)'; given %s" % type( track_id ).__name__
    # Promote a single (non-list) target track ID to a list; None becomes [].
    target_track_ids = target_track_ids if isinstance( target_track_ids, list ) else ( [] if (target_track_ids is None) else [ target_track_ids ] )
    assert isinstance( options, (dict)), "filter_by_series(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/byseries" )

    # Build the request payload for the endpoint.
    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['track_id'] = track_id
    obj['target_track_ids'] = target_track_ids
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/byseries' )

    return AttrDict( response )
# end filter_by_series # begin filter_by_string
def filter_by_string( self, table_name = None, view_name = '', expression = None, mode = None, column_names = None, options = None ):
    """Calculates which objects from a table, collection, or view match a
    string expression for the given string columns.

    The option 'case_sensitive' can be used to modify the behavior for all
    modes except 'search'.

    Parameters:
        table_name (str)
            Name of the table on which the filter operation will be
            performed.  Must be an existing table, collection or view.
        view_name (str)
            If provided, the name of the view containing the results.  Has
            the same naming restrictions as tables.  The default value is ''.
        expression (str)
            The expression with which to filter the table.
        mode (str)
            The string filtering mode to apply.  Allowed values are:

            * **search** -- full text search query with wildcards and
              boolean operators; no column may be given in *column_names*
              (all text-search-enabled string columns are searched).
            * **equals** -- exact whole-string match (accelerated).
            * **contains** -- partial substring match (not accelerated).
            * **starts_with** -- strings starting with the expression
              (not accelerated).
            * **regex** -- full regular expression search (not
              accelerated).

            For the non-accelerated modes, if the column is a string type
            (non-charN) and the number of records is too large, 0 is
            returned.
        column_names (list of str)
            List of columns on which to apply the filter.  Ignored for
            'search' mode.  A single element is automatically promoted to
            a list.
        options (dict of str to str)
            Optional parameters; the default is an empty dict.  Allowed
            keys:

            * **collection_name** -- collection to contain the newly
              created view (created automatically if non-existent; if
              empty, the view is top-level).
            * **case_sensitive** -- if 'false', string filtering ignores
              case (does not apply to 'search' mode); 'true' or 'false',
              default 'true'.

    Returns:
        A dict with the following entries--

        count (long)
            The number of records that passed the string filter.
    """
    # Avoid the shared mutable default dict of the original signature; a
    # None sentinel yields a fresh dict per call with identical behavior.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_string(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_string(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( expression, (basestring)), "filter_by_string(): Argument 'expression' must be (one) of type(s) '(basestring)'; given %s" % type( expression ).__name__
    assert isinstance( mode, (basestring)), "filter_by_string(): Argument 'mode' must be (one) of type(s) '(basestring)'; given %s" % type( mode ).__name__
    # Promote a single (non-list) column name to a list; None becomes [].
    column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
    assert isinstance( options, (dict)), "filter_by_string(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/bystring" )

    # Build the request payload for the endpoint.
    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['expression'] = expression
    obj['mode'] = mode
    obj['column_names'] = column_names
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/bystring' )

    return AttrDict( response )
# end filter_by_string # begin filter_by_table
def filter_by_table( self, table_name = None, view_name = '', column_name = None, source_table_name = None, source_table_column_name = None, options = None ):
    """Filters objects in one table based on objects in another table.

    The user must specify matching column types from the two tables (the
    target table from which objects will be filtered and the source table
    on which the filter is based); the column names need not be the same.
    If *view_name* is specified, the filtered objects are put in a newly
    created view.  The operation is synchronous; the return value contains
    the count (i.e. the size) of the resulting view.

    Parameters:
        table_name (str)
            Name of the table whose data will be filtered.  Must be an
            existing table.
        view_name (str)
            If provided, the name of the view containing the results.  Has
            the same naming restrictions as tables.  The default value is ''.
        column_name (str)
            Name of the column by whose value the data will be filtered
            from the table designated by *table_name*.
        source_table_name (str)
            Name of the table whose data will be compared against in the
            table called *table_name*.  Must be an existing table.
        source_table_column_name (str)
            Name of the column in *source_table_name* whose values will be
            used as the filter for *table_name*.  Must be a geospatial
            geometry column if in 'spatial' mode; otherwise, must match the
            type of *column_name*.
        options (dict of str to str)
            Optional parameters; the default is an empty dict.  Allowed
            keys:

            * **collection_name** -- collection to contain the newly
              created view (created automatically if non-existent; if
              empty, the view is top-level).
            * **filter_mode** -- 'in_table' or 'not_in_table'; default
              'in_table'.
            * **mode** -- 'normal' or 'spatial'; default 'normal'.
            * **buffer** -- buffer size, in meters; only relevant for
              spatial mode.  Default '0'.
            * **buffer_method** -- method used to buffer polygons; only
              relevant for spatial mode.  Allowed value: **geos** (geos
              1 edge per corner algorithm).  Default 'normal'.
            * **max_partition_size** -- maximum number of points in a
              partition; spatial mode only.  Default '0'.
            * **max_partition_score** -- maximum number of points * edges
              in a partition; spatial mode only.  Default '8000000'.
            * **x_column_name** -- column containing the x value of the
              point being filtered in spatial mode.  Default 'x'.
            * **y_column_name** -- column containing the y value of the
              point being filtered in spatial mode.  Default 'y'.

    Returns:
        A dict with the following entries--

        count (long)
            The number of records in *table_name* whose *column_name*
            values match *source_table_column_name* values in
            *source_table_name*.
    """
    # Avoid the shared mutable default dict of the original signature; a
    # None sentinel yields a fresh dict per call with identical behavior.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_table(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( column_name, (basestring)), "filter_by_table(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( source_table_name, (basestring)), "filter_by_table(): Argument 'source_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( source_table_name ).__name__
    assert isinstance( source_table_column_name, (basestring)), "filter_by_table(): Argument 'source_table_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( source_table_column_name ).__name__
    assert isinstance( options, (dict)), "filter_by_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/bytable" )

    # Build the request payload for the endpoint.
    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['column_name'] = column_name
    obj['source_table_name'] = source_table_name
    obj['source_table_column_name'] = source_table_column_name
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/bytable' )

    return AttrDict( response )
# end filter_by_table # begin filter_by_value
def filter_by_value( self, table_name = None, view_name = '', is_string = None, value = 0, value_str = '', column_name = None, options = None ):
    """Calculates which objects from a table have a particular value for a
    particular column.

    The input parameters provide a way to specify either a String or a
    Double valued column and a desired value on which the filter is
    performed.  The operation is synchronous; the response provides the
    count of the resulting set.  A new result view satisfying the filter is
    created when a view name is passed in.  Although this can also be
    accomplished with the standard filter function, it is more efficient.

    Parameters:
        table_name (str)
            Name of an existing table on which to perform the calculation.
        view_name (str)
            If provided, the name of the view containing the results.  Has
            the same naming restrictions as tables.  The default value is ''.
        is_string (bool)
            Indicates whether the value being searched for is string or
            numeric.
        value (float)
            The value to search for.  The default value is 0.
        value_str (str)
            The string value to search for.  The default value is ''.
        column_name (str)
            Name of a column on which the filter by value would be applied.
        options (dict of str to str)
            Optional parameters; the default is an empty dict.  Allowed
            keys: **collection_name** -- name of a collection which is to
            contain the newly created view (created automatically if
            non-existent; if empty, the view is top-level).

    Returns:
        A dict with the following entries--

        count (long)
            The number of records passing the value filter.
    """
    # Avoid the shared mutable default dict of the original signature; a
    # None sentinel yields a fresh dict per call with identical behavior.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "filter_by_value(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( view_name, (basestring)), "filter_by_value(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    assert isinstance( is_string, (bool)), "filter_by_value(): Argument 'is_string' must be (one) of type(s) '(bool)'; given %s" % type( is_string ).__name__
    assert isinstance( value, (int, long, float)), "filter_by_value(): Argument 'value' must be (one) of type(s) '(int, long, float)'; given %s" % type( value ).__name__
    assert isinstance( value_str, (basestring)), "filter_by_value(): Argument 'value_str' must be (one) of type(s) '(basestring)'; given %s" % type( value_str ).__name__
    assert isinstance( column_name, (basestring)), "filter_by_value(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
    assert isinstance( options, (dict)), "filter_by_value(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/filter/byvalue" )

    # Build the request payload for the endpoint.
    obj = {}
    obj['table_name'] = table_name
    obj['view_name'] = view_name
    obj['is_string'] = is_string
    obj['value'] = value
    obj['value_str'] = value_str
    obj['column_name'] = column_name
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/filter/byvalue' )

    return AttrDict( response )
# end filter_by_value # begin get_job
def get_job( self, job_id = None, options = None ):
    """Fetches the status and result of an asynchronously submitted job.

    Parameters:
        job_id (int)
            A unique identifier for the job whose status and result is to
            be fetched.
        options (dict of str to str)
            Optional parameters; the default is an empty dict.

    Returns:
        A dict with the following entries--

        endpoint (str)
            The endpoint being executed asynchronously, e.g. '/alter/table'.
        job_status (str)
            Status of the submitted job; one of:

            * **RUNNING** -- the job is currently executing.
            * **DONE** -- completed successfully; the response is in
              *job_response* or *job_response_str*.
            * **ERROR** -- an error was encountered; *status_map* contains
              the details in error_message.
            * **CANCELLED** -- cancellation was requested while execution
              was in progress.
        running (bool)
            True if the endpoint is still executing.
        progress (int)
            Approximate percentage of the job completed.
        successful (bool)
            True if the job completed and no errors were encountered.
        response_encoding (str)
            Encoding of the job result: 'binary' (result in
            *job_response*) or 'json' (result in *job_response_str*).
        job_response (str)
            Binary-encoded response; populated only when the job has
            completed with *response_encoding* 'binary'.
        job_response_str (str)
            JSON-encoded response; populated only when the job has
            completed with *response_encoding* 'json'.
        status_map (dict of str to str)
            Map of status strings for the executed job.  Allowed keys:
            **error_message** -- present only when the job status is ERROR.
    """
    # Avoid the shared mutable default dict of the original signature; a
    # None sentinel yields a fresh dict per call with identical behavior.
    options = {} if options is None else options

    assert isinstance( job_id, (int, long, float)), "get_job(): Argument 'job_id' must be (one) of type(s) '(int, long, float)'; given %s" % type( job_id ).__name__
    assert isinstance( options, (dict)), "get_job(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/get/job" )

    # Build the request payload for the endpoint.
    obj = {}
    obj['job_id'] = job_id
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/get/job' )

    return AttrDict( response )
# end get_job # begin get_records
def get_records( self, table_name = None, offset = 0, limit = 10000, encoding = 'binary', options = None, get_record_type = True ):
    """Retrieves records from a given table, optionally filtered by an
    expression and/or sorted by a column.

    This operation can be performed on tables, views, or on homogeneous
    collections (collections containing tables of all the same type).
    Records can be returned encoded as binary, json or geojson.  Paging is
    supported via *offset* and *limit*.  Note that when paging through a
    table, if the table (or the underlying table in case of a view) is
    updated, the records retrieved may differ between calls.

    Parameters:
        table_name (str)
            Name of the table from which the records will be fetched.  Must
            be a table, view or homogeneous collection.
        offset (long)
            Number of initial results to skip (useful for paging).  The
            default value is 0; minimum 0, maximum MAX_INT.
        limit (long)
            Maximum number of results to return, or END_OF_SET (-9999) for
            the max number of results.  The default value is 10000.
        encoding (str)
            Encoding for returned records: 'binary', 'json' or 'geojson'.
            The default value is 'binary'.
        options (dict of str to str)
            Optional parameters; the default is an empty dict.  Allowed
            keys:

            * **expression** -- optional filter expression to apply.
            * **fast_index_lookup** -- use indexes for the lookup when
              possible ('true'/'false'; default 'true').
            * **sort_by** -- optional column to sort by (no sorting by
              default).
            * **sort_order** -- 'ascending' or 'descending' (default
              'ascending'); requires sort_by.
        get_record_type (bool)
            If True, deduce and return the record type for the returned
            records.  Default is True.

    Returns:
        A dict with the following entries--

        table_name (str)
            Value of input parameter *table_name*.
        type_name (str)
        type_schema (str)
            Avro schema of *records_binary* or *records_json*.
        records_binary (list of str)
            Binary encoded records when *encoding* was 'binary'; otherwise
            not populated.
        records_json (list of str)
            JSON encoded records when *encoding* was 'json'; for 'geojson',
            a single GeoJSON FeatureCollection entry.  Otherwise not
            populated.
        total_number_of_records (long)
            Total/filtered number of records.
        has_more_records (bool)
            Too many records; a partial set was returned.
        record_type (:class:`RecordType` or None)
            A :class:`RecordType` object with which the user can decode the
            binary data via :meth:`GPUdbRecord.decode_binary_data`.
            Available only if get_record_type is True.
    """
    # Avoid the shared mutable default dict of the original signature; a
    # None sentinel yields a fresh dict per call with identical behavior.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "get_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( offset, (int, long, float)), "get_records(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
    assert isinstance( limit, (int, long, float)), "get_records(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
    assert isinstance( encoding, (basestring)), "get_records(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "get_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
    assert ( isinstance(get_record_type, bool) ), "get_records: Argument 'get_record_type' must be a boolean; given %s" % type( get_record_type ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/get/records" )

    # Build the request payload for the endpoint.
    obj = {}
    obj['table_name'] = table_name
    obj['offset'] = offset
    obj['limit'] = limit
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/get/records' )

    # Bail out early on server-side errors without attempting type lookup.
    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Create the record type and save it in the response, if the user asks for it
    if get_record_type:
        record_type = self.get_known_type( response["type_name"] )
        response["record_type"] = record_type

    return AttrDict( response )
# end get_records # begin get_records_and_decode
def get_records_and_decode( self, table_name = None, offset = 0, limit = 10000,
                            encoding = 'binary', options = None, record_type = None,
                            force_primitive_return_types = True ):
    """Retrieves records from a given table, optionally filtered by an
    expression and/or sorted by a column, and decodes them client-side.

    This operation can be performed on tables, views, or on homogeneous
    collections (collections containing tables of all the same type).
    Records can be returned encoded as binary, json or geojson.  Paging is
    supported via *offset* and *limit*; note that if the underlying table
    is updated between calls, the retrieved records may differ.

    Parameters:
        table_name (str)
            Name of the table from which the records will be fetched.
            Must be a table, view or homogeneous collection.
        offset (long)
            Number of initial results to skip (useful for paging).
            Minimum allowed value is 0; maximum is MAX_INT.  The default
            value is 0.
        limit (long)
            Maximum number of results to return, or END_OF_SET (-9999)
            to return the max number of results.  The default value is
            10000.
        encoding (str)
            Encoding for returned records: 'binary', 'json' or
            'geojson'.  The default value is 'binary'.
        options (dict of str to str)
            Optional parameters ('expression', 'fast_index_lookup',
            'sort_by', 'sort_order').  The default value is an empty
            dict ( {} ); ``None`` is treated as an empty dict.
        record_type (:class:`RecordType` or None)
            The record type expected in the results, or None to
            determine the appropriate type automatically.  If known,
            providing this may improve performance in binary mode.  Not
            used in JSON mode.  The default value is None.
        force_primitive_return_types (bool)
            If True, `OrderedDict` objects are returned with string
            sub-type column values converted back to strings (incurs a
            speed penalty); if False, :class:`Record` objects are
            returned with native/custom structs.  Default value is True.

    Returns:
        A dict with entries *table_name*, *type_name*, *type_schema*,
        *total_number_of_records*, *has_more_records*, and *records* (a
        list of decoded :class:`Record` objects).
    """
    # Fix: 'options' previously used a shared mutable default argument ({}),
    # which is a classic Python pitfall; normalize None to a fresh dict here.
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "get_records_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( offset, (int, long, float)), "get_records_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
    assert isinstance( limit, (int, long, float)), "get_records_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
    assert isinstance( encoding, (basestring)), "get_records_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "get_records_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
    # Fix: compare against None with 'is' (identity), not '==' (PEP 8)
    assert ( (record_type is None) or isinstance(record_type, RecordType) ), "get_records_and_decode: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
    assert isinstance(force_primitive_return_types, bool), "get_records_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__

    (REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/get/records", get_rsp_cext = True )

    # Force JSON encoding if client encoding is json and method encoding
    # is binary (checking for binary so that we do not accidentally override
    # the GeoJSON encoding)
    if ( (self.encoding == "JSON") and (encoding == "binary") ):
        encoding = "json"

    obj = {}
    obj['table_name'] = table_name
    obj['offset'] = offset
    obj['limit'] = limit
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response, raw_response = self.__post_then_get_cext_raw( REQ_SCHEMA, RSP_SCHEMA_CEXT,
                                                            obj, '/get/records' )
    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Decode the data
    if (encoding == 'binary'):
        # Look up the record type from the server only when the caller
        # did not supply one
        record_type = record_type if record_type else self.get_known_type( response["type_name"] )
        records = record_type.decode_records( raw_response, response["records_binary"] )
        if force_primitive_return_types:
            records = _Util.convert_cext_records_to_ordered_dicts( records )
        response["records"] = records
    else:
        response["records"] = [ json.loads( _r, object_pairs_hook = collections.OrderedDict )
                                for _r in response["records_json"] ]
    # end if

    # The raw encoded payloads are superseded by the decoded 'records'
    del response["records_binary"]
    del response["records_json"]

    return AttrDict( response )
# end get_records_and_decode # begin get_records_by_column
def get_records_by_column( self, table_name = None, column_names = None, offset = None,
                           limit = None, encoding = 'binary', options = None ):
    """For a given table, retrieves the values from the requested
    column(s).  Maps of column name to the array of values as well as the
    column data type are returned.

    Supports pagination via *offset* and *limit*.  The response is
    returned as a dynamic schema; when binary encoding is used, a
    :class:`RecordType` for decoding it is attached to the response.

    Parameters:
        table_name (str)
            Name of the table on which this operation will be performed.
            The table cannot be a parent set.
        column_names (list of str)
            The list of column values to retrieve.  A single element is
            automatically promoted to a list internally.
        offset (long)
            Number of initial results to skip (useful for paging).
            Minimum allowed value is 0; maximum is MAX_INT.
        limit (long)
            Maximum number of results to return (server default 10000 if
            not provided), or END_OF_SET (-9999) for the server maximum.
        encoding (str)
            Encoding for returned records: 'binary' or 'json'.  The
            default value is 'binary'.
        options (dict of str to str)
            Optional parameters ('expression', 'sort_by', 'sort_order',
            'order_by', 'convert_wkts_to_wkbs').  The default value is
            an empty dict ( {} ); ``None`` is treated as an empty dict.

    Returns:
        A dict with entries *table_name*, *response_schema_str*,
        *binary_encoded_response*, *json_encoded_response*,
        *total_number_of_records*, *has_more_records*, and *record_type*
        (a :class:`RecordType` for decoding the binary data via
        :meth:`GPUdbRecord.decode_binary_data`, or None if JSON encoding
        was used).
    """
    # Fix: 'options' previously used a shared mutable default argument ({})
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "get_records_by_column(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    # Promote a single column name to a one-element list
    column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
    assert isinstance( offset, (int, long, float)), "get_records_by_column(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
    assert isinstance( limit, (int, long, float)), "get_records_by_column(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
    assert isinstance( encoding, (basestring)), "get_records_by_column(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "get_records_by_column(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/get/records/bycolumn" )

    obj = {}
    obj['table_name'] = table_name
    obj['column_names'] = column_names
    obj['offset'] = offset
    obj['limit'] = limit
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/get/records/bycolumn' )

    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Create the record type and save it in the response, if applicable
    if encoding == "binary":
        record_type = RecordType.from_dynamic_schema( response["response_schema_str"],
                                                      response["binary_encoded_response"] )
        response["record_type"] = record_type
    else:
        response["record_type"] = None

    return AttrDict( response )
# end get_records_by_column # begin get_records_by_column_and_decode
def get_records_by_column_and_decode( self, table_name = None, column_names = None,
                                      offset = None, limit = None, encoding = 'binary',
                                      options = None, record_type = None,
                                      force_primitive_return_types = True,
                                      get_column_major = True ):
    """For a given table, retrieves the values from the requested
    column(s) and decodes them client-side.

    Supports pagination via *offset* and *limit*.  The response is a
    dynamic schema; decoded records can be returned column-major
    (default) or row-major.

    Parameters:
        table_name (str)
            Name of the table on which this operation will be performed.
            The table cannot be a parent set.
        column_names (list of str)
            The list of column values to retrieve.  A single element is
            automatically promoted to a list internally.
        offset (long)
            Number of initial results to skip (useful for paging).
            Minimum allowed value is 0; maximum is MAX_INT.
        limit (long)
            Maximum number of results to return (server default 10000 if
            not provided), or END_OF_SET (-9999) for the server maximum.
        encoding (str)
            Encoding for returned records: 'binary' or 'json'.  The
            default value is 'binary'.
        options (dict of str to str)
            Optional parameters ('expression', 'sort_by', 'sort_order',
            'order_by', 'convert_wkts_to_wkbs').  The default value is
            an empty dict ( {} ); ``None`` is treated as an empty dict.
        record_type (:class:`RecordType` or None)
            The record type expected in the results, or None to
            determine the appropriate type automatically.  If known,
            providing this may improve performance in binary mode.  Not
            used in JSON mode.  The default value is None.
        force_primitive_return_types (bool)
            If True, `OrderedDict` objects are returned with string
            sub-type column values converted back to strings (incurs a
            speed penalty); if False, :class:`Record` objects are
            returned with native/custom structs.  Default value is True.
        get_column_major (bool)
            If True, decoded records are transposed to column-major;
            otherwise returned row-major.  Default value is True.

    Returns:
        A dict with entries *table_name*, *response_schema_str*,
        *total_number_of_records*, *has_more_records*, and *records* (a
        list of decoded :class:`Record` objects).
    """
    # Fix: 'options' previously used a shared mutable default argument ({})
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "get_records_by_column_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    # Promote a single column name to a one-element list
    column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
    assert isinstance( offset, (int, long, float)), "get_records_by_column_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
    assert isinstance( limit, (int, long, float)), "get_records_by_column_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
    assert isinstance( encoding, (basestring)), "get_records_by_column_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "get_records_by_column_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
    # Fix: compare against None with 'is' (identity), not '==' (PEP 8)
    assert ( (record_type is None) or isinstance(record_type, RecordType) ), "get_records_by_column_and_decode: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
    assert isinstance(force_primitive_return_types, bool), "get_records_by_column_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__
    assert isinstance(get_column_major, bool), "get_records_by_column_and_decode: Argument 'get_column_major' must be bool; given %s" % type( get_column_major ).__name__

    (REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/get/records/bycolumn", get_rsp_cext = True )

    # Force JSON encoding if client encoding is json and method encoding
    # is binary (checking for binary so that we do not accidentally override
    # the GeoJSON encoding)
    if ( (self.encoding == "JSON") and (encoding == "binary") ):
        encoding = "json"

    obj = {}
    obj['table_name'] = table_name
    obj['column_names'] = column_names
    obj['offset'] = offset
    obj['limit'] = limit
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response, raw_response = self.__post_then_get_cext_raw( REQ_SCHEMA, RSP_SCHEMA_CEXT,
                                                            obj, '/get/records/bycolumn' )
    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Decode the data
    if (encoding == 'binary'):
        # Derive the record type from the dynamic schema unless supplied
        record_type = record_type if record_type else RecordType.from_dynamic_schema( response["response_schema_str"],
                                                                                      raw_response,
                                                                                      response["binary_encoded_response"] )
        records = record_type.decode_dynamic_records( raw_response, response["binary_encoded_response"] )
        if force_primitive_return_types:
            records = _Util.convert_cext_records_to_ordered_dicts( records )
        # Transpose the data to column-major, if requested by the user
        if get_column_major:
            records = GPUdbRecord.transpose_data_to_col_major( records )
        response["records"] = records
    else:
        records = json.loads( response["json_encoded_response"] )
        if get_column_major:
            # Get column-major data
            records = GPUdbRecord.decode_dynamic_json_data_column_major( records, response["response_schema_str"] )
        else:
            # Get row-major data
            records = GPUdbRecord.decode_dynamic_json_data_row_major( records, response["response_schema_str"] )
        response["records"] = records
    # end if

    # The raw encoded payloads are superseded by the decoded 'records'
    del response["binary_encoded_response"]
    del response["json_encoded_response"]

    return AttrDict( response )
# end get_records_by_column_and_decode # begin get_records_by_series
def get_records_by_series( self, table_name = None, world_table_name = None, offset = 0,
                           limit = 250, encoding = 'binary', options = None ):
    """Retrieves the complete series/track records from the given
    *world_table_name* based on the partial track information contained
    in *table_name*.

    In contrast to :meth:`.get_records` this returns records grouped by
    series/track, each sorted by its TIMESTAMP column.  Paging is
    supported via *offset* and *limit*.

    Parameters:
        table_name (str)
            Name of the collection/table/view for which series/tracks
            will be fetched.
        world_table_name (str)
            Name of the table containing the complete series/track
            information for the tracks present in *table_name*.  Can be
            blank.
        offset (int)
            Number of initial series/tracks to skip (useful for paging).
            Minimum allowed value is 0; maximum is MAX_INT.  The default
            value is 0.
        limit (int)
            Maximum number of series/tracks to return, or END_OF_SET
            (-9999) to return the max number of results.  The default
            value is 250.
        encoding (str)
            Encoding for returned records: 'binary' or 'json'.  The
            default value is 'binary'.
        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ); ``None`` is treated as an empty dict.

    Returns:
        A dict with entries *table_names*, *type_names*, *type_schemas*,
        *list_records_binary*, *list_records_json*, and *record_types*
        (a list of :class:`RecordType` objects, one per series/track,
        for decoding via :meth:`GPUdbRecord.decode_binary_data`).
    """
    # Fix: 'options' previously used a shared mutable default argument ({})
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "get_records_by_series(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( world_table_name, (basestring)), "get_records_by_series(): Argument 'world_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( world_table_name ).__name__
    assert isinstance( offset, (int, long, float)), "get_records_by_series(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
    assert isinstance( limit, (int, long, float)), "get_records_by_series(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
    assert isinstance( encoding, (basestring)), "get_records_by_series(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "get_records_by_series(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/get/records/byseries" )

    obj = {}
    obj['table_name'] = table_name
    obj['world_table_name'] = world_table_name
    obj['offset'] = offset
    obj['limit'] = limit
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/get/records/byseries' )

    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Create the record types (one per returned series/track) and save
    # them in the response
    record_types = [ self.get_known_type( __type_id ) for __type_id in response["type_names"] ]
    response["record_types"] = record_types

    return AttrDict( response )
# end get_records_by_series # begin get_records_by_series_and_decode
def get_records_by_series_and_decode( self, table_name = None, world_table_name = None,
                                      offset = 0, limit = 250, encoding = 'binary',
                                      options = None, force_primitive_return_types = True ):
    """Retrieves the complete series/track records from the given
    *world_table_name* based on the partial track information contained
    in *table_name*, and decodes them client-side.

    Records are returned grouped by series/track, each sorted by its
    TIMESTAMP column.  Paging is supported via *offset* and *limit*.

    Parameters:
        table_name (str)
            Name of the collection/table/view for which series/tracks
            will be fetched.
        world_table_name (str)
            Name of the table containing the complete series/track
            information for the tracks present in *table_name*.  Can be
            blank.
        offset (int)
            Number of initial series/tracks to skip (useful for paging).
            Minimum allowed value is 0; maximum is MAX_INT.  The default
            value is 0.
        limit (int)
            Maximum number of series/tracks to return, or END_OF_SET
            (-9999) to return the max number of results.  The default
            value is 250.
        encoding (str)
            Encoding for returned records: 'binary' or 'json'.  The
            default value is 'binary'.
        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ); ``None`` is treated as an empty dict.
        force_primitive_return_types (bool)
            If True, `OrderedDict` objects are returned with string
            sub-type column values converted back to strings (incurs a
            speed penalty); if False, :class:`Record` objects are
            returned with native/custom structs.  Default value is True.

    Returns:
        A dict with entries *table_names*, *type_names*, *type_schemas*,
        and *records* (a list of lists of decoded :class:`Record`
        objects, one inner list per series/track).
    """
    # Fix: 'options' previously used a shared mutable default argument ({})
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "get_records_by_series_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( world_table_name, (basestring)), "get_records_by_series_and_decode(): Argument 'world_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( world_table_name ).__name__
    assert isinstance( offset, (int, long, float)), "get_records_by_series_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
    assert isinstance( limit, (int, long, float)), "get_records_by_series_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
    assert isinstance( encoding, (basestring)), "get_records_by_series_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "get_records_by_series_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
    assert isinstance(force_primitive_return_types, bool), "get_records_by_series_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__

    (REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/get/records/byseries", get_rsp_cext = True )

    # Force JSON encoding if client encoding is json and method encoding
    # is binary (checking for binary so that we do not accidentally override
    # the GeoJSON encoding)
    if ( (self.encoding == "JSON") and (encoding == "binary") ):
        encoding = "json"

    obj = {}
    obj['table_name'] = table_name
    obj['world_table_name'] = world_table_name
    obj['offset'] = offset
    obj['limit'] = limit
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response, raw_response = self.__post_then_get_cext_raw( REQ_SCHEMA, RSP_SCHEMA_CEXT,
                                                            obj, '/get/records/byseries' )
    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Decode the data
    if (encoding == 'binary'):
        # One record type per returned series/track
        _record_types = [ self.get_known_type( _type_id ) for _type_id in response["type_names"] ]
        records = [ _rt.decode_records( raw_response, _records )
                    for _rt, _records in zip( _record_types, response["list_records_binary"] ) ]
        if force_primitive_return_types:
            records = [ _Util.convert_cext_records_to_ordered_dicts( _records )
                        for _records in records ]
        response["records"] = records
    else:
        response["records"] = [ [ json.loads( _record, object_pairs_hook = collections.OrderedDict )
                                  for _record in _records ]
                                for _records in response["list_records_json"] ]
    # end if

    # The raw encoded payloads are superseded by the decoded 'records'
    del response["list_records_binary"]
    del response["list_records_json"]

    return AttrDict( response )
# end get_records_by_series_and_decode # begin get_records_from_collection
def get_records_from_collection( self, table_name = None, offset = 0, limit = 10000,
                                 encoding = 'binary', options = None ):
    """Retrieves records from a collection.  The operation can optionally
    return the record IDs which can be used in certain queries such as
    :meth:`.delete_records`.

    Paging is supported via *offset* and *limit*.  Note that when using
    the Java API, it is not possible to retrieve records from join
    tables using this operation.

    Parameters:
        table_name (str)
            Name of the collection or table from which records are to be
            retrieved.  Must be an existing collection or table.
        offset (long)
            Number of initial results to skip (useful for paging).
            Minimum allowed value is 0; maximum is MAX_INT.  The default
            value is 0.
        limit (long)
            Maximum number of results to return, or END_OF_SET (-9999)
            to return the max number of results.  The default value is
            10000.
        encoding (str)
            Encoding for returned records: 'binary' or 'json'.  The
            default value is 'binary'.
        options (dict of str to str)
            Optional parameters ('return_record_ids').  The default
            value is an empty dict ( {} ); ``None`` is treated as an
            empty dict.

    Returns:
        A dict with entries *table_name*, *type_names*,
        *records_binary*, *records_json*, *record_ids*, and
        *record_types* (a list of :class:`RecordType` objects for
        decoding via :meth:`GPUdbRecord.decode_binary_data` per record).
    """
    # Fix: 'options' previously used a shared mutable default argument ({})
    options = {} if options is None else options

    assert isinstance( table_name, (basestring)), "get_records_from_collection(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( offset, (int, long, float)), "get_records_from_collection(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
    assert isinstance( limit, (int, long, float)), "get_records_from_collection(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
    assert isinstance( encoding, (basestring)), "get_records_from_collection(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "get_records_from_collection(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/get/records/fromcollection" )

    obj = {}
    obj['table_name'] = table_name
    obj['offset'] = offset
    obj['limit'] = limit
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/get/records/fromcollection' )

    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Create the record types (one per returned record's type ID) and
    # save them in the response
    record_types = [ self.get_known_type( __type_id ) for __type_id in response["type_names"] ]
    response["record_types"] = record_types

    return AttrDict( response )
# end get_records_from_collection # begin get_records_from_collection_and_decode
def get_records_from_collection_and_decode( self, table_name = None, offset = 0,
                                            limit = 10000, encoding = 'binary',
                                            options = {},
                                            force_primitive_return_types = True ):
    """Retrieves records from a collection and decodes them client-side.

    The operation can optionally return the record IDs which can be used in
    certain queries such as :meth:`.delete_records`.  Supports paging through
    the data via the input parameter *offset* and input parameter *limit*
    parameters.  Note that when using the Java API, it is not possible to
    retrieve records from join tables using this operation.

    Parameters:

        table_name (str)
            Name of the collection or table from which records are to be
            retrieved.  Must be an existing collection or table.

        offset (long)
            A positive integer indicating the number of initial results to
            skip (useful for paging).  The default value is 0.  The minimum
            allowed value is 0; the maximum allowed value is MAX_INT.

        limit (long)
            A positive integer indicating the maximum number of results to
            be returned, or END_OF_SET (-9999) to indicate that the max
            number of results should be returned.  The default value is
            10000.

        encoding (str)
            Specifies the encoding for returned records; either 'binary' or
            'json'.  The default value is 'binary'.

        options (dict of str to str)
            The default value is an empty dict ( {} ).  Allowed keys are:

            * **return_record_ids** -- If 'true' then return the internal
              record ID along with each returned record.  Default is
              'false'.

        force_primitive_return_types (bool)
            If `True`, then `OrderedDict` objects will be returned, where
            string sub-type columns have their values converted back to
            strings (e.g. Python `datetime` structs rendered as strings).
            If `False`, then :class:`Record` objects will be returned, which
            return native or custom structs for string sub-types; no
            conversion to string takes place.  The string conversion incurs
            a speed penalty, so the :class:`Record` option is strongly
            recommended.  If `True` but none of the returned columns require
            a conversion, the original :class:`Record` objects are returned.
            Default value is True.

    Returns:
        A dict with the following entries--

        table_name (str)
            Value of input parameter *table_name*.

        type_names (list of str)
            The type IDs of the corresponding records in output parameter
            *records*.  Useful when input parameter *table_name* is a
            heterogeneous collection (collections containing tables of
            different types).

        record_ids (list of str)
            If the 'return_record_ids' option of the request was 'true',
            then this list contains the internal ID for each object.
            Otherwise it will be empty.

        records (list of :class:`Record`)
            A list of :class:`Record` objects which contain the decoded
            records.
    """
    assert isinstance( table_name, (basestring)), "get_records_from_collection_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( offset, (int, long, float)), "get_records_from_collection_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
    assert isinstance( limit, (int, long, float)), "get_records_from_collection_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
    assert isinstance( encoding, (basestring)), "get_records_from_collection_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
    assert isinstance( options, (dict)), "get_records_from_collection_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
    assert isinstance(force_primitive_return_types, bool), "get_records_from_collection_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__

    # Fix: the JSON decode path below referenced `collections.OrderedDict`,
    # but this module's visible import section only does
    # `from collections import Iterator`, which does not bind the bare name
    # `collections` -- that path raised NameError.  Import the needed name
    # locally (harmless even if `collections` is imported elsewhere).
    from collections import OrderedDict

    (REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/get/records/fromcollection", get_rsp_cext = True )

    # Force JSON encoding if client encoding is json and method encoding
    # is binary (checking for binary so that we do not accidentally override
    # the GeoJSON encoding)
    if ( (self.encoding == "JSON") and (encoding == "binary") ):
        encoding = "json"

    obj = {}
    obj['table_name'] = table_name
    obj['offset'] = offset
    obj['limit'] = limit
    obj['encoding'] = encoding
    obj['options'] = self.__sanitize_dicts( options )

    response, raw_response = self.__post_then_get_cext_raw( REQ_SCHEMA, RSP_SCHEMA_CEXT, obj, '/get/records/fromcollection' )

    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Decode the data
    if (encoding == 'binary'):
        record_types = [ self.get_known_type( type_id ) for type_id in response["type_names"] ]
        records = [ rt.decode_records( raw_response, records )[ 0 ]
                    for rt, records in zip( record_types, response["records_binary"] ) ]
        if force_primitive_return_types:
            records = _Util.convert_cext_records_to_ordered_dicts( records )
        response["records"] = records
    else:
        response["records"] = [ json.loads( record, object_pairs_hook = OrderedDict )
                                for record in response["records_json"] ]
    # end if

    # Replace the raw per-encoding payloads with the unified 'records' list
    del response["records_binary"]
    del response["records_json"]

    return AttrDict( response )
# end get_records_from_collection_and_decode # begin grant_permission_system
def grant_permission_system( self, name = None, permission = None, options = {} ):
    """Grants a system-level permission to a user or role.

    Parameters:

        name (str)
            Name of the user or role to which the permission will be
            granted.  Must be an existing user or role.

        permission (str)
            Permission to grant to the user or role.  Allowed values are:

            * **system_admin** -- Full access to all data and system
              functions.
            * **system_write** -- Read and write access to all tables.
            * **system_read** -- Read-only access to all tables.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        name (str)
            Value of input parameter *name*.

        permission (str)
            Value of input parameter *permission*.
    """
    assert isinstance( name, basestring ), \
        "grant_permission_system(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
    assert isinstance( permission, basestring ), \
        "grant_permission_system(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
    assert isinstance( options, dict ), \
        "grant_permission_system(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    req_schema, rsp_schema = self.__get_schemas( "/grant/permission/system" )

    # Assemble the request payload in one literal
    payload = {
        'name':       name,
        'permission': permission,
        'options':    self.__sanitize_dicts( options ),
    }

    return AttrDict( self.__post_then_get_cext( req_schema, rsp_schema,
                                                payload, '/grant/permission/system' ) )
# end grant_permission_system # begin grant_permission_table
def grant_permission_table( self, name = None, permission = None, table_name = None,
                            filter_expression = '', options = {} ):
    """Grants a table-level permission to a user or role.

    Parameters:

        name (str)
            Name of the user or role to which the permission will be
            granted.  Must be an existing user or role.

        permission (str)
            Permission to grant to the user or role.  Allowed values are:

            * **table_admin** -- Full read/write and administrative access
              to the table.
            * **table_insert** -- Insert access to the table.
            * **table_update** -- Update access to the table.
            * **table_delete** -- Delete access to the table.
            * **table_read** -- Read access to the table.

        table_name (str)
            Name of the table to which the permission grants access.  Must
            be an existing table, collection, or view.  If a collection,
            the permission also applies to tables and views in the
            collection.

        filter_expression (str)
            Reserved for future use.  The default value is ''.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        name (str)
            Value of input parameter *name*.

        permission (str)
            Value of input parameter *permission*.

        table_name (str)
            Value of input parameter *table_name*.

        filter_expression (str)
            Value of input parameter *filter_expression*.
    """
    assert isinstance( name, basestring ), \
        "grant_permission_table(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
    assert isinstance( permission, basestring ), \
        "grant_permission_table(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
    assert isinstance( table_name, basestring ), \
        "grant_permission_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( filter_expression, basestring ), \
        "grant_permission_table(): Argument 'filter_expression' must be (one) of type(s) '(basestring)'; given %s" % type( filter_expression ).__name__
    assert isinstance( options, dict ), \
        "grant_permission_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    req_schema, rsp_schema = self.__get_schemas( "/grant/permission/table" )

    # Assemble the request payload in one literal
    payload = {
        'name':              name,
        'permission':        permission,
        'table_name':        table_name,
        'filter_expression': filter_expression,
        'options':           self.__sanitize_dicts( options ),
    }

    return AttrDict( self.__post_then_get_cext( req_schema, rsp_schema,
                                                payload, '/grant/permission/table' ) )
# end grant_permission_table # begin grant_role
def grant_role( self, role = None, member = None, options = {} ):
    """Grants membership in a role to a user or role.

    Parameters:

        role (str)
            Name of the role in which membership will be granted.  Must be
            an existing role.

        member (str)
            Name of the user or role that will be granted membership in
            input parameter *role*.  Must be an existing user or role.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        role (str)
            Value of input parameter *role*.

        member (str)
            Value of input parameter *member*.
    """
    assert isinstance( role, basestring ), \
        "grant_role(): Argument 'role' must be (one) of type(s) '(basestring)'; given %s" % type( role ).__name__
    assert isinstance( member, basestring ), \
        "grant_role(): Argument 'member' must be (one) of type(s) '(basestring)'; given %s" % type( member ).__name__
    assert isinstance( options, dict ), \
        "grant_role(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    req_schema, rsp_schema = self.__get_schemas( "/grant/role" )

    # Assemble the request payload in one literal
    payload = {
        'role':    role,
        'member':  member,
        'options': self.__sanitize_dicts( options ),
    }

    return AttrDict( self.__post_then_get_cext( req_schema, rsp_schema,
                                                payload, '/grant/role' ) )
# end grant_role # begin has_proc
def has_proc( self, proc_name = None, options = {} ):
    """Checks the existence of a proc with the given name.

    Parameters:

        proc_name (str)
            Name of the proc to check for existence.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        proc_name (str)
            Value of input parameter *proc_name*

        proc_exists (bool)
            Indicates whether the proc exists or not.  Allowed values are
            true and false.
    """
    assert isinstance( proc_name, basestring ), \
        "has_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__
    assert isinstance( options, dict ), \
        "has_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    req_schema, rsp_schema = self.__get_schemas( "/has/proc" )

    # Assemble the request payload in one literal
    payload = {
        'proc_name': proc_name,
        'options':   self.__sanitize_dicts( options ),
    }

    return AttrDict( self.__post_then_get_cext( req_schema, rsp_schema,
                                                payload, '/has/proc' ) )
# end has_proc # begin has_table
def has_table( self, table_name = None, options = {} ):
    """Checks for the existence of a table with the given name.

    Parameters:

        table_name (str)
            Name of the table to check for existence.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        table_name (str)
            Value of input parameter *table_name*

        table_exists (bool)
            Indicates whether the table exists or not.  Allowed values are
            true and false.
    """
    assert isinstance( table_name, basestring ), \
        "has_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( options, dict ), \
        "has_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    req_schema, rsp_schema = self.__get_schemas( "/has/table" )

    # Assemble the request payload in one literal
    payload = {
        'table_name': table_name,
        'options':    self.__sanitize_dicts( options ),
    }

    return AttrDict( self.__post_then_get_cext( req_schema, rsp_schema,
                                                payload, '/has/table' ) )
# end has_table # begin has_type
def has_type( self, type_id = None, options = {} ):
    """Check for the existence of a type.

    Parameters:

        type_id (str)
            Id of the type returned in response to :meth:`.create_type`
            request.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        type_id (str)
            Value of input parameter *type_id*.

        type_exists (bool)
            Indicates whether the type exists or not.  Allowed values are
            true and false.
    """
    assert isinstance( type_id, basestring ), \
        "has_type(): Argument 'type_id' must be (one) of type(s) '(basestring)'; given %s" % type( type_id ).__name__
    assert isinstance( options, dict ), \
        "has_type(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    req_schema, rsp_schema = self.__get_schemas( "/has/type" )

    # Assemble the request payload in one literal
    payload = {
        'type_id': type_id,
        'options': self.__sanitize_dicts( options ),
    }

    return AttrDict( self.__post_then_get_cext( req_schema, rsp_schema,
                                                payload, '/has/type' ) )
# end has_type # begin insert_records
def insert_records( self, table_name = None, data = None, list_encoding = None,
                    options = {}, record_type = None ):
    """Adds multiple records to the specified table.

    The operation is synchronous, meaning that a response will not be
    returned until all the records are fully inserted and available.  The
    response payload provides the counts of the number of records actually
    inserted and/or updated, and can provide the unique identifier of each
    added record.

    The input parameter *options* parameter can be used to customize this
    function's behavior.  The *update_on_existing_pk* option specifies the
    record collision policy for inserting into a table with a `primary key
    <../../../concepts/tables.html#primary-keys>`_, but is ignored if no
    primary key exists.  The *return_record_ids* option indicates that the
    database should return the unique identifiers of inserted records.

    Parameters:

        table_name (str)
            Table to which the records are to be added.  Must be an
            existing table.

        data (list of Records)
            An array of *binary* or *json* encoded data, or :class:`Record`
            objects for the records to be added.  The user can provide a
            single element (which will be automatically promoted to a list
            internally) or a list.

        list_encoding (str)
            The encoding of the records to be inserted; 'binary' or
            'json'.  The default value is 'binary'.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ).  Allowed keys are:

            * **update_on_existing_pk** -- Specifies the record collision
              policy for inserting into a table with a primary key.  If
              set to *true*, any existing record with matching primary key
              values is replaced by the new record; if *false* the
              existing record is kept and the new record discarded.
              Ignored if the table has no primary key.  The default value
              is 'false'.
            * **return_record_ids** -- If *true* then return the internal
              record id for each inserted record.  The default value is
              'false'.

        record_type (RecordType)
            A :class:`RecordType` object using which the binary data will
            be encoded.  If None, then it is assumed that the data is
            already encoded, and no further encoding will occur.  Default
            is None.

    Returns:
        A dict with the following entries--

        record_ids (list of str)
            An array containing the IDs with which the added records are
            identified internally.

        count_inserted (int)
            The number of records inserted.

        count_updated (int)
            The number of records updated.
    """
    assert isinstance( table_name, (basestring)), "insert_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    # Promote a single record to a one-element list; None becomes empty list
    data = data if isinstance( data, list ) else ( [] if (data is None) else [ data ] )
    assert isinstance( list_encoding, (basestring, type( None ))), "insert_records(): Argument 'list_encoding' must be (one) of type(s) '(basestring, type( None ))'; given %s" % type( list_encoding ).__name__
    assert isinstance( options, (dict)), "insert_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
    # Fix: use `is None` for the None check, and make the assert-message
    # prefix consistent with the other asserts ("insert_records():")
    assert ( (record_type is None) or isinstance(record_type, RecordType) ), "insert_records(): Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__

    # Two request schemas: the plain avro one and the c-extension one
    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/insert/records" )
    (REQ_SCHEMA_CEXT, RSP_SCHEMA) = self.__get_schemas( "/insert/records", get_req_cext = True )

    # Default the list encoding from the client's object encoding
    list_encoding = list_encoding if list_encoding else self.__client_to_object_encoding()

    obj = {}
    obj['table_name'] = table_name
    obj['list_encoding'] = list_encoding
    obj['options'] = self.__sanitize_dicts( options )

    if (list_encoding == 'binary'):
        # Convert the objects to proper Records
        use_object_array, data = _Util.convert_binary_data_to_cext_records( self, table_name, data, record_type )

        if use_object_array:
            # First tuple element must be a RecordType or a Schema from the c-extension
            obj['list'] = (data[0].type, data) if data else ()
        else:
            # use avro-encoded bytes for the data
            obj['list'] = data
            obj['list_str'] = []
    else:
        obj['list_str'] = data
        obj['list'] = () # needs a tuple for the c-extension
        use_object_array = True
    # end if

    # Use the c-extension request schema when passing Record objects
    req_schema = REQ_SCHEMA_CEXT if use_object_array else REQ_SCHEMA
    response = self.__post_then_get_cext( req_schema, RSP_SCHEMA, obj, '/insert/records' )

    # Fix: the original ended with `if not _Util.is_ok(response):
    # return AttrDict(response)` followed by the identical unconditional
    # return -- the conditional was redundant and has been removed.
    return AttrDict( response )
# end insert_records # begin insert_records_random
def insert_records_random( self, table_name = None, count = None, options = {} ):
    """Generates a specified number of random records and adds them to the
    given table.

    An optional parameter allows the user to customize the ranges of the
    column values.  Linear profiles may also be specified for some or all
    columns, in which case linear values are generated rather than random
    ones.  Only individual tables are supported for this operation.  The
    operation is synchronous: a response is not returned until all random
    records are fully available.

    Parameters:

        table_name (str)
            Table to which random records will be added.  Must be an
            existing, individual table -- not a collection of tables, nor
            a view of a table.

        count (long)
            Number of records to generate.

        options (dict of str to dicts of str to floats)
            Specifications for the randomness of the values.  Unlike the
            *options* of most other endpoints, this is a map of string to
            map of string to doubles: the top-level keys select which
            column's parameters are being specified, while the inner keys
            select the parameter.  The default value is an empty dict
            ( {} ).  Allowed keys are:

            * **seed** -- Initializes the internal random number generator
              via the inner key **value** (e.g. {'seed': {'value': 100}}),
              so the same set of random numbers can be regenerated across
              invocations.  The minimum is 0.
            * **all** -- Apply the inner specifications to all columns.
              Inner keys:

              * **min** / **max** -- Value bounds for numerical columns
                (defaults -99999 / 99999); string length bounds within
                [0, 200] (defaults 0 / 200); for point/shape/track columns
                the numeric 'x' and 'y' bounds must lie within [-180, 180]
                and [-90, 90] respectively (defaults -180.0/-90.0 and
                180.0/90.0); the default TIMESTAMP minimum corresponds to
                Jan 1, 2010.  min must be <= max when both are given.
                Out-of-range values are silently ignored (no error), so
                use *all* judiciously.
              * **interval** -- Generate evenly spaced values with the
                given interval.  With a max, data is randomly generated
                between min and max and decimated to the interval; without
                one, data is linearly generated from the minimum.  For
                non-decimated string columns the values follow the pattern
                'attrname_creationIndex#' (prefix dropped for limited-size
                string types such as char4).  No nulls are generated for
                nullable columns.
              * **null_percentage** -- Fraction of *count* generated as
                nulls for nullable columns, within [0, 1.0]; ignored for
                non-nullable columns.  Default 5% (0.05).
              * **cardinality** -- Limit the generated values to a fixed
                set.  Must be > 0; not allowed on a column with interval
                specified, and not applicable to WKT or Track-specific
                columns.  Disabled by default.

            * **attr_name** -- Use the desired column name in place of
              *attr_name* to set the same inner parameters (**min**,
              **max**, **interval**, **null_percentage**, **cardinality**)
              for that single column, overriding anything set by *all*.
              Note that **null_percentage** here raises an error if the
              column is not nullable.
            * **track_length** -- Valid only for track data sets (an error
              is thrown otherwise).  Inner keys **min** (default 100) and
              **max** (default 500) bound the generated series lengths;
              each must be an integral value within [1, 500] with
              min <= max.  No nulls are generated for nullable columns.

    Returns:
        A dict with the following entries--

        table_name (str)
            Value of input parameter *table_name*.

        count (long)
            Number of records inserted.
    """
    assert isinstance( table_name, basestring ), \
        "insert_records_random(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( count, (int, long, float) ), \
        "insert_records_random(): Argument 'count' must be (one) of type(s) '(int, long, float)'; given %s" % type( count ).__name__
    assert isinstance( options, dict ), \
        "insert_records_random(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    req_schema, rsp_schema = self.__get_schemas( "/insert/records/random" )

    # Assemble the request payload in one literal
    payload = {
        'table_name': table_name,
        'count':      count,
        'options':    self.__sanitize_dicts( options ),
    }

    return AttrDict( self.__post_then_get_cext( req_schema, rsp_schema,
                                                payload, '/insert/records/random' ) )
# end insert_records_random # begin insert_symbol
def insert_symbol( self, symbol_id = None, symbol_format = None, symbol_data = None,
                   options = {} ):
    """Adds a symbol or icon (i.e. an image) to represent data points when
    data is rendered visually.

    Users must provide the symbol identifier (string), a format (currently
    supported: 'svg' and 'svg_path'), the data for the symbol, and any
    additional optional parameter (e.g. color).  To have a symbol used for
    rendering, create a table with a string column named 'SYMBOLCODE'
    (along with 'x' or 'y' for example).  Then when the table is rendered
    (via `WMS <../../../api/rest/wms_rest.html>`_) if the 'dosymbology'
    parameter is 'true' then the value of the 'SYMBOLCODE' column is used
    to pick the symbol displayed for each point.

    Parameters:

        symbol_id (str)
            The id of the symbol being added.  This is the same id that
            should be in the 'SYMBOLCODE' column for objects using this
            symbol.

        symbol_format (str)
            Specifies the symbol format.  Must be either 'svg' or
            'svg_path'.

        symbol_data (str)
            The actual symbol data.  If input parameter *symbol_format* is
            'svg' then this should be the raw bytes representing an svg
            file.  If input parameter *symbol_format* is svg path then this
            should be an svg path string, for example:
            'M25.979,12.896,5.979,12.896,5.979,19.562,25.979,19.562z'

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict
            ( {} ).  Allowed keys are:

            * **color** -- Ignored if input parameter *symbol_format* is
              'svg'.  If *symbol_format* is 'svg_path' then this option
              specifies the color (in RRGGBB hex format) of the path; for
              example, use 'FF0000' to render the path in red.  If 'color'
              is not provided then '00FF00' (i.e. green) is used by
              default.

    Returns:
        A dict with the following entries--

        symbol_id (str)
            Value of input parameter *symbol_id*.
    """
    assert isinstance( symbol_id, basestring ), \
        "insert_symbol(): Argument 'symbol_id' must be (one) of type(s) '(basestring)'; given %s" % type( symbol_id ).__name__
    assert isinstance( symbol_format, basestring ), \
        "insert_symbol(): Argument 'symbol_format' must be (one) of type(s) '(basestring)'; given %s" % type( symbol_format ).__name__
    assert isinstance( symbol_data, basestring ), \
        "insert_symbol(): Argument 'symbol_data' must be (one) of type(s) '(basestring)'; given %s" % type( symbol_data ).__name__
    assert isinstance( options, dict ), \
        "insert_symbol(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    req_schema, rsp_schema = self.__get_schemas( "/insert/symbol" )

    # Assemble the request payload in one literal
    payload = {
        'symbol_id':     symbol_id,
        'symbol_format': symbol_format,
        'symbol_data':   symbol_data,
        'options':       self.__sanitize_dicts( options ),
    }

    return AttrDict( self.__post_then_get_cext( req_schema, rsp_schema,
                                                payload, '/insert/symbol' ) )
# end insert_symbol # begin kill_proc
[docs] def kill_proc( self, run_id = '', options = {} ): """Kills a running proc instance. Parameters: run_id (str) The run ID of the running proc instance. If the run ID is not found or the proc instance has already completed, this does nothing. If not specified, all running proc instances will be killed. The default value is ''. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- run_ids (list of str) List of run IDs of proc instances that were killed. """ assert isinstance( run_id, (basestring)), "kill_proc(): Argument 'run_id' must be (one) of type(s) '(basestring)'; given %s" % type( run_id ).__name__ assert isinstance( options, (dict)), "kill_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/kill/proc" ) obj = {} obj['run_id'] = run_id obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/kill/proc' ) return AttrDict( response )
# end kill_proc # begin lock_table
[docs] def lock_table( self, table_name = None, lock_type = 'status', options = {} ): """Manages global access to a table's data. By default a table has a input parameter *lock_type* of *read_write*, indicating all operations are permitted. A user may request a *read_only* or a *write_only* lock, after which only read or write operations, respectively, are permitted on the table until the lock is removed. When input parameter *lock_type* is *no_access* then no operations are permitted on the table. The lock status can be queried by setting input parameter *lock_type* to *status*. Parameters: table_name (str) Name of the table to be locked. It must be a currently existing table, collection, or view. lock_type (str) The type of lock being applied to the table. Setting it to *status* will return the current lock status of the table without changing it. Allowed values are: * **status** -- Show locked status * **no_access** -- Allow no read/write operations * **read_only** -- Allow only read operations * **write_only** -- Allow only write operations * **read_write** -- Allow all read/write operations The default value is 'status'. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- lock_type (str) Returns the lock state of the table. 
""" assert isinstance( table_name, (basestring)), "lock_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__ assert isinstance( lock_type, (basestring)), "lock_table(): Argument 'lock_type' must be (one) of type(s) '(basestring)'; given %s" % type( lock_type ).__name__ assert isinstance( options, (dict)), "lock_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/lock/table" ) obj = {} obj['table_name'] = table_name obj['lock_type'] = lock_type obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/lock/table' ) return AttrDict( response )
# end lock_table # begin merge_records
[docs] def merge_records( self, table_name = None, source_table_names = None, field_maps = None, options = {} ): """Create a new empty result table (specified by input parameter *table_name*), and insert all records from source tables (specified by input parameter *source_table_names*) based on the field mapping information (specified by input parameter *field_maps*). For merge records details and examples, see `Merge Records <../../../concepts/merge_records.html>`_. For limitations, see `Merge Records Limitations and Cautions <../../../concepts/merge_records.html#limitations-and-cautions>`_. The field map (specified by input parameter *field_maps*) holds the user-specified maps of target table column names to source table columns. The array of input parameter *field_maps* must match one-to-one with the input parameter *source_table_names*, e.g., there's a map present in input parameter *field_maps* for each table listed in input parameter *source_table_names*. Parameters: table_name (str) The new result table name for the records to be merged. Must NOT be an existing table. source_table_names (list of str) The list of source table names to get the records from. Must be existing table names. The user can provide a single element (which will be automatically promoted to a list internally) or a list. field_maps (list of dicts of str to str) Contains a list of source/target column mappings, one mapping for each source table listed in input parameter *source_table_names* being merged into the target table specified by input parameter *table_name*. Each mapping contains the target column names (as keys) that the data in the mapped source columns or column `expressions <../../../concepts/expressions.html>`_ (as values) will be merged into. All of the source columns being merged into a given target column must match in type, as that type will determine the type of the new target column. 
The user can provide a single element (which will be automatically promoted to a list internally) or a list. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **collection_name** -- Name of a collection which is to contain the newly created merged table specified by input parameter *table_name*. If the collection provided is non-existent, the collection will be automatically created. If empty, then the newly created merged table will be a top-level table. * **is_replicated** -- Indicates the `distribution scheme <../../../concepts/tables.html#distribution>`_ for the data of the merged table specified in input parameter *table_name*. If true, the table will be `replicated <../../../concepts/tables.html#replication>`_. If false, the table will be `randomly sharded <../../../concepts/tables.html#random-sharding>`_. Allowed values are: * true * false The default value is 'false'. * **ttl** -- Sets the `TTL <../../../concepts/ttl.html>`_ of the merged table specified in input parameter *table_name*. * **persist** -- If *true*, then the table specified in input parameter *table_name* will be persisted and will not expire unless a *ttl* is specified. If *false*, then the table will be an in-memory table and will expire unless a *ttl* is specified otherwise. Allowed values are: * true * false The default value is 'true'. * **chunk_size** -- Indicates the chunk size to be used for the merged table specified in input parameter *table_name*. * **view_id** -- view this result table is part of. The default value is ''. 
Returns: A dict with the following entries-- table_name (str) """ assert isinstance( table_name, (basestring)), "merge_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__ source_table_names = source_table_names if isinstance( source_table_names, list ) else ( [] if (source_table_names is None) else [ source_table_names ] ) field_maps = field_maps if isinstance( field_maps, list ) else ( [] if (field_maps is None) else [ field_maps ] ) assert isinstance( options, (dict)), "merge_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/merge/records" ) obj = {} obj['table_name'] = table_name obj['source_table_names'] = source_table_names obj['field_maps'] = field_maps obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/merge/records' ) return AttrDict( response )
# end merge_records # begin admin_replace_tom def admin_replace_tom( self, old_rank_tom = None, new_rank_tom = None ): assert isinstance( old_rank_tom, (int, long, float)), "admin_replace_tom(): Argument 'old_rank_tom' must be (one) of type(s) '(int, long, float)'; given %s" % type( old_rank_tom ).__name__ assert isinstance( new_rank_tom, (int, long, float)), "admin_replace_tom(): Argument 'new_rank_tom' must be (one) of type(s) '(int, long, float)'; given %s" % type( new_rank_tom ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/replace/tom" ) obj = {} obj['old_rank_tom'] = old_rank_tom obj['new_rank_tom'] = new_rank_tom response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/replace/tom' ) return AttrDict( response ) # end admin_replace_tom # begin revoke_permission_system
[docs] def revoke_permission_system( self, name = None, permission = None, options = {} ): """Revokes a system-level permission from a user or role. Parameters: name (str) Name of the user or role from which the permission will be revoked. Must be an existing user or role. permission (str) Permission to revoke from the user or role. Allowed values are: * **system_admin** -- Full access to all data and system functions. * **system_write** -- Read and write access to all tables. * **system_read** -- Read-only access to all tables. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- name (str) Value of input parameter *name*. permission (str) Value of input parameter *permission*. """ assert isinstance( name, (basestring)), "revoke_permission_system(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__ assert isinstance( permission, (basestring)), "revoke_permission_system(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__ assert isinstance( options, (dict)), "revoke_permission_system(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/revoke/permission/system" ) obj = {} obj['name'] = name obj['permission'] = permission obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/revoke/permission/system' ) return AttrDict( response )
# end revoke_permission_system # begin revoke_permission_table
[docs] def revoke_permission_table( self, name = None, permission = None, table_name = None, options = {} ): """Revokes a table-level permission from a user or role. Parameters: name (str) Name of the user or role from which the permission will be revoked. Must be an existing user or role. permission (str) Permission to revoke from the user or role. Allowed values are: * **table_admin** -- Full read/write and administrative access to the table. * **table_insert** -- Insert access to the table. * **table_update** -- Update access to the table. * **table_delete** -- Delete access to the table. * **table_read** -- Read access to the table. table_name (str) Name of the table to which the permission grants access. Must be an existing table, collection, or view. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- name (str) Value of input parameter *name*. permission (str) Value of input parameter *permission*. table_name (str) Value of input parameter *table_name*. 
""" assert isinstance( name, (basestring)), "revoke_permission_table(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__ assert isinstance( permission, (basestring)), "revoke_permission_table(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__ assert isinstance( table_name, (basestring)), "revoke_permission_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__ assert isinstance( options, (dict)), "revoke_permission_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/revoke/permission/table" ) obj = {} obj['name'] = name obj['permission'] = permission obj['table_name'] = table_name obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/revoke/permission/table' ) return AttrDict( response )
# end revoke_permission_table # begin revoke_role
[docs] def revoke_role( self, role = None, member = None, options = {} ): """Revokes membership in a role from a user or role. Parameters: role (str) Name of the role in which membership will be revoked. Must be an existing role. member (str) Name of the user or role that will be revoked membership in input parameter *role*. Must be an existing user or role. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- role (str) Value of input parameter *role*. member (str) Value of input parameter *member*. """ assert isinstance( role, (basestring)), "revoke_role(): Argument 'role' must be (one) of type(s) '(basestring)'; given %s" % type( role ).__name__ assert isinstance( member, (basestring)), "revoke_role(): Argument 'member' must be (one) of type(s) '(basestring)'; given %s" % type( member ).__name__ assert isinstance( options, (dict)), "revoke_role(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/revoke/role" ) obj = {} obj['role'] = role obj['member'] = member obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/revoke/role' ) return AttrDict( response )
# end revoke_role # begin show_proc
[docs] def show_proc( self, proc_name = '', options = {} ): """Shows information about a proc. Parameters: proc_name (str) Name of the proc to show information about. If specified, must be the name of a currently existing proc. If not specified, information about all procs will be returned. The default value is ''. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **include_files** -- If set to *true*, the files that make up the proc will be returned. If set to *false*, the files will not be returned. Allowed values are: * true * false The default value is 'false'. Returns: A dict with the following entries-- proc_names (list of str) The proc names. execution_modes (list of str) The execution modes of the procs named in output parameter *proc_names*. Allowed values are: * @INNER_STRUCTURE files (list of dicts of str to str) Maps of the files that make up the procs named in output parameter *proc_names*. commands (list of str) The commands (excluding arguments) that will be invoked when the procs named in output parameter *proc_names* are executed. args (list of lists of str) Arrays of command-line arguments that will be passed to the procs named in output parameter *proc_names* when executed. options (list of dicts of str to str) The optional parameters for the procs named in output parameter *proc_names*. """ assert isinstance( proc_name, (basestring)), "show_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__ assert isinstance( options, (dict)), "show_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/show/proc" ) obj = {} obj['proc_name'] = proc_name obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/show/proc' ) return AttrDict( response )
# end show_proc # begin show_proc_status
[docs] def show_proc_status( self, run_id = '', options = {} ): """Shows the statuses of running or completed proc instances. Results are grouped by run ID (as returned from :meth:`.execute_proc`) and data segment ID (each invocation of the proc command on a data segment is assigned a data segment ID). Parameters: run_id (str) The run ID of a specific running or completed proc instance for which the status will be returned. If the run ID is not found, nothing will be returned. If not specified, the statuses of all running and completed proc instances will be returned. The default value is ''. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **clear_complete** -- If set to *true*, if a proc instance has completed (either successfully or unsuccessfully) then its status will be cleared and no longer returned in subsequent calls. Allowed values are: * true * false The default value is 'false'. Returns: A dict with the following entries-- proc_names (dict of str to str) The proc names corresponding to the returned run IDs. params (dict of str to dicts of str to str) The string params passed to :meth:`.execute_proc` for the returned run IDs. bin_params (dict of str to dicts of str to str) The binary params passed to :meth:`.execute_proc` for the returned run IDs. input_table_names (dict of str to lists of str) The input table names passed to :meth:`.execute_proc` for the returned run IDs. input_column_names (dict of str to dicts of str to lists of str) The input column names passed to :meth:`.execute_proc` for the returned run IDs, supplemented with the column names for input tables not included in the input column name map. output_table_names (dict of str to lists of str) The output table names passed to :meth:`.execute_proc` for the returned run IDs. options (dict of str to dicts of str to str) The optional parameters passed to :meth:`.execute_proc` for the returned run IDs. 
overall_statuses (dict of str to str) Overall statuses for the returned run IDs. Note that these are rollups and individual statuses may differ between data segments for the same run ID; see output parameter *statuses* and output parameter *messages* for statuses from individual data segments. Allowed values are: * **running** -- The proc instance is currently running. * **complete** -- The proc instance completed with no errors. * **killed** -- The proc instance was killed before completion. * **error** -- The proc instance failed with an error. statuses (dict of str to dicts of str to str) Statuses for the returned run IDs, grouped by data segment ID. messages (dict of str to dicts of str to str) Messages containing additional status information for the returned run IDs, grouped by data segment ID. results (dict of str to dicts of str to dicts of str to str) String results for the returned run IDs, grouped by data segment ID. bin_results (dict of str to dicts of str to dicts of str to str) Binary results for the returned run IDs, grouped by data segment ID. timings (dict of str to dicts of str to dicts of str to longs) Timing information for the returned run IDs, grouped by data segment ID. """ assert isinstance( run_id, (basestring)), "show_proc_status(): Argument 'run_id' must be (one) of type(s) '(basestring)'; given %s" % type( run_id ).__name__ assert isinstance( options, (dict)), "show_proc_status(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/show/proc/status" ) obj = {} obj['run_id'] = run_id obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/show/proc/status' ) return AttrDict( response )
# end show_proc_status # begin show_security
[docs] def show_security( self, names = None, options = {} ): """Shows security information relating to users and/or roles. If the caller is not a system administrator, only information relating to the caller and their roles is returned. Parameters: names (list of str) A list of names of users and/or roles about which security information is requested. If none are provided, information about all users and roles will be returned. The user can provide a single element (which will be automatically promoted to a list internally) or a list. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- types (dict of str to str) Map of user/role name to the type of that user/role. Allowed values are: * **internal_user** -- A user whose credentials are managed by the database system. * **external_user** -- A user whose credentials are managed by an external LDAP. * **role** -- A role. roles (dict of str to lists of str) Map of user/role name to a list of names of roles of which that user/role is a member. permissions (dict of str to lists of dicts of str to str) Map of user/role name to a list of permissions directly granted to that user/role. """ names = names if isinstance( names, list ) else ( [] if (names is None) else [ names ] ) assert isinstance( options, (dict)), "show_security(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/show/security" ) obj = {} obj['names'] = names obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/show/security' ) return AttrDict( response )
# end show_security # begin show_system_properties
[docs] def show_system_properties( self, options = {} ): """Returns server configuration and version related information to the caller. The admin tool uses it to present server related information to the user. Parameters: options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **properties** -- A list of comma separated names of properties requested. If not specified, all properties will be returned. Returns: A dict with the following entries-- property_map (dict of str to str) A map of server configuration parameters and version information. Allowed keys are: * **conf.enable_worker_http_servers** -- Boolean value indicating whether the system is configured for multi-head ingestion. Allowed values are: * **TRUE** -- Indicates that the system is configured for multi-head ingestion. * **FALSE** -- Indicates that the system is NOT configured for multi-head ingestion. * **conf.worker_http_server_ips** -- Semicolon (';') separated string of IP addresses of all the ingestion-enabled worker heads of the system. * **conf.worker_http_server_ports** -- Semicolon (';') separated string of the port numbers of all the ingestion-enabled worker ranks of the system. * **conf.hm_http_port** -- The host manager port number (an integer value). """ assert isinstance( options, (dict)), "show_system_properties(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/show/system/properties" ) obj = {} obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/show/system/properties' ) return AttrDict( response )
# end show_system_properties # begin show_system_status
[docs] def show_system_status( self, options = {} ): """Provides server configuration and health related status to the caller. The admin tool uses it to present server related information to the user. Parameters: options (dict of str to str) Optional parameters, currently unused. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- status_map (dict of str to str) A map of server configuration and health related status. """ assert isinstance( options, (dict)), "show_system_status(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/show/system/status" ) obj = {} obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/show/system/status' ) return AttrDict( response )
# end show_system_status # begin show_system_timing
[docs] def show_system_timing( self, options = {} ): """Returns the last 100 database requests along with the request timing and internal job id. The admin tool uses it to present request timing information to the user. Parameters: options (dict of str to str) Optional parameters, currently unused. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- endpoints (list of str) List of recently called endpoints, most recent first. time_in_ms (list of floats) List of time (in ms) of the recent requests. jobIds (list of str) List of the internal job ids for the recent requests. """ assert isinstance( options, (dict)), "show_system_timing(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/show/system/timing" ) obj = {} obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/show/system/timing' ) return AttrDict( response )
# end show_system_timing # begin show_table
[docs] def show_table( self, table_name = None, options = {} ): """Retrieves detailed information about tables, views, and collections. If input parameter *table_name* specifies a table or view, information specific to that entity will be returned. If input parameter *table_name* specifies a collection, the call can return information about either the collection itself (setting the *show_children* option to *false*) or the tables and views it contains (setting *show_children* to *true*). If input parameter *table_name* is empty, information about all collections and top-level tables and views can be returned. Note: *show_children* must be set to *true*. If input parameter *table_name* is '*', information about all tables, collections, and views will be returned. Note: *show_children* must be set to *true*. If the option *get_sizes* is set to *true*, then the sizes (objects and elements) of each table are returned (in output parameter *sizes* and output parameter *full_sizes*), along with the total number of objects in the requested table (in output parameter *total_size* and output parameter *total_full_size*). Parameters: table_name (str) Name of the table for which to retrieve the information. If blank, then information about all collections and top-level tables and views is returned. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **force_synchronous** -- If *true* then the table sizes will wait for read lock before returning. Allowed values are: * true * false The default value is 'true'. * **get_sizes** -- If *true* then the table sizes will be returned; blank, otherwise. Allowed values are: * true * false The default value is 'false'. * **show_children** -- If input parameter *table_name* is a collection, then *true* will return information about the children of the collection, while *false* will return information about the collection itself. 
If input parameter *table_name* is empty or '*', then *show_children* must be *true* (or not specified); otherwise, no results will be returned. Allowed values are: * true * false The default value is 'true'. * **no_error_if_not_exists** -- If *false* will return an error if the provided input parameter *table_name* does not exist. If *true* then it will return an empty result. Allowed values are: * true * false The default value is 'false'. * **get_column_info** -- If *true* then column info (memory usage, etc) will be returned. Allowed values are: * true * false The default value is 'false'. Returns: A dict with the following entries-- table_name (str) Value of input parameter *table_name*. table_names (list of str) If input parameter *table_name* is a table or view, then the single element of the array is input parameter *table_name*. If input parameter *table_name* is a collection and *show_children* is set to *true*, then this array is populated with the names of all tables and views contained by the given collection; if *show_children* is *false* then this array will only include the collection name itself. If input parameter *table_name* is an empty string, then the array contains the names of all collections and top-level tables. table_descriptions (list of lists of str) List of descriptions for the respective tables in output parameter *table_names*. Allowed values are: * COLLECTION * VIEW * REPLICATED * JOIN * RESULT_TABLE type_ids (list of str) Type ids of the respective tables in output parameter *table_names*. type_schemas (list of str) Type schemas of the respective tables in output parameter *table_names*. type_labels (list of str) Type labels of the respective tables in output parameter *table_names*. properties (list of dicts of str to lists of str) Property maps of the respective tables in output parameter *table_names*. 
additional_info (list of dicts of str to str) Additional information about the respective tables in output parameter *table_names*. Allowed values are: * @INNER_STRUCTURE sizes (list of longs) Empty array if the *get_sizes* option is *false*. Otherwise, sizes of the respective tables represented in output parameter *table_names*. For all but track data types, this is simply the number of total objects in a table. For track types, since each track semantically contains many individual objects, the output parameter *sizes* are the counts of conceptual tracks (each of which may be associated with multiple objects). full_sizes (list of longs) Empty array if the *get_sizes* option is *false*. Otherwise, number of total objects in the respective tables represented in output parameter *table_names*. For all but track data types, this is the same as output parameter *sizes*. For track types, since each track semantically contains many individual objects, output parameter *full_sizes* is the count of total objects. join_sizes (list of floats) Empty array if the *get_sizes* option is *false*. Otherwise, number of unfiltered objects in the cross product of the sub-tables in the joined-tables represented in output parameter *table_names*. For simple tables, this number will be the same as output parameter *sizes*. For join-tables this value gives the number of joined-table rows that must be processed by any aggregate functions operating on the table. total_size (long) -1 if the *get_sizes* option is *false*. Otherwise, the sum of the elements of output parameter *sizes*. total_full_size (long) -1 if the *get_sizes* option is *false*. The sum of the elements of output parameter *full_sizes*. 
""" assert isinstance( table_name, (basestring)), "show_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__ assert isinstance( options, (dict)), "show_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/show/table" ) obj = {} obj['table_name'] = table_name obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/show/table' ) if not _Util.is_ok( response ): return AttrDict( response ) # Create record types for the returned types and save them for __type_info in zip( response["type_ids"], response["type_labels"], response["type_schemas"], response["properties"] ): # Create a type only if it is not colleciton if (__type_info[ 1 ] != "<collection>"): record_type = RecordType.from_type_schema( __type_info[ 1 ], __type_info[ 2 ], __type_info[ 3 ] ) self.save_known_type( __type_info[ 0 ], record_type ) # end loop return AttrDict( response )
# end show_table # begin show_table_metadata
[docs] def show_table_metadata( self, table_names = None, options = {} ): """Retrieves the user provided metadata for the specified tables. Parameters: table_names (list of str) Tables whose metadata will be fetched. All provided tables must exist, or an error is returned. The user can provide a single element (which will be automatically promoted to a list internally) or a list. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Returns: A dict with the following entries-- table_names (list of str) Value of input parameter *table_names*. metadata_maps (list of dicts of str to str) A list of maps which contain the metadata of the tables in the order the tables are listed in input parameter *table_names*. Each map has (metadata attribute name, metadata attribute value) pairs. """ table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] ) assert isinstance( options, (dict)), "show_table_metadata(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/show/table/metadata" ) obj = {} obj['table_names'] = table_names obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/show/table/metadata' ) return AttrDict( response )
# end show_table_metadata # begin show_tables_by_type
def show_tables_by_type( self, type_id = None, label = None, options = {} ):
    """Gets names of the tables whose type matches the given criteria.

    Each table has a particular type comprising the schema and properties
    of the table and sometimes a type label.  This method allows a look up
    of the existing tables based on full or partial type information.  The
    operation is synchronous.

    Parameters:
        type_id (str)
            Type id returned by a call to :meth:`.create_type`.

        label (str)
            Optional user supplied label which can be used instead of the
            type_id to retrieve all tables with the given label.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        table_names (list of str)
            List of tables matching the input criteria.
    """
    # Validate argument types up front.
    assert isinstance( type_id, (basestring)), "show_tables_by_type(): Argument 'type_id' must be (one) of type(s) '(basestring)'; given %s" % type( type_id ).__name__
    assert isinstance( label, (basestring)), "show_tables_by_type(): Argument 'label' must be (one) of type(s) '(basestring)'; given %s" % type( label ).__name__
    assert isinstance( options, (dict)), "show_tables_by_type(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    endpoint = '/show/tables/bytype'
    REQ_SCHEMA, RSP_SCHEMA = self.__get_schemas( "/show/tables/bytype" )

    # Assemble the request payload in a single literal.
    request = {
        'type_id': type_id,
        'label':   label,
        'options': self.__sanitize_dicts( options ),
    }

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, request, endpoint )
    return AttrDict( response )
# end show_tables_by_type # begin show_triggers
def show_triggers( self, trigger_ids = None, options = {} ):
    """Retrieves information regarding the specified triggers or all
    existing triggers currently active.

    Parameters:
        trigger_ids (list of str)
            List of IDs of the triggers whose information is to be
            retrieved.  An empty list means information will be retrieved
            on all active triggers.  A single string is automatically
            promoted to a one-element list.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        trigger_map (dict of str to dicts of str to str)
            Maps each trigger ID (a unicode string) to an information
            map/dictionary whose keys consist of 'table_name', 'type' and
            the parameter names relating to the trigger type, e.g. *nai*,
            *min*, *max*.  All values are unicode strings (numeric values
            are converted to strings).  If a trigger is associated with
            multiple tables, the *table_name* value is a comma separated
            list of table names.
    """
    # A scalar trigger ID is promoted to a one-element list; None -> [].
    if not isinstance( trigger_ids, list ):
        trigger_ids = [] if (trigger_ids is None) else [ trigger_ids ]
    assert isinstance( options, (dict)), "show_triggers(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    endpoint = '/show/triggers'
    REQ_SCHEMA, RSP_SCHEMA = self.__get_schemas( "/show/triggers" )

    request = {
        'trigger_ids': trigger_ids,
        'options':     self.__sanitize_dicts( options ),
    }

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, request, endpoint )
    return AttrDict( response )
# end show_triggers # begin show_types
def show_types( self, type_id = None, label = None, options = None ):
    """Retrieves information for the specified data type ID or type label.
    For all data types that match the input criteria, the database returns
    the type ID, the type schema, the label (if available), and the type's
    column properties.

    Parameters:
        type_id (str)
            Type Id returned in response to a call to :meth:`.create_type`.

        label (str)
            Option string that was supplied by user in a call to
            :meth:`.create_type`.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).
            Allowed keys are:

            * **no_join_types** --
              When set to 'true', no join types will be included.
              Allowed values are: true, false.  The default value is 'false'.

    Returns:
        A dict with the following entries--

        type_ids (list of str)

        type_schemas (list of str)

        labels (list of str)

        properties (list of dicts of str to lists of str)
    """
    assert isinstance( type_id, (basestring)), "show_types(): Argument 'type_id' must be (one) of type(s) '(basestring)'; given %s" % type( type_id ).__name__
    assert isinstance( label, (basestring)), "show_types(): Argument 'label' must be (one) of type(s) '(basestring)'; given %s" % type( label ).__name__
    # Use a fresh dict per call instead of a shared mutable default argument.
    options = {} if (options is None) else options
    assert isinstance( options, (dict)), "show_types(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/show/types" )

    obj = {}
    obj['type_id'] = type_id
    obj['label'] = label
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/show/types' )

    if not _Util.is_ok( response ):
        return AttrDict( response )

    # Create record types for the returned types and cache them for later use
    for type_id_, type_label, type_schema, type_props in zip( response["type_ids"],
                                                              response["labels"],
                                                              response["type_schemas"],
                                                              response["properties"] ):
        # Create a type only if it is not a collection
        if (type_label != "<collection>"):
            record_type = RecordType.from_type_schema( type_label, type_schema, type_props )
            self.save_known_type( type_id_, record_type )
    # end loop

    return AttrDict( response )
# end show_types # begin update_records
def update_records( self, table_name = None, expressions = None, new_values_maps = None,
                    records_to_insert = None, records_to_insert_str = None,
                    record_encoding = 'binary', options = None, record_type = None ):
    """Runs multiple predicate-based updates in a single call.  With the
    list of given expressions, any matching record's column values will be
    updated as provided in input parameter *new_values_maps*.  There is
    also an optional 'upsert' capability where if a particular predicate
    doesn't match any existing record, then a new record can be inserted.

    Note that this operation can only be run on an original table and not
    on a collection or a result view.

    This operation can update primary key values.  By default only 'pure
    primary key' predicates are allowed when updating primary key values
    (equality predicates covering every primary-key column, each unique
    within a given request); these restrictions can be removed via input
    parameter *options* (e.g. *bypass_safety_checks*).

    Parameters:
        table_name (str)
            Table to be updated.  Must be a currently existing table and
            not a collection or view.

        expressions (list of str)
            A list of the actual predicates, one for each update; format
            should follow the guidelines :meth:`here <.filter>`.  A single
            string is automatically promoted to a one-element list.

        new_values_maps (list of dicts of str to str and/or None)
            List of new values for the matching records.  Each element is a
            map whose keys are the names of the columns to update and whose
            values are the new values.  Must match the length of input
            parameter *expressions*.  A single dict is automatically
            promoted to a one-element list.

        records_to_insert (list of str)
            An *optional* list of new binary-avro encoded records to
            insert, one for each update, used when an expression matches no
            record.  The default value is an empty list ( [] ).

        records_to_insert_str (list of str)
            An optional list of new json-avro encoded objects to insert,
            one for each update that affected no objects.  The default
            value is an empty list ( [] ).

        record_encoding (str)
            Identifies which of *records_to_insert* and
            *records_to_insert_str* should be used.
            Allowed values are: binary, json.  The default value is 'binary'.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).
            Allowed keys include *global_expression*,
            *bypass_safety_checks*, *update_on_existing_pk*,
            *use_expressions_in_new_values_maps*, and *record_id*.

        record_type (RecordType)
            A :class:`RecordType` object using which the binary data will
            be encoded.  If None, then it is assumed that the data is
            already encoded, and no further encoding will occur.  Default
            is None.

    Returns:
        A dict with the following entries--

        count_updated (long)
            Total number of records updated.

        counts_updated (list of longs)
            Total number of records updated per predicate in input
            parameter *expressions*.

        count_inserted (long)
            Total number of records inserted (due to expressions not
            matching any existing records).

        counts_inserted (list of longs)
            Total number of records inserted per predicate in input
            parameter *expressions* (will be either 0 or 1 for each
            expression).
    """
    assert isinstance( table_name, (basestring)), "update_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    # Promote scalar arguments to one-element lists; None becomes [].
    expressions = expressions if isinstance( expressions, list ) else ( [] if (expressions is None) else [ expressions ] )
    new_values_maps = new_values_maps if isinstance( new_values_maps, list ) else ( [] if (new_values_maps is None) else [ new_values_maps ] )
    records_to_insert = records_to_insert if isinstance( records_to_insert, list ) else ( [] if (records_to_insert is None) else [ records_to_insert ] )
    records_to_insert_str = records_to_insert_str if isinstance( records_to_insert_str, list ) else ( [] if (records_to_insert_str is None) else [ records_to_insert_str ] )
    assert isinstance( record_encoding, (basestring)), "update_records(): Argument 'record_encoding' must be (one) of type(s) '(basestring)'; given %s" % type( record_encoding ).__name__
    # Use a fresh dict per call instead of a shared mutable default argument.
    options = {} if (options is None) else options
    assert isinstance( options, (dict)), "update_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
    assert ( (record_type is None) or isinstance(record_type, RecordType) ), "update_records: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__

    # Two request schemas: the regular avro one and the c-extension one.
    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/update/records" )
    (REQ_SCHEMA_CEXT, RSP_SCHEMA) = self.__get_schemas( "/update/records", get_req_cext = True )

    obj = {}
    obj['table_name'] = table_name
    obj['expressions'] = expressions
    obj['new_values_maps'] = new_values_maps
    obj['records_to_insert_str'] = records_to_insert_str
    obj['record_encoding'] = record_encoding
    obj['options'] = self.__sanitize_dicts( options )

    if (record_encoding == 'binary'):
        # Convert the objects to proper Records
        use_object_array, data = _Util.convert_binary_data_to_cext_records( self, table_name, records_to_insert, record_type )
        if use_object_array:
            # First tuple element must be a RecordType or a Schema from the c-extension
            obj['records_to_insert'] = (data[0].type, data) if data else ()
        else:
            # use avro-encoded bytes for the data
            obj['records_to_insert'] = data
    else:
        use_object_array = False
        obj['records_to_insert'] = []
    # end if

    # Pick the c-extension schema only when object arrays are in play.
    if use_object_array:
        response = self.__post_then_get_cext( REQ_SCHEMA_CEXT, RSP_SCHEMA, obj, '/update/records' )
    else:
        response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/update/records' )

    # The original code returned AttrDict( response ) on both the ok and
    # not-ok paths; a single return is equivalent.
    return AttrDict( response )
# end update_records # begin update_records_by_series
def update_records_by_series( self, table_name = None, world_table_name = None, view_name = '', reserved = None, options = None ):
    """Updates the view specified by input parameter *table_name* to
    include full series (track) information from the input parameter
    *world_table_name* for the series (tracks) present in the input
    parameter *view_name*.

    Parameters:
        table_name (str)
            Name of the view on which the update operation will be
            performed.  Must be an existing view.

        world_table_name (str)
            Name of the table containing the complete series (track)
            information.

        view_name (str)
            Optional name of the view containing the series (tracks) which
            have to be updated.  The default value is ''.

        reserved (list of str)
            The default value is an empty list ( [] ).  A single string is
            automatically promoted to a one-element list.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        count (int)
    """
    assert isinstance( table_name, (basestring)), "update_records_by_series(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    assert isinstance( world_table_name, (basestring)), "update_records_by_series(): Argument 'world_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( world_table_name ).__name__
    assert isinstance( view_name, (basestring)), "update_records_by_series(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
    # Promote a scalar to a one-element list; None becomes an empty list.
    reserved = reserved if isinstance( reserved, list ) else ( [] if (reserved is None) else [ reserved ] )
    # Use a fresh dict per call instead of a shared mutable default argument.
    options = {} if (options is None) else options
    assert isinstance( options, (dict)), "update_records_by_series(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/update/records/byseries" )

    obj = {}
    obj['table_name'] = table_name
    obj['world_table_name'] = world_table_name
    obj['view_name'] = view_name
    obj['reserved'] = reserved
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/update/records/byseries' )

    return AttrDict( response )
# end update_records_by_series


# begin visualize_image
def visualize_image( self, table_names = None, world_table_names = None, x_column_name = None, y_column_name = None, geometry_column_name = None, track_ids = None, min_x = None, max_x = None, min_y = None, max_y = None, width = None, height = None, projection = 'PLATE_CARREE', bg_color = None, style_options = None, options = {} ):
    """Internal wrapper for the /visualize/image endpoint: renders the
    given tables as an image and returns the raw endpoint response."""
    # Promote scalar values for list-valued parameters; None becomes [].
    if not isinstance( table_names, list ):
        table_names = [] if (table_names is None) else [ table_names ]
    if not isinstance( world_table_names, list ):
        world_table_names = [] if (world_table_names is None) else [ world_table_names ]

    assert isinstance( x_column_name, (basestring)), "visualize_image(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
    assert isinstance( y_column_name, (basestring)), "visualize_image(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
    assert isinstance( geometry_column_name, (basestring)), "visualize_image(): Argument 'geometry_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( geometry_column_name ).__name__

    if not isinstance( track_ids, list ):
        track_ids = [] if (track_ids is None) else [ track_ids ]

    # Numeric bounds and dimensions must all be numbers.
    assert isinstance( min_x, (int, long, float)), "visualize_image(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
    assert isinstance( max_x, (int, long, float)), "visualize_image(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
    assert isinstance( min_y, (int, long, float)), "visualize_image(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
    assert isinstance( max_y, (int, long, float)), "visualize_image(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
    assert isinstance( width, (int, long, float)), "visualize_image(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__
    assert isinstance( height, (int, long, float)), "visualize_image(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__
    assert isinstance( projection, (basestring)), "visualize_image(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__
    assert isinstance( bg_color, (int, long, float)), "visualize_image(): Argument 'bg_color' must be (one) of type(s) '(int, long, float)'; given %s" % type( bg_color ).__name__
    assert isinstance( style_options, (dict)), "visualize_image(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__
    assert isinstance( options, (dict)), "visualize_image(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    endpoint = '/visualize/image'
    REQ_SCHEMA, RSP_SCHEMA = self.__get_schemas( "/visualize/image" )

    # Assemble the request payload in a single literal.
    request = {
        'table_names':          table_names,
        'world_table_names':    world_table_names,
        'x_column_name':        x_column_name,
        'y_column_name':        y_column_name,
        'geometry_column_name': geometry_column_name,
        'track_ids':            track_ids,
        'min_x':                min_x,
        'max_x':                max_x,
        'min_y':                min_y,
        'max_y':                max_y,
        'width':                width,
        'height':               height,
        'projection':           projection,
        'bg_color':             bg_color,
        'style_options':        self.__sanitize_dicts( style_options ),
        'options':              self.__sanitize_dicts( options ),
    }

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, request, endpoint )
    return AttrDict( response )
# end visualize_image


# begin visualize_image_chart
def visualize_image_chart( self, table_name = None, x_column_names = None, y_column_names = None, min_x = None, max_x = None, min_y = None, max_y = None, width = None, height = None, bg_color = None, style_options = None, options = None ):
    """Scatter plot is the only plot type currently supported.  A
    non-numeric column can be specified as x or y column and jitters can be
    added to them to avoid excessive overlapping.  All color values must be
    in the format RRGGBB or AARRGGBB (to specify the alpha value).  The
    image is contained in the output parameter *image_data* field.

    Parameters:
        table_name (str)
            Name of the table containing the data to be drawn as a chart.

        x_column_names (list of str)
            Names of the columns containing the data mapped to the x axis
            of a chart.  A single string is automatically promoted to a
            one-element list.

        y_column_names (list of str)
            Names of the columns containing the data mapped to the y axis
            of a chart.  A single string is automatically promoted to a
            one-element list.

        min_x (float)
            Lower bound for the x column values.  For non-numeric x column,
            each x column item is mapped to an integral value starting from 0.

        max_x (float)
            Upper bound for the x column values (non-numeric as above).

        min_y (float)
            Lower bound for the y column values (non-numeric as above).

        max_y (float)
            Upper bound for the y column values (non-numeric as above).

        width (int)
            Width of the generated image in pixels.

        height (int)
            Height of the generated image in pixels.

        bg_color (str)
            Background color of the generated image.

        style_options (dict of str to lists of str)
            Rendering style options for a chart.  Notable keys:
            *pointcolor*, *pointsize*, *pointshape*, *cb_pointcolors*,
            *cb_pointsizes*, *cb_pointshapes*, *cb_delimiter*,
            *x_order_by*, *y_order_by*, *scale_type_x*, *scale_type_y*,
            *min_max_scaled*, *jitter_x*, *jitter_y*, *plot_all*.

        options (dict of str to str)
            Optional parameters.  The default value is an empty dict ( {} ).

    Returns:
        A dict with the following entries--

        min_x, max_x, min_y, max_y (float)
            Bounds as provided (or calculated for non-numeric columns when
            the plot_all option is used).

        width (int), height (int)
            Image dimensions as provided.

        bg_color (str)
            Background color of the image as provided.

        image_data (str)
            The generated image data.

        axes_info (dict of str to lists of str)
            Label data for non-numeric axes: *sorted_x_values*,
            *location_x*, *sorted_y_values*, *location_y*.
    """
    assert isinstance( table_name, (basestring)), "visualize_image_chart(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
    # Promote scalar column names to one-element lists; None becomes [].
    x_column_names = x_column_names if isinstance( x_column_names, list ) else ( [] if (x_column_names is None) else [ x_column_names ] )
    y_column_names = y_column_names if isinstance( y_column_names, list ) else ( [] if (y_column_names is None) else [ y_column_names ] )
    assert isinstance( min_x, (int, long, float)), "visualize_image_chart(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
    assert isinstance( max_x, (int, long, float)), "visualize_image_chart(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
    assert isinstance( min_y, (int, long, float)), "visualize_image_chart(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
    assert isinstance( max_y, (int, long, float)), "visualize_image_chart(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
    assert isinstance( width, (int, long, float)), "visualize_image_chart(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__
    assert isinstance( height, (int, long, float)), "visualize_image_chart(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__
    assert isinstance( bg_color, (basestring)), "visualize_image_chart(): Argument 'bg_color' must be (one) of type(s) '(basestring)'; given %s" % type( bg_color ).__name__
    assert isinstance( style_options, (dict)), "visualize_image_chart(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__
    # Use a fresh dict per call instead of a shared mutable default argument.
    options = {} if (options is None) else options
    assert isinstance( options, (dict)), "visualize_image_chart(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/visualize/image/chart" )

    obj = {}
    obj['table_name'] = table_name
    obj['x_column_names'] = x_column_names
    obj['y_column_names'] = y_column_names
    obj['min_x'] = min_x
    obj['max_x'] = max_x
    obj['min_y'] = min_y
    obj['max_y'] = max_y
    obj['width'] = width
    obj['height'] = height
    obj['bg_color'] = bg_color
    obj['style_options'] = self.__sanitize_dicts( style_options )
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/visualize/image/chart' )

    return AttrDict( response )
# end visualize_image_chart


# begin visualize_image_classbreak
def visualize_image_classbreak( self, table_names = None, world_table_names = None, x_column_name = None, y_column_name = None, geometry_column_name = None, track_ids = None, cb_attr = None, cb_vals = None, cb_pointcolor_attr = None, cb_pointcolor_vals = None, cb_pointsize_attr = None, cb_pointsize_vals = None, cb_pointshape_attr = None, cb_pointshape_vals = None, min_x = None, max_x = None, min_y = None, max_y = None, width = None, height = None, projection = 'PLATE_CARREE', bg_color = None, style_options = None, options = {} ):
    # Internal (undocumented) wrapper for the /visualize/image/classbreak
    # endpoint: renders tables as an image with class-break styling
    # (per-class point color/size/shape values keyed off cb_* attributes).
    # NOTE(review): autogenerated code; the mutable default for 'options'
    # is preserved here byte-for-byte.

    # Promote scalar values for list-valued parameters to one-element
    # lists; None becomes an empty list.
    table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
    world_table_names = world_table_names if isinstance( world_table_names, list ) else ( [] if (world_table_names is None) else [ world_table_names ] )
    assert isinstance( x_column_name, (basestring)), "visualize_image_classbreak(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
    assert isinstance( y_column_name, (basestring)), "visualize_image_classbreak(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
    assert isinstance( geometry_column_name, (basestring)), "visualize_image_classbreak(): Argument 'geometry_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( geometry_column_name ).__name__
    track_ids = track_ids if isinstance( track_ids, list ) else ( [] if (track_ids is None) else [ track_ids ] )
    # Class-break attribute names are strings; their value lists are
    # promoted the same way as the other list parameters.
    assert isinstance( cb_attr, (basestring)), "visualize_image_classbreak(): Argument 'cb_attr' must be (one) of type(s) '(basestring)'; given %s" % type( cb_attr ).__name__
    cb_vals = cb_vals if isinstance( cb_vals, list ) else ( [] if (cb_vals is None) else [ cb_vals ] )
    assert isinstance( cb_pointcolor_attr, (basestring)), "visualize_image_classbreak(): Argument 'cb_pointcolor_attr' must be (one) of type(s) '(basestring)'; given %s" % type( cb_pointcolor_attr ).__name__
    cb_pointcolor_vals = cb_pointcolor_vals if isinstance( cb_pointcolor_vals, list ) else ( [] if (cb_pointcolor_vals is None) else [ cb_pointcolor_vals ] )
    assert isinstance( cb_pointsize_attr, (basestring)), "visualize_image_classbreak(): Argument 'cb_pointsize_attr' must be (one) of type(s) '(basestring)'; given %s" % type( cb_pointsize_attr ).__name__
    cb_pointsize_vals = cb_pointsize_vals if isinstance( cb_pointsize_vals, list ) else ( [] if (cb_pointsize_vals is None) else [ cb_pointsize_vals ] )
    assert isinstance( cb_pointshape_attr, (basestring)), "visualize_image_classbreak(): Argument 'cb_pointshape_attr' must be (one) of type(s) '(basestring)'; given %s" % type( cb_pointshape_attr ).__name__
    cb_pointshape_vals = cb_pointshape_vals if isinstance( cb_pointshape_vals, list ) else ( [] if (cb_pointshape_vals is None) else [ cb_pointshape_vals ] )
    # Numeric bounds, image dimensions, and background color must be numbers.
    assert isinstance( min_x, (int, long, float)), "visualize_image_classbreak(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
    assert isinstance( max_x, (int, long, float)), "visualize_image_classbreak(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
    assert isinstance( min_y, (int, long, float)), "visualize_image_classbreak(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
    assert isinstance( max_y, (int, long, float)), "visualize_image_classbreak(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
    assert isinstance( width, (int, long, float)), "visualize_image_classbreak(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__
    assert isinstance( height, (int, long, float)), "visualize_image_classbreak(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__
    assert isinstance( projection, (basestring)), "visualize_image_classbreak(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__
    assert isinstance( bg_color, (int, long, float)), "visualize_image_classbreak(): Argument 'bg_color' must be (one) of type(s) '(int, long, float)'; given %s" % type( bg_color ).__name__
    assert isinstance( style_options, (dict)), "visualize_image_classbreak(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__
    assert isinstance( options, (dict)), "visualize_image_classbreak(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__

    (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/visualize/image/classbreak" )

    # Build the request payload, one field per endpoint parameter.
    obj = {}
    obj['table_names'] = table_names
    obj['world_table_names'] = world_table_names
    obj['x_column_name'] = x_column_name
    obj['y_column_name'] = y_column_name
    obj['geometry_column_name'] = geometry_column_name
    obj['track_ids'] = track_ids
    obj['cb_attr'] = cb_attr
    obj['cb_vals'] = cb_vals
    obj['cb_pointcolor_attr'] = cb_pointcolor_attr
    obj['cb_pointcolor_vals'] = cb_pointcolor_vals
    obj['cb_pointsize_attr'] = cb_pointsize_attr
    obj['cb_pointsize_vals'] = cb_pointsize_vals
    obj['cb_pointshape_attr'] = cb_pointshape_attr
    obj['cb_pointshape_vals'] = cb_pointshape_vals
    obj['min_x'] = min_x
    obj['max_x'] = max_x
    obj['min_y'] = min_y
    obj['max_y'] = max_y
    obj['width'] = width
    obj['height'] = height
    obj['projection'] = projection
    obj['bg_color'] = bg_color
    obj['style_options'] = self.__sanitize_dicts( style_options )
    obj['options'] = self.__sanitize_dicts( options )

    response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/visualize/image/classbreak' )

    return AttrDict( response )
# end visualize_image_classbreak


# begin visualize_image_contour
def visualize_image_contour( self, table_names = None, x_column_name = None, y_column_name = None, value_column_name = None, min_x = None, max_x = None, min_y = None, max_y = None, width = 
None, height = None, projection = 'PLATE_CARREE', style_options = None, options = {} ): table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] ) assert isinstance( x_column_name, (basestring)), "visualize_image_contour(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__ assert isinstance( y_column_name, (basestring)), "visualize_image_contour(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__ assert isinstance( value_column_name, (basestring)), "visualize_image_contour(): Argument 'value_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( value_column_name ).__name__ assert isinstance( min_x, (int, long, float)), "visualize_image_contour(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__ assert isinstance( max_x, (int, long, float)), "visualize_image_contour(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__ assert isinstance( min_y, (int, long, float)), "visualize_image_contour(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__ assert isinstance( max_y, (int, long, float)), "visualize_image_contour(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__ assert isinstance( width, (int, long, float)), "visualize_image_contour(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__ assert isinstance( height, (int, long, float)), "visualize_image_contour(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__ assert isinstance( projection, (basestring)), "visualize_image_contour(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection 
).__name__ assert isinstance( style_options, (dict)), "visualize_image_contour(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__ assert isinstance( options, (dict)), "visualize_image_contour(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/visualize/image/contour" ) obj = {} obj['table_names'] = table_names obj['x_column_name'] = x_column_name obj['y_column_name'] = y_column_name obj['value_column_name'] = value_column_name obj['min_x'] = min_x obj['max_x'] = max_x obj['min_y'] = min_y obj['max_y'] = max_y obj['width'] = width obj['height'] = height obj['projection'] = projection obj['style_options'] = self.__sanitize_dicts( style_options ) obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/visualize/image/contour' ) return AttrDict( response ) # end visualize_image_contour # begin visualize_image_heatmap def visualize_image_heatmap( self, table_names = None, x_column_name = None, y_column_name = None, value_column_name = None, geometry_column_name = None, min_x = None, max_x = None, min_y = None, max_y = None, width = None, height = None, projection = 'PLATE_CARREE', style_options = None, options = {} ): table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] ) assert isinstance( x_column_name, (basestring)), "visualize_image_heatmap(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__ assert isinstance( y_column_name, (basestring)), "visualize_image_heatmap(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__ assert isinstance( value_column_name, (basestring)), "visualize_image_heatmap(): Argument 'value_column_name' must be (one) of type(s) '(basestring)'; given %s" % 
type( value_column_name ).__name__ assert isinstance( geometry_column_name, (basestring)), "visualize_image_heatmap(): Argument 'geometry_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( geometry_column_name ).__name__ assert isinstance( min_x, (int, long, float)), "visualize_image_heatmap(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__ assert isinstance( max_x, (int, long, float)), "visualize_image_heatmap(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__ assert isinstance( min_y, (int, long, float)), "visualize_image_heatmap(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__ assert isinstance( max_y, (int, long, float)), "visualize_image_heatmap(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__ assert isinstance( width, (int, long, float)), "visualize_image_heatmap(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__ assert isinstance( height, (int, long, float)), "visualize_image_heatmap(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__ assert isinstance( projection, (basestring)), "visualize_image_heatmap(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__ assert isinstance( style_options, (dict)), "visualize_image_heatmap(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__ assert isinstance( options, (dict)), "visualize_image_heatmap(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/visualize/image/heatmap" ) obj = {} obj['table_names'] = table_names obj['x_column_name'] = x_column_name obj['y_column_name'] = y_column_name 
obj['value_column_name'] = value_column_name obj['geometry_column_name'] = geometry_column_name obj['min_x'] = min_x obj['max_x'] = max_x obj['min_y'] = min_y obj['max_y'] = max_y obj['width'] = width obj['height'] = height obj['projection'] = projection obj['style_options'] = self.__sanitize_dicts( style_options ) obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/visualize/image/heatmap' ) return AttrDict( response ) # end visualize_image_heatmap # begin visualize_image_labels def visualize_image_labels( self, table_name = None, x_column_name = None, y_column_name = None, x_offset = '', y_offset = '', text_string = None, font = '', text_color = '', text_angle = '', text_scale = '', draw_box = '', draw_leader = '', line_width = '', line_color = '', fill_color = '', leader_x_column_name = '', leader_y_column_name = '', filter = '', min_x = None, max_x = None, min_y = None, max_y = None, width = None, height = None, projection = 'PLATE_CARREE', options = {} ): assert isinstance( table_name, (basestring)), "visualize_image_labels(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__ assert isinstance( x_column_name, (basestring)), "visualize_image_labels(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__ assert isinstance( y_column_name, (basestring)), "visualize_image_labels(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__ assert isinstance( x_offset, (basestring)), "visualize_image_labels(): Argument 'x_offset' must be (one) of type(s) '(basestring)'; given %s" % type( x_offset ).__name__ assert isinstance( y_offset, (basestring)), "visualize_image_labels(): Argument 'y_offset' must be (one) of type(s) '(basestring)'; given %s" % type( y_offset ).__name__ assert isinstance( text_string, (basestring)), 
"visualize_image_labels(): Argument 'text_string' must be (one) of type(s) '(basestring)'; given %s" % type( text_string ).__name__ assert isinstance( font, (basestring)), "visualize_image_labels(): Argument 'font' must be (one) of type(s) '(basestring)'; given %s" % type( font ).__name__ assert isinstance( text_color, (basestring)), "visualize_image_labels(): Argument 'text_color' must be (one) of type(s) '(basestring)'; given %s" % type( text_color ).__name__ assert isinstance( text_angle, (basestring)), "visualize_image_labels(): Argument 'text_angle' must be (one) of type(s) '(basestring)'; given %s" % type( text_angle ).__name__ assert isinstance( text_scale, (basestring)), "visualize_image_labels(): Argument 'text_scale' must be (one) of type(s) '(basestring)'; given %s" % type( text_scale ).__name__ assert isinstance( draw_box, (basestring)), "visualize_image_labels(): Argument 'draw_box' must be (one) of type(s) '(basestring)'; given %s" % type( draw_box ).__name__ assert isinstance( draw_leader, (basestring)), "visualize_image_labels(): Argument 'draw_leader' must be (one) of type(s) '(basestring)'; given %s" % type( draw_leader ).__name__ assert isinstance( line_width, (basestring)), "visualize_image_labels(): Argument 'line_width' must be (one) of type(s) '(basestring)'; given %s" % type( line_width ).__name__ assert isinstance( line_color, (basestring)), "visualize_image_labels(): Argument 'line_color' must be (one) of type(s) '(basestring)'; given %s" % type( line_color ).__name__ assert isinstance( fill_color, (basestring)), "visualize_image_labels(): Argument 'fill_color' must be (one) of type(s) '(basestring)'; given %s" % type( fill_color ).__name__ assert isinstance( leader_x_column_name, (basestring)), "visualize_image_labels(): Argument 'leader_x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( leader_x_column_name ).__name__ assert isinstance( leader_y_column_name, (basestring)), "visualize_image_labels(): Argument 
'leader_y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( leader_y_column_name ).__name__ assert isinstance( filter, (basestring)), "visualize_image_labels(): Argument 'filter' must be (one) of type(s) '(basestring)'; given %s" % type( filter ).__name__ assert isinstance( min_x, (int, long, float)), "visualize_image_labels(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__ assert isinstance( max_x, (int, long, float)), "visualize_image_labels(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__ assert isinstance( min_y, (int, long, float)), "visualize_image_labels(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__ assert isinstance( max_y, (int, long, float)), "visualize_image_labels(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__ assert isinstance( width, (int, long, float)), "visualize_image_labels(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__ assert isinstance( height, (int, long, float)), "visualize_image_labels(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__ assert isinstance( projection, (basestring)), "visualize_image_labels(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__ assert isinstance( options, (dict)), "visualize_image_labels(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/visualize/image/labels" ) obj = {} obj['table_name'] = table_name obj['x_column_name'] = x_column_name obj['y_column_name'] = y_column_name obj['x_offset'] = x_offset obj['y_offset'] = y_offset obj['text_string'] = text_string obj['font'] = font obj['text_color'] = text_color obj['text_angle'] = 
text_angle obj['text_scale'] = text_scale obj['draw_box'] = draw_box obj['draw_leader'] = draw_leader obj['line_width'] = line_width obj['line_color'] = line_color obj['fill_color'] = fill_color obj['leader_x_column_name'] = leader_x_column_name obj['leader_y_column_name'] = leader_y_column_name obj['filter'] = filter obj['min_x'] = min_x obj['max_x'] = max_x obj['min_y'] = min_y obj['max_y'] = max_y obj['width'] = width obj['height'] = height obj['projection'] = projection obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/visualize/image/labels' ) return AttrDict( response ) # end visualize_image_labels # begin visualize_video def visualize_video( self, table_names = None, world_table_names = None, track_ids = None, x_column_name = None, y_column_name = None, geometry_column_name = None, min_x = None, max_x = None, min_y = None, max_y = None, width = None, height = None, projection = 'PLATE_CARREE', bg_color = None, time_intervals = None, video_style = None, session_key = None, style_options = None, options = {} ): table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] ) world_table_names = world_table_names if isinstance( world_table_names, list ) else ( [] if (world_table_names is None) else [ world_table_names ] ) track_ids = track_ids if isinstance( track_ids, list ) else ( [] if (track_ids is None) else [ track_ids ] ) assert isinstance( x_column_name, (basestring)), "visualize_video(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__ assert isinstance( y_column_name, (basestring)), "visualize_video(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__ assert isinstance( geometry_column_name, (basestring)), "visualize_video(): Argument 'geometry_column_name' must be (one) of type(s) '(basestring)'; given %s" % 
type( geometry_column_name ).__name__ assert isinstance( min_x, (int, long, float)), "visualize_video(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__ assert isinstance( max_x, (int, long, float)), "visualize_video(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__ assert isinstance( min_y, (int, long, float)), "visualize_video(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__ assert isinstance( max_y, (int, long, float)), "visualize_video(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__ assert isinstance( width, (int, long, float)), "visualize_video(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__ assert isinstance( height, (int, long, float)), "visualize_video(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__ assert isinstance( projection, (basestring)), "visualize_video(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__ assert isinstance( bg_color, (int, long, float)), "visualize_video(): Argument 'bg_color' must be (one) of type(s) '(int, long, float)'; given %s" % type( bg_color ).__name__ time_intervals = time_intervals if isinstance( time_intervals, list ) else ( [] if (time_intervals is None) else [ time_intervals ] ) assert isinstance( video_style, (basestring)), "visualize_video(): Argument 'video_style' must be (one) of type(s) '(basestring)'; given %s" % type( video_style ).__name__ assert isinstance( session_key, (basestring)), "visualize_video(): Argument 'session_key' must be (one) of type(s) '(basestring)'; given %s" % type( session_key ).__name__ assert isinstance( style_options, (dict)), "visualize_video(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( 
style_options ).__name__ assert isinstance( options, (dict)), "visualize_video(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/visualize/video" ) obj = {} obj['table_names'] = table_names obj['world_table_names'] = world_table_names obj['track_ids'] = track_ids obj['x_column_name'] = x_column_name obj['y_column_name'] = y_column_name obj['geometry_column_name'] = geometry_column_name obj['min_x'] = min_x obj['max_x'] = max_x obj['min_y'] = min_y obj['max_y'] = max_y obj['width'] = width obj['height'] = height obj['projection'] = projection obj['bg_color'] = bg_color obj['time_intervals'] = time_intervals obj['video_style'] = video_style obj['session_key'] = session_key obj['style_options'] = self.__sanitize_dicts( style_options ) obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/visualize/video' ) return AttrDict( response ) # end visualize_video # begin visualize_video_heatmap def visualize_video_heatmap( self, table_names = None, x_column_name = None, y_column_name = None, min_x = None, max_x = None, min_y = None, max_y = None, time_intervals = None, width = None, height = None, projection = 'PLATE_CARREE', video_style = None, session_key = None, style_options = None, options = {} ): table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] ) assert isinstance( x_column_name, (basestring)), "visualize_video_heatmap(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__ assert isinstance( y_column_name, (basestring)), "visualize_video_heatmap(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__ assert isinstance( min_x, (int, long, float)), "visualize_video_heatmap(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; 
given %s" % type( min_x ).__name__ assert isinstance( max_x, (int, long, float)), "visualize_video_heatmap(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__ assert isinstance( min_y, (int, long, float)), "visualize_video_heatmap(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__ assert isinstance( max_y, (int, long, float)), "visualize_video_heatmap(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__ time_intervals = time_intervals if isinstance( time_intervals, list ) else ( [] if (time_intervals is None) else [ time_intervals ] ) assert isinstance( width, (int, long, float)), "visualize_video_heatmap(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__ assert isinstance( height, (int, long, float)), "visualize_video_heatmap(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__ assert isinstance( projection, (basestring)), "visualize_video_heatmap(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__ assert isinstance( video_style, (basestring)), "visualize_video_heatmap(): Argument 'video_style' must be (one) of type(s) '(basestring)'; given %s" % type( video_style ).__name__ assert isinstance( session_key, (basestring)), "visualize_video_heatmap(): Argument 'session_key' must be (one) of type(s) '(basestring)'; given %s" % type( session_key ).__name__ assert isinstance( style_options, (dict)), "visualize_video_heatmap(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__ assert isinstance( options, (dict)), "visualize_video_heatmap(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__ (REQ_SCHEMA, RSP_SCHEMA) = self.__get_schemas( "/visualize/video/heatmap" ) obj = {} 
obj['table_names'] = table_names obj['x_column_name'] = x_column_name obj['y_column_name'] = y_column_name obj['min_x'] = min_x obj['max_x'] = max_x obj['min_y'] = min_y obj['max_y'] = max_y obj['time_intervals'] = time_intervals obj['width'] = width obj['height'] = height obj['projection'] = projection obj['video_style'] = video_style obj['session_key'] = session_key obj['style_options'] = self.__sanitize_dicts( style_options ) obj['options'] = self.__sanitize_dicts( options ) response = self.__post_then_get_cext( REQ_SCHEMA, RSP_SCHEMA, obj, '/visualize/video/heatmap' ) return AttrDict( response )
# end visualize_video_heatmap # ----------------------------------------------------------------------- # End autogenerated functions # ----------------------------------------------------------------------- # end class GPUdb # --------------------------------------------------------------------------- # Import GPUdbIngestor; try from an installed package first, if not, try local if IS_PYTHON_3: try: from gpudb import GPUdbIngestor, RecordRetriever except: if not gpudb_module_path in sys.path : sys.path.insert(1, gpudb_module_path) from gpudb_multihead_io import GPUdbIngestor, RecordRetriever else: try: from gpudb import GPUdbIngestor, RecordRetriever except: if not gpudb_module_path in sys.path : sys.path.insert(1, gpudb_module_path) from gpudb_multihead_io import GPUdbIngestor, RecordRetriever # done importing GPUdbIngestor # --------------------------------------------------------------------------- # GPUdbTable - Class to Handle GPUdb Tables # ---------------------------------------------------------------------------
class GPUdbTable( object ):

    @staticmethod
    def random_name():
        """Return a practically-unique, randomly generated name.

        The name is the string form of a version-1 UUID.
        """
        generated = uuid.uuid1()
        return str( generated )
    # end random_name

    @staticmethod
    def prefix_name( val ):
        """Return a randomly generated name that starts with the given prefix.

        Parameters:
            val (str)
                The prefix to prepend to the random portion of the name.
        """
        suffix = GPUdbTable.random_name()
        return "{}{}".format( val, suffix )
# end prefix_name def __init__( self, _type = None, name = None, options = None, db = None, read_only_table_count = None, delete_temporary_views = True, temporary_view_names = None, create_views = True, use_multihead_io = False, use_multihead_ingest = False, multihead_ingest_batch_size = 10000, flush_multi_head_ingest_per_insertion = False ): """ Parameters: _type (:class:`RecordType` or :class:`GPUdbRecordType` or list of lists of str) Either a :class:`.GPUdbRecordType` or :class:`RecordType` object which represents a type for the table, or a nested list of lists, where each internal list has the format of: :: # Just the name and type [ "name", "type (double, int etc.)" ] # Name, type, and one column property [ "name", "type (double, int etc.)", "nullable" ] # Name, type, and multiple column properties [ "name", "string", "char4", "nullable" ] Pass None for collections. If creating a GPUdbTable object for a pre-existing table, then also pass None. If no table with the given name exists, then the given type will be created in GPUdb before creating the table. Default is None. name (str) The name for the table. if none provided, then a random name will be generated using :meth:`.random_name`. options (GPUdbTableOptions or dict) A :class:`.GPUdbTableOptions` object or a dict containing options for the table creation. db (GPUdb) A :class:`.GPUdb` object that allows the user to connect to the GPUdb server. read_only_table_count (int) For known read-only tables, provide the number of records in it. Integer. Must provide the name of the table. delete_temporary_views (bool) If true, then in terminal queries--queries that can not be chained--delete the temporary views upon completion. Defaults to True. create_views (bool) Indicates whether or not to create views for this table. 
temporary_view_names (list) Optional list of temporary view names (that ought to be deleted upon terminal queries) use_multihead_io (bool) Indicates whether or not to use multi-head input and output (meaning ingestion and lookup). Default is False. Note that multi-head ingestion is more computation intensive for sharded tables, and it it probably advisable only if there is a heavy ingestion load. Choose carefully. Please see documentation of parameters *multihead_ingest_batch_size* and *flush_multi_head_ingest_per_insertion* for controlling the multi-head ingestion related behavior. use_multihead_ingest (bool) Indicates whether or not to use multi-head ingestion, if available upon insertion. Note that multi-head ingestion is more computation intensive for sharded tables, and it it probably advisable only if there is a heavy ingestion load. Default is False. Will be deprecated in version 7.0. multihead_ingest_batch_size (int) Used only in conjunction with *use_multihead_ingest*; ignored otherwise. Sets the batch size to be used for the ingestor. Must be greater than zero. Default is 10,000. The multi-head ingestor flushes the inserted records every *multihead_ingest_batch_size* automatically, unless *flush_multi_head_ingest_automatically* is False. Any remaining records would have to be manually flushed using :meth:`.flush_data_to_server` by the user, or will be automatically flushed per :meth:`.insert_records` if *flush_multi_head_ingest_automatically* is True. flush_multi_head_ingest_per_insertion (bool) Used only in conjunction with *use_multihead_ingest*; ignored otherwise. If True, flushes the multi-head ingestor in every :meth:`.insert_records` call. Otherwise, the multi-head ingestor flushes the data to the server when a worker queue reaches *multihead_ingest_batch_size* in size, and any remaining records will have to be manually flushed using :meth:`.flush_data_to_server`. Default True. Returns: A GPUdbTable object. 
""" # The given DB handle must be a GPUdb instance if not isinstance( db, GPUdb ): raise GPUdbException( "Argument 'db' must be a GPUdb object; " "given %s" % str( type(db) ) ) self.db = db # Save the options (maybe need to convert to a dict) if options: if isinstance( options, GPUdbTableOptions ): self.options = options elif isinstance( options, dict ): self.options = GPUdbTableOptions( options ) else: raise GPUdbException( "Argument 'options' must be either a dict " "or a GPUdbTableOptions object; given '%s'" % str( type( options ) ) ) else: self.options = GPUdbTableOptions() # Save the type self._type = _type if isinstance( _type, RecordType): self.record_type = _type type_info = _type.to_type_schema() self.gpudbrecord_type = GPUdbRecordType( schema_string = type_info["type_definition"], column_properties = type_info["properties"] ) if isinstance( _type, GPUdbRecordType): self.gpudbrecord_type = _type self.record_type = _type.record_type elif not _type: self.gpudbrecord_type = None self.record_type = None else: _type = GPUdbRecordType( _type ) self.gpudbrecord_type = _type self.record_type = _type.record_type # Save passed-in arguments self._delete_temporary_views = delete_temporary_views self.create_views = create_views # Create and update the set of temporary table names self._temporary_view_names = set() if temporary_view_names: self._temporary_view_names.update( temporary_view_names ) # Some default values (assuming it is not a read-only table) self._count = None self._is_read_only = False self._is_collection = False self._collection_name = self.options._collection_name self._type_id = None self._is_replicated = self.options._is_replicated # The table is known to be read only if read_only_table_count is not None: # Integer value 0 accepted if not name: # name must be given! raise GPUdbException( "Table name must be provided with 'read_only_table_count'." 
) if not isinstance( read_only_table_count, (int, long) ): raise GPUdbException( "Argument 'read_only_table_count' must be an integer." ) if (read_only_table_count < 0): raise GPUdbException( "Argument 'read_only_table_count' must be greater than " "or equal to zero; given %d" % read_only_table_count ) # All checks pass; save the name and count self.name = name self._count = read_only_table_count self._is_read_only = True # Update the table's type self.__update_table_type() return # Nothing more to do # end if # NOT a known read-only table; need to either get info on it or create it # ----------------------------------------------------------------------- # Create a random table name if none is given self.name = name if name else GPUdbTable.random_name() try: # Does a table with the same name exist already? has_table_rsp = self.db.has_table( self.name ) if not _Util.is_ok( has_table_rsp ): # problem creating the table raise GPUdbException( "Problem checking existence of the table: " + _Util.get_error_msg( has_table_rsp ) ) table_exists = has_table_rsp["table_exists"] # Do different things based on whether the table already exists if table_exists: # Check that the given type agrees with the existing table's type, if any given show_table_rsp = self.db.show_table( self.name, options = {"show_children": "false"} ) if not _Util.is_ok( show_table_rsp ): # problem creating the table raise GPUdbException( "Problem creating the table: " + _Util.get_error_msg( show_table_rsp ) ) # Check if the table is a collection if ( (show_table_rsp[ C._table_descriptions ] == C._collection) or (C._collection in show_table_rsp[ C._table_descriptions ][0]) ): self._is_collection = True else: # need to save the type ID for regular tables self._type_id = show_table_rsp["type_ids"][0] # Also save the name of any collection this table is a part of if ( (C._collection_name in show_table_rsp[ C._additional_info ][0] ) and show_table_rsp[ C._additional_info ][0][ C._collection_name ] ): 
self._collection_name = show_table_rsp[ C._additional_info ][0][ C._collection_name ] # end if else if not self._is_collection: # not a collection gtable_type = GPUdbRecordType( None, "", show_table_rsp["type_schemas"][0], show_table_rsp["properties"][0] ) table_type = RecordType.from_type_schema( "", show_table_rsp["type_schemas"][0], show_table_rsp["properties"][0] ) else: gtable_type = None table_type = None if ( self.record_type and not table_type ): # TODO: Decide if we should have this check or silently ignore the given type raise GPUdbException( "Table '%s' is an existing collection; so cannot be of the " "given type." % self.name ) if ( self.gpudbrecord_type and (self.gpudbrecord_type != gtable_type) ): raise GPUdbException( "Table '%s' exists; existing table's type does " "not match the given type." % self.name ) # Save the types self.record_type = table_type self.gpudbrecord_type = gtable_type # Check if the table is read-only or not if show_table_rsp[ C._table_descriptions ] in [ C._view, C._join, C._result_table ]: self._is_read_only = True # Check if the table is replicated if ( (show_table_rsp[ C._table_descriptions ] == C._replicated) or (C._replicated in show_table_rsp[ C._table_descriptions ][0]) ): self._is_replicated = True else: # table does not already exist in GPUdb # Create the table (and the type) if self.options._is_collection: # Create a collection rsp_obj = self.db.create_table( self.name, "", self.options.as_dict() ) self._is_collection = True elif self.record_type: # create a regular table type_id = self.gpudbrecord_type.create_type( self.db ) rsp_obj = self.db.create_table( self.name, type_id, self.options.as_dict() ) self._type_id = type_id else: # Need to create a table-hence the type-but none given raise GPUdbException( "Must provide a type to create a new table; none given." 
) if not _Util.is_ok( rsp_obj ): # problem creating the table raise GPUdbException( _Util.get_error_msg( rsp_obj ) ) # end if-else except GPUdbException as e: if IS_PYTHON_3: raise GPUdbException( "Error creating GPUdbTable: '{}'" "".format( e ) ) else: raise GPUdbException( "Error creating GPUdbTable: '{}'" "".format( e.message ) ) except Exception as e: # all other exceptions raise GPUdbException( "Error creating GPUdbTable; {}: '{}'" "".format( e.__doc__, str(e) ) ) # Set up multi-head ingestion, if needed if not isinstance( use_multihead_io, bool ): raise GPUdbException( "Argument 'use_multihead_io' must be " "a bool; given '%s'" % str( type( use_multihead_io ) ) ) if not isinstance( use_multihead_ingest, bool ): raise GPUdbException( "Argument 'use_multihead_ingest' must be " "a bool; given '%s'" % str( type( use_multihead_ingest ) ) ) self._multihead_ingestor = None if use_multihead_ingest or use_multihead_io: # Check multihead_ingest_batch_size if ( not isinstance( multihead_ingest_batch_size, (int, long) ) or (multihead_ingest_batch_size < 1) ): raise GPUdbException( "Argument 'multihead_ingest_batch_size' " "must be an integer greater than zero; " "given: " + multihead_ingest_batch_size ) self._multihead_ingestor = GPUdbIngestor( self.db, self.name, self.gpudbrecord_type, multihead_ingest_batch_size, is_table_replicated = self._is_replicated ) # Save the per-insertion-call flushing setting self._flush_multi_head_ingest_per_insertion = flush_multi_head_ingest_per_insertion # Set the function used by multihead ingestor for encoding records # TODO: Convert the multihead ingestor to use the c-extension self._record_encoding_function = lambda vals: GPUdbRecord( self.gpudbrecord_type, vals ) else: # no multi-head ingestion # Set the function used by the regular insertion for encoding records self._record_encoding_function = lambda vals: self.__encode_data_for_insertion_cext( vals ) # self._record_encoding_function = lambda vals: 
self.__encode_data_for_insertion_avro( vals ) # end if # Set up multi-head record retriever self._multihead_retriever = None if use_multihead_io: self._multihead_retriever = RecordRetriever( self.db, self.name, self.gpudbrecord_type ) # Set the function used by multihead ingestor for encoding records # TODO: Convert the multi-head record retriever to use the c-extension self._record_encoding_function = lambda vals: GPUdbRecord( self.gpudbrecord_type, vals ) else: # no multi-head ingestion # Set the function used by the regular insertion for encoding records self._record_encoding_function = lambda vals: self.__encode_data_for_insertion_cext( vals ) # self._record_encoding_function = lambda vals: self.__encode_data_for_insertion_avro( vals ) # end if # end __init__ def __str__( self ): return self.name # end __str__ def __eq__( self, other ): """Override the equality operator. """ # Check the type of the other object if not isinstance( other, GPUdbTable ): return False # Check the name if (self.name != other.name): return False # Check for GPUdbRecordType equivalency if (self.gpudbrecord_type != other.gpudbrecord_type): return False # TODO: Add the c-extension RecordType class equivalency # Check for the database client handle equivalency if (self.db != other.db): return False return True # end __eq__ def __len__( self ): """Return the current size of the table. If it is a read-only table, then return the cached count; if not a read-only table, get the current size from GPUdb. 
""" if self._is_read_only: # Get the count, if not known if (self._count is None): show_table_rsp = self.db.show_table( self.name, options = {"get_sizes": "true"} ) if not show_table_rsp.is_ok(): raise GPUdbException( "Problem getting table size: '{}'" "".format( show_table_rsp.get_error_msg() ) ) self._count = show_table_rsp[ C._total_full_size ] # end inner if return self._count # end if read only table # Not a read-only table; get the current size show_table_rsp = self.db.show_table( self.name, options = {"get_sizes": "true"} ) if not show_table_rsp.is_ok(): raise GPUdbException( "Problem getting table size: '{}'" "".format( show_table_rsp.get_error_msg() ) ) return show_table_rsp[ C._total_full_size ] # end __len__
def size( self ):
    """Return the number of records currently in the table (equivalent
    to ``len(table)``).
    """
    # Delegate to __len__, which uses the cached count for read-only
    # tables and queries the server otherwise
    record_count = self.__len__()
    return record_count
# end size


def __getitem__( self, key ):
    """Implement indexing and slicing for the table.

    Parameters:
        key (int or slice)
            A non-negative record index, or a slice with step 1 (stop may
            be END_OF_SET to mean "all remaining records").

    Returns:
        The decoded record(s), fetched via get_records().

    Raises:
        TypeError for non-integer or negative indices and stepped slices;
        IndexError for an empty/backward slice range.
    """
    # A single integer--get a single record
    if isinstance( key, (int, long) ):
        if (key < 0):
            raise TypeError( "GPUdbTable does not support negative indexing" )
        return self.get_records( key, 1 )
    # end if

    # Handle slicing
    if isinstance( key, slice ):
        if key.step and (key.step != 1):
            raise TypeError( "GPUdbTable does not support slicing with steps" )
        if not isinstance(key.start, (int, long)) or not isinstance(key.stop, (int, long)):
            raise TypeError( "GPUdbTable slicing requires integers" )
        if (key.start < 0):
            raise TypeError( "GPUdbTable does not support negative indexing" )
        if ( (key.stop < 0) and (key.stop != self.db.END_OF_SET) ):
            raise TypeError( "GPUdbTable does not support negative indexing" )
        if ( (key.stop <= key.start) and (key.stop != self.db.END_OF_SET) ):
            # Fixed: the message had start/stop swapped--the condition
            # rejects slices whose stop is NOT greater than their start
            raise IndexError( "GPUdbTable slice stop index must be greater "
                              "than the start index" )

        # END_OF_SET means "all remaining records"; otherwise the limit
        # is simply the length of the requested range
        limit = key.stop if (key.stop == self.db.END_OF_SET) \
                else (key.stop - key.start)
        return self.get_records( key.start, limit )
    # end if

    raise TypeError( "GPUdbTable indexing/slicing requires integers" )
# end __getitem__


def __iter__( self ):
    """Return a table iterator for this table.  Defaults to the first
    10,000 records in the table.  If needing to access more records,
    please use the GPUdbTableIterator class directly.
    """
    return GPUdbTableIterator( self )
# end __iter__


def __process_view_name(self, view_name ):
    """Given a view name, process it as needed.

    Returns:
        The processed view name: unchanged if given; otherwise a random
        name when view creation is enabled, or the empty string.
    """
    # If no view name is given but views ought to be created, get a random name
    if not view_name:
        if self.create_views: # will create a view
            view_name = GPUdbTable.random_name()
        else: # won't create views
            view_name = ""
    # end if

    return view_name
# end __process_view_name


def __save_table_type( self, type_schema_str, properties = None ):
    """Given the type information, save the table's current/new type.
    """
    # A collection can't be changed
    if self._is_collection:
        return

    # No new type given; so no modification was done
    if (not type_schema_str):
        return

    # Save the GPUdbRecordType object
    self.gpudbrecord_type = GPUdbRecordType( None, "", type_schema_str,
                                             properties )
    # Save the RecordType C object
    self.record_type = RecordType.from_type_schema( "", type_schema_str,
                                                    properties )
# end __save_table_type


def __update_table_type( self):
    """Update the table's type by getting the latest table information
    (the table type may have been altered by an /alter/table call).

    Returns:
        If the type was updated, i.e. the cached type needed to be
        changed, then returns True.  If the cached type is still valid,
        then returns False.
    """
    show_table_rsp = self.db.show_table( self.name )

    # Check if the type ID matches with the cached type
    type_id = show_table_rsp["type_ids"][0]
    if (self._type_id == type_id):
        return False

    self.__save_table_type( show_table_rsp["type_schemas"][0],
                            show_table_rsp["properties"][0] )
    # And also the type ID
    self._type_id = type_id
    return True # yes, the type was updated
# end __update_table_type


@property
def table_name( self ):
    """The name of the table in the database."""
    return self.name
# end table_name


@property
def is_read_only( self ): # read-only attribute is_read_only
    """Is the table read-only, or can we modify it?
    """
    return self._is_read_only
# end is_read_only


@property
def count( self ): # read-only property count
    """Return the table's size/length/count.
    """
    return self.__len__()
# end count


@property
def is_collection( self ):
    """Returns True if the table is a collection; False otherwise."""
    return self._is_collection
# end is_collection


@property
def collection_name( self ):
    """Returns the name of the collection this table is a member of; None
    if this table does not belong to any collection.
    """
    return self._collection_name
# end collection_name
def is_replicated( self ):
    """Indicate whether this table is replicated.

    Returns:
        bool -- the cached replication flag determined when the table
        handle was constructed.
    """
    replicated_flag = self._is_replicated
    return replicated_flag
# end is_replicated
def get_table_type( self ):
    """Return the table's (record) type.

    Returns:
        The cached :class:`GPUdbRecordType` object (not the c-extension
        RecordType).
    """
    record_type = self.gpudbrecord_type
    return record_type
# end get_table_type
def alias( self, alias ):
    """Create an alias string for this table.

    Parameters:
        alias (str)
            A string that contains the alias.

    Returns:
        A string with the format "this-table-name as alias".
    """
    # Reject anything that is not a string before building the expression
    if not isinstance( alias, (str, unicode) ):
        raise GPUdbException( "'alias' must be a string; given {0}"
                              "".format( str( type( alias ) ) ) )
    return "%s as %s" % (self.name, alias)
# end alias
def create_view( self, view_name, count = None ):
    """Create a new read-only :class:`.GPUdbTable` object for the given
    view, carrying along this table's set of intermediate view names so
    that they can be cleaned up automatically later.

    Returns:
        A :class:`.GPUdbTable` object
    """
    # A read-only source is itself an intermediate result; remember it so
    # that cleanup() can drop it later
    if self.is_read_only:
        self._temporary_view_names.update( [ self.name ] )

    return GPUdbTable( None, name = view_name,
                       read_only_table_count = count,
                       db = self.db,
                       temporary_view_names = self._temporary_view_names )
# end create_view
def cleanup( self ):
    """Clear/drop all intermediate tables if settings allow it.

    Returns:
        self for enabling chaining method invocations.

    Raises:
        GPUdbException -- when deleting intermediate tables is disabled.
    """
    # Guard clause: deleting intermediate views must be permitted
    if not self._delete_temporary_views:
        raise GPUdbException( "Not allowed to delete intermediate "
                              "tables." )

    # Drop each temporary view; iterate over a snapshot since the set is
    # mutated inside the loop
    for temp_view in list( self._temporary_view_names ):
        self.db.clear_table( table_name = temp_view )
        self._temporary_view_names.remove( temp_view )

    return self
# end cleanup
def exists( self, options = {} ):
    """Checks for the existence of a table with this table's name.

    Parameters:
        options (dict)
            Optional parameters passed through to /has/table.

    Returns:
        A boolean flag indicating whether the table currently exists in
        the database.
    """
    rsp = self.db.has_table( self.name, options = options )
    if not _Util.is_ok( rsp ):
        raise GPUdbException( _Util.get_error_msg( rsp ) )
    return rsp[ "table_exists" ]
# end exists
def flush_data_to_server( self ):
    """Flush all records queued by the multi-head ingestor's workers so
    that they actually get inserted into the server database.  A no-op
    when multi-head ingestion is not enabled for this table.
    """
    ingestor = self._multihead_ingestor
    if ingestor:
        ingestor.flush()
# end flush_data_to_server


def __encode_data_for_insertion_avro( self, values ):
    """Encode the given values with the database client's encoding and return
    the encoded data.
    """
    encoding = self.db._GPUdb__client_to_object_encoding()

    # Fixed: compare strings with `==`, not the identity operator `is`
    # (identity comparison of strings only works by interning accident)
    if encoding == "binary":
        encoded_record = GPUdbRecord( self.gpudbrecord_type, values ).binary_data
    else: # JSON encoding
        encoded_record = GPUdbRecord( self.gpudbrecord_type, values ).json_data_string

    return encoded_record
# end __encode_data_for_insertion_avro


def __encode_data_for_insertion_cext( self, values ):
    """Encode the given values with the database client's encoding and return
    the encoded data.
    """
    encoding = self.db._GPUdb__client_to_object_encoding()

    # Fixed: compare strings with `==`, not the identity operator `is`
    if encoding == "binary":
        # No encoding is needed here
        encoded_record = values
    else: # JSON encoding
        encoded_record = GPUdbRecord( self.gpudbrecord_type, values ).json_data_string

    return encoded_record
# end __encode_data_for_insertion_cext


def __encode_data_for_insertion( self, *args, **kwargs ):
    """Parse the input and encode the data for insertion.

    Accepts column values for a single record (positional values, a
    single list/dict, or keyword arguments) or for multiple records
    (several lists/dicts, or one list of lists/dicts).

    Returns:
        The list of encoded records.

    Raises:
        GPUdbException -- on empty input, mixed list/non-list input, or
        when both args and kwargs are given.
    """
    encoded_data = []

    # Process the input--single record or multiple records (or invalid syntax)?
    if args and kwargs: # Cannot give both args and kwargs
        raise GPUdbException( "Cannot specify both args and kwargs: either provide "
                              "the column values for a single record "
                              "in 'kwargs', or provide column values for any number "
                              "of records in 'args'." )

    # Robustness fix: fail cleanly on completely empty input instead of
    # tripping an IndexError on args[0] in the branch below
    if not args and not kwargs:
        raise GPUdbException( "Must provide data for at least a single record; none given." )

    if kwargs: # Gave the column values for a single record in kwargs
        encoded_record = self._record_encoding_function( kwargs )
        encoded_data.append( encoded_record )
    elif not any( _Util.is_list_or_dict( i ) for i in args):
        # Column values not within a single list/dict: so it is a single record
        if (isinstance( args[0], GPUdbRecord) or isinstance( args[0], Record) ):
            # Already encoded; pass it through as-is
            encoded_data.append( args[0] )
        else:
            encoded_record = self._record_encoding_function( list(args) )
            encoded_data.append( encoded_record )
    elif not all( _Util.is_list_or_dict( i ) for i in args):
        # Some values are lists or dicts, but not all--this is an error case
        raise GPUdbException( "Arguments must be either contain no list, or contain only "
                              "lists or dicts; i.e. it must not be a mix; "
                              "given {0}".format( args ) )
    elif (len( args ) == 1): # A list/dict of length one given
        if any( _Util.is_list_or_dict( i ) for i in args[0]):
            # At least one element within the list is also a list
            if not all( _Util.is_list_or_dict( i ) for i in args[0]):
                # But not all elements are lists/dict; this is an error case
                raise GPUdbException( "Arguments must be either a single list, multiple lists, "
                                      "a list of lists, or contain no lists; i.e. it must not be "
                                      "a mix of lists and non-lists; given a list with mixed "
                                      "elements: {0}".format( args ) )
            else: # A list of lists/dicts--multiple records within a list
                for record in args[0]:
                    encoded_record = self._record_encoding_function( record )
                    encoded_data.append( encoded_record )
                # end for
        else: # A single list--a single record
            encoded_record = self._record_encoding_function( *args )
            encoded_data.append( encoded_record )
        # end 2nd inner if-else
    else: # All arguments are either lists or dicts, so multiple records given
        for col_vals in args:
            encoded_record = self._record_encoding_function( col_vals )
            encoded_data.append( encoded_record )
        # end for
    # end if-else

    if not encoded_data: # No data given
        raise GPUdbException( "Must provide data for at least a single record; none given." )

    return encoded_data
# end __encode_data_for_insertion


def __insert_encoded_records( self, encoded_data, options ):
    """Given encoded records and some options, insert the records into the
    respective table in Kinetica.

    Raises:
        GPUdbException -- when the ingestor or the server reports an error.
    """
    # Make the insertion call-- either with the multi-head ingestor or the regular way
    if self._multihead_ingestor:
        # Set the multi-head ingestor's options
        self._multihead_ingestor.options = options

        try:
            # Call the insertion function
            response = self._multihead_ingestor.insert_records( encoded_data )

            # Need to flush the records, per the setting
            if self._flush_multi_head_ingest_per_insertion:
                self._multihead_ingestor.flush()
        except Exception as e:
            raise GPUdbException( str(e) )
    else:
        # Call the insert function and check the status
        response = self.db.insert_records( self.name, encoded_data,
                                           options = options,
                                           record_type = self.record_type )
        if not _Util.is_ok( response ):
            raise GPUdbException( _Util.get_error_msg( response ) )
    # end if-else
# end __insert_encoded_records
def insert_records( self, *args, **kwargs ):
    """Insert one or more records.

    For a single record, either of::

        insert_records( 1, 2, 3 )
        insert_records( [1, 2, 3] )

    For multiple records, either of::

        insert_records( [ [1, 2, 3], [4, 5, 6] ] )
        insert_records( [1, 2, 3], [4, 5, 6] )

    Records may also be given as dicts of column name to value: as
    keyword arguments for a single record, or as positional dicts (or a
    list of dicts) for multiple records.  An optional 'options' keyword
    argument is passed through to the database for the insertion
    operation, e.g.::

        insert_records( [1, 2, 3], [4, 5, 6],
                        options = {"return_record_ids": "true"} )

    Returns:
        self for enabling chaining method invocations.
    """
    # Pull out any insertion options; an 'options' entry whose value is
    # None is treated as absent (and, as before, left in kwargs)
    if ("options" in kwargs) and (kwargs["options"] is not None):
        options = kwargs.pop( "options" )
    else:
        options = {}

    # Encode the data for insertion
    payload = self.__encode_data_for_insertion( *args, **kwargs )

    try:
        # First attempt; if it fails, the table type may have been
        # altered behind our back by an /alter/table call
        self.__insert_encoded_records( payload, options )
    except GPUdbException:
        if not self.__update_table_type():
            # The cached type is still current; the failure is genuine
            raise
        # The type had indeed changed; re-encode with the new type and retry
        payload = self.__encode_data_for_insertion( *args, **kwargs )
        self.__insert_encoded_records( payload, options )
    # end try-catch

    return self
# end insert_records
def insert_records_random( self, count = None, options = {} ):
    """Generates a specified number of random records and adds them to the
    given table.  There is an optional parameter that allows the user to
    customize the ranges of the column values.  It also allows the user to
    specify linear profiles for some or all columns, in which case linear
    values are generated rather than random ones.  Only individual tables
    are supported for this operation.  The operation is synchronous: a
    response is not returned until all random records are fully available.

    Parameters:
        count (long)
            Number of records to generate.

        options (dict of dicts of floats)
            Optional specifications for the randomness of the values: a
            map of string to map of string to doubles (unlike the plain
            string-to-string options of most endpoints).  The top-level
            keys select which column the inner parameters apply to.
            Default value is an empty dict ( {} ).  Allowed keys are:

            * **seed** -- ``{'value': n}`` initializes the internal
              random number generator so the same records can be
              regenerated across invocations.  Minimum is 0.
            * **all** -- apply the inner specifications to every column
              of the records.
            * **attr_name** -- apply the inner specifications to the
              named column only, overriding anything set via *all*.
            * **track_length** -- ``{'min': ..., 'max': ...}`` series
              lengths (defaults 100/500, each within [1, 500]); valid
              only for track type data sets.

            The inner keys usable under *all*/*attr_name* are **min**,
            **max**, **interval**, **null_percentage** and
            **cardinality**; their exact semantics depend on the column
            type (see the server's /insert/records/random documentation).

    Returns:
        self for enabling chaining method invocations.
    """
    response = self.db.insert_records_random( self.name, count = count,
                                              options = options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Fixed: a return value was documented (and hinted at by the old
    # "we can return self" comment) but never returned; return self so
    # invocations can be chained like insert_records()
    return self
# end insert_records_random
def get_records_by_key( self, key_values, expression = "" ):
    """Fetch the record(s) that map to the given shard key, directly from
    the appropriate worker rank (or from the head node when multi-head
    record retrieval is not set up).

    Parameters:
        key_values (list or dict)
            Values for the sharding columns of the record to fetch: a
            list (assumed to be in the order of the sharding keys in the
            record type) or a dict.  Must not have any missing
            sharding/primary column value or any extra column values.

        expression (str)
            Optional parameter.  If given, it is passed to /get/records
            as a filter expression.

    Returns:
        The decoded records.

    Raises:
        GPUdbException -- when multi-head retrieval is not enabled.
    """
    retriever = self._multihead_retriever
    if not retriever:
        raise GPUdbException( "Record retrieval by sharding/primary keys "
                              "is not set up for this table." )
    return retriever.get_records_by_key( key_values, expression )
# end get_records_by_key
def get_records( self, offset = 0, limit = 10000, encoding = 'binary',
                 options = {}, force_primitive_return_types = True ):
    """Retrieve and decode records from the table, optionally filtered by
    an expression and/or sorted by a column.  Works on tables, views, and
    homogeneous collections.  Supports paging through the data via
    *offset* and *limit*; note that concurrent table updates can make the
    records retrieved differ between calls.

    Parameters:
        offset (long)
            Number of initial results to skip (useful for paging).
            Default 0; minimum 0, maximum MAX_INT.
        limit (long)
            Maximum number of results to return, or END_OF_SET (-9999)
            for the maximum allowed.  Default 10000.
        encoding (str)
            'binary' (default) or 'json'.
        options (dict of str)
            Optional parameters such as 'expression',
            'fast_index_lookup', 'sort_by' and 'sort_order'.  Default {}.
        force_primitive_return_types (bool)
            When True (default), string sub-type column values are
            converted back to strings (returned as `OrderedDict`s, with a
            speed penalty); when False, :class:`Record` objects with
            native structs are returned.

    Returns:
        A list of :class:`Record` objects containing the record values.
    """
    def _fetch():
        # One round trip to the server, decoded using the cached type
        return self.db.get_records_and_decode(
            self.name, offset, limit, encoding, options,
            record_type = self.record_type,
            force_primitive_return_types = force_primitive_return_types )

    rsp = _fetch()
    if not _Util.is_ok( rsp ):
        raise GPUdbException( _Util.get_error_msg( rsp ) )

    # If the server reports a type ID other than the cached one, the
    # table's type has changed; refresh the cached type and re-fetch
    if (rsp.type_name != self._type_id):
        self.__update_table_type()
        rsp = _fetch()
    # end if

    # Return just the records; disregard the extra info within the response
    return rsp.records
# end get_records
def get_records_by_column( self, column_names, offset = 0, limit = 10000,
                           encoding = 'binary', options = None,
                           print_data = False,
                           force_primitive_return_types = True,
                           get_column_major = True ):
    """For this table, retrieve the values of the given columns within a
    given range (the /get/records/bycolumn endpoint).  Supports pagination
    via *offset* and *limit*; note that if the underlying table changes
    between paged calls, the pages may overlap or skip records.

    Parameters:
        column_names (list of str)
            The list of column values to retrieve.
        offset (long)
            Number of initial results to skip (for paging).  Minimum 0,
            maximum MAX_INT.  Default 0.
        limit (long)
            Maximum number of results to return, or END_OF_SET (-9999) for
            the server-side maximum.  Default 10000.
        encoding (str)
            Encoding for returned records; 'binary' or 'json'.
            Default 'binary'.
        options (dict of str)
            Optional parameters, e.g. 'expression', 'sort_by',
            'sort_order', 'order_by'.  Default None (treated as {}).
        print_data (bool)
            If True and *get_column_major* is True, also print the fetched
            data to the console in a tabular format.  Default False.
        force_primitive_return_types (bool)
            If True, string sub-type column values are converted back to
            strings (returned as OrderedDicts); if False, native
            :class:`Record` objects are returned, which is faster.
            Default True.
        get_column_major (bool)
            If True, transpose the decoded records to column-major;
            otherwise leave them row-major.  Default True.

    Returns:
        A dict of column name to column values for column-major data, or a
        list of :class:`Record` objects for row-major data.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Guard against the shared-mutable-default pitfall: an omitted (or
    # explicit None) options argument becomes a fresh dict per call.
    options = options if options is not None else {}

    # Issue the /get/records/bycolumn query
    response = self.db.get_records_by_column_and_decode(
        self.name, column_names, offset, limit, encoding, options,
        force_primitive_return_types = force_primitive_return_types,
        get_column_major = get_column_major )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Get the records out
    data = response[ "records" ]

    # Print the data, if desired (only meaningful for column-major output)
    if print_data and get_column_major:
        print( tabulate( data , headers = 'keys', tablefmt = 'psql') )

    # Return the decoded records
    return data
# end get_records_by_column
def get_records_by_series( self, world_table_name = None, offset = 0,
                           limit = 250, encoding = 'binary', options = {},
                           force_primitive_return_types = True ):
    """Retrieve complete series/track records from *world_table_name* based
    on the partial track information contained in this table
    (the /get/records/byseries endpoint).  Unlike :meth:`.get_records`,
    records are returned grouped by series/track, each sorted by its
    TIMESTAMP column; *offset* and *limit* page through whole tracks.

    Parameters:
        world_table_name (str)
            Table containing the complete series/track information for the
            tracks present in this table.  Typically used when this table
            is a view holding partial tracks.  Can be blank.
        offset (int)
            Number of initial series/tracks to skip (for paging).
            Minimum 0, maximum MAX_INT.  Default 0.
        limit (int)
            Maximum number of series/tracks to return, or END_OF_SET
            (-9999) for the server-side maximum.  Default 250.
        encoding (str)
            Encoding for returned records; 'binary' or 'json'.
            Default 'binary'.
        options (dict of str)
            Optional parameters.  Default {}.
        force_primitive_return_types (bool)
            If True, string sub-type column values are converted back to
            strings; if False, native :class:`Record` objects are
            returned, which is faster.  Default True.

    Returns:
        A list of lists of :class:`Record` objects; each outer element is
        one track (series).

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Collect the keyword arguments for the /get/records/byseries query
    query_kwargs = dict(
        world_table_name = world_table_name,
        offset = offset,
        limit = limit,
        encoding = encoding,
        options = options,
        force_primitive_return_types = force_primitive_return_types )

    response = self.db.get_records_by_series_and_decode( self.name,
                                                         **query_kwargs )

    # Surface any server-side failure as an exception
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Only the records themselves are of interest to the caller
    return response.records
# end get_records_by_series
def get_records_from_collection( self, offset = 0, limit = 10000,
                                 encoding = 'binary', options = {},
                                 force_primitive_return_types = True ):
    """Retrieve records from a collection (the /get/records/fromcollection
    endpoint), optionally returning internal record IDs usable by
    operations such as :meth:`.delete_records`.  Supports paging via
    *offset* and *limit*.

    Parameters:
        offset (long)
            Number of initial results to skip (for paging).  Minimum 0,
            maximum MAX_INT.  Default 0.
        limit (long)
            Maximum number of results to return, or END_OF_SET (-9999)
            for the server-side maximum.  Default 10000.
        encoding (str)
            Encoding for returned records; 'binary' or 'json'.
            Default 'binary'.
        options (dict of str)
            Optional parameters; 'return_record_ids' ('true'/'false',
            default 'false') includes the internal record ID with each
            returned record.  Default {}.
        force_primitive_return_types (bool)
            If True, string sub-type column values are converted back to
            strings; if False, native :class:`Record` objects are
            returned, which is faster.  Default True.

    Returns:
        A list of :class:`Record` objects containing the record values.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Issue the /get/records/fromcollection query against this table
    fprt = force_primitive_return_types
    response = self.db.get_records_from_collection_and_decode(
        self.name, offset, limit, encoding, options,
        force_primitive_return_types = fprt )

    # Bail out loudly if the server reported a problem
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Hand back just the decoded records; drop the response envelope
    return response.records
# end get_records_from_collection
[docs] def get_geo_json( self, offset = 0, limit = 10000, options = {}, force_primitive_return_types = True ): """Retrieves records as a GeoJSON from a given table, optionally filtered by an expression and/or sorted by a column. This operation can be performed on tables, views, or on homogeneous collections (collections containing tables of all the same type). Records can be returned encoded as binary or json. This operation supports paging through the data via the input parameter *offset* and input parameter *limit* parameters. Note that when paging through a table, if the table (or the underlying table in case of a view) is updated (records are inserted, deleted or modified) the records retrieved may differ between calls based on the updates applied. Decodes and returns the fetched records. Parameters: offset (long) A positive integer indicating the number of initial results to skip (this can be useful for paging through the results). Default value is 0. The minimum allowed value is 0. The maximum allowed value is MAX_INT. limit (long) A positive integer indicating the maximum number of results to be returned. Or END_OF_SET (-9999) to indicate that the max number of results should be returned. Default value is 10000. encoding (str) Specifies the encoding for returned records. Default value is 'binary'. Allowed values are: * binary * json The default value is 'binary'. options (dict of str) Default value is an empty dict ( {} ). Allowed keys are: * **expression** -- Optional filter expression to apply to the table. * **fast_index_lookup** -- Indicates if indexes should be used to perform the lookup for a given expression if possible. Only applicable if there is no sorting, the expression contains only equivalence comparisons based on existing tables indexes and the range of requested values is from [0 to END_OF_SET]. The default value is true. * **sort_by** -- Optional column that the data should be sorted by. Empty by default (i.e. no sorting is applied). 
* **sort_order** -- String indicating how the returned values should be sorted - ascending or descending. If sort_order is provided, sort_by has to be provided. Allowed values are: * ascending * descending The default value is 'ascending'. force_primitive_return_types (bool) If `True`, then `OrderedDict` objects will be returned, where string sub-type columns will have their values converted back to strings; for example, the Python `datetime` structs, used for datetime type columns would have their values returned as strings. If `False`, then :class:`Record` objects will be returned, which for string sub-types, will return native or custom structs; no conversion to string takes place. String conversions, when returning `OrderedDicts`, incur a speed penalty, and it is strongly recommended to use the :class:`Record` object option instead. If `True`, but none of the returned columns require a conversion, then the original :class:`Record` objects will be returned. Default value is True. Returns: A GeoJSON object (a dict) containg the record values. """ response = self.db.get_records_and_decode( self.name, offset, limit, "geojson", options, record_type = self.record_type, force_primitive_return_types = force_primitive_return_types ) if not _Util.is_ok( response ): raise GPUdbException( _Util.get_error_msg( response ) ) # Return just the records; disregard the extra info within the response return response.records[0]
# end get_geo_json @staticmethod
[docs] def create_join_table( db, join_table_name = None, table_names = None, column_names = None, expressions = [], options = {} ): """Creates a table that is the result of a SQL JOIN. For join details and examples see: `Joins <../../../concepts/joins.html>`_. For limitations, see `Join Limitations and Cautions <../../../concepts/joins.html#limitations-cautions>`_. Parameters: join_table_name (str) Name of the join table to be created. Has the same naming restrictions as `tables <../../../concepts/tables.html>`_. table_names (list of str) The list of table names composing the join. Corresponds to a SQL statement FROM clause. The user can provide a single element (which will be automatically promoted to a list internally) or a list. column_names (list of str) List of member table columns or column expressions to be included in the join. Columns can be prefixed with 'table_id.column_name', where 'table_id' is the table name or alias. Columns can be aliased via the syntax 'column_name as alias'. Wild cards '*' can be used to include all columns across member tables or 'table_id.*' for all of a single table's columns. Columns and column expressions composing the join must be uniquely named or aliased--therefore, the '*' wild card cannot be used if column names aren't unique across all tables. The user can provide a single element (which will be automatically promoted to a list internally) or a list. expressions (list of str) An optional list of expressions to combine and filter the joined tables. Corresponds to a SQL statement WHERE clause. For details see: `expressions <../../../concepts/expressions.html>`_. The default value is an empty list ( [] ). The user can provide a single element (which will be automatically promoted to a list internally) or a list. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **collection_name** -- Name of a collection which is to contain the join. 
If the collection provided is non-existent, the collection will be automatically created. If empty, then the join will be at the top level. The default value is ''. * **max_query_dimensions** -- The maximum number of tables in a join that can be accessed by a query and are not equated by a foreign-key to primary-key equality predicate * **optimize_lookups** -- Use more memory to speed up the joining of tables. Allowed values are: * true * false The default value is 'false'. * **refresh_method** -- Method by which the join can be refreshed when the data in underlying member tables have changed. Allowed values are: * **manual** -- refresh only occurs when manually requested by calling this endpoint with refresh option set to *refresh* or *full_refresh* * **on_query** -- incrementally refresh (refresh just those records added) whenever a new query is issued and new data is inserted into the base table. A full refresh of all the records occurs when a new query is issued and there have been inserts to any non-base-tables since the last query. `TTL <../../../concepts/ttl.html>`_ will be set to not expire; any *ttl* specified will be ignored. * **on_insert** -- incrementally refresh (refresh just those records added) whenever new data is inserted into a base table. A full refresh of all the records occurs when a new query is issued and there have been inserts to any non-base-tables since the last query. `TTL <../../../concepts/ttl.html>`_ will be set to not expire; any *ttl* specified will be ignored. The default value is 'manual'. * **refresh** -- Do a manual refresh of the join if it exists - throws an error otherwise. Allowed values are: * **no_refresh** -- don't refresh * **refresh** -- incrementally refresh (refresh just those records added) if new data has been inserted into the base table. 
A full refresh of all the records occurs if there have been inserts to any non-base-tables since the last refresh * **full_refresh** -- always refresh even if no new records have been added. Only refresh method guaranteed to do a full refresh (refresh all the records) if a delete or update has occurred since the last refresh. The default value is 'no_refresh'. * **ttl** -- Sets the `TTL <../../../concepts/ttl.html>`_ of the join table specified in input parameter *join_table_name*. Ignored if *refresh_method* is either *on_insert* or *on_query*. * **view_id** -- view this projection is part of. The default value is ''. * **no_count** -- return a count of 0 for the join table for logging and for show_table. optimization needed for large overlapped equi-join stencils. The default value is 'false'. * **chunk_size** -- Maximum size of a joined-chunk for this table. Defaults to the gpudb.conf file chunk size * **allow_right_primary_key_join** -- When true allows right joins from a key to a primary key to be done as primary key joins. Such a join table cannot be joined to other join tables. When false the right join shall be done as an equi-join. The default value is 'false'. Returns: A read-only GPUdbTable object. Raises: GPUdbException -- Upon an error from the server. 
""" # Create a random table name if none is given join_table_name = join_table_name if join_table_name else GPUdbTable.random_name() # Normalize the input table names table_names = table_names if isinstance( table_names, list ) else [ table_names ] table_names = [ t.name if isinstance(t, GPUdbTable) else t for t in table_names ] # The given DB handle must be a GPUdb instance if not isinstance( db, GPUdb ): raise GPUdbException( "Argument 'db' must be a GPUdb object; " "given %s" % str( type( db ) ) ) response = db.create_join_table( join_table_name, table_names, column_names, expressions, options ) if not _Util.is_ok( response ): raise GPUdbException( _Util.get_error_msg( response ) ) return GPUdbTable( None, name = join_table_name, db = db )
# end create_join_table @staticmethod
[docs] def create_union( db, table_name = None, table_names = None, input_column_names = None, output_column_names = None, options = {} ): """Merges data from one or more tables with comparable data types into a new table. The following merges are supported: UNION (DISTINCT/ALL) - For data set union details and examples, see `Union <../../../concepts/unions.html>`_. For limitations, see `Union Limitations and Cautions <../../../concepts/unions.html#limitations-and-cautions>`_. INTERSECT (DISTINCT/ALL) - For data set intersection details and examples, see `Intersect <../../../concepts/intersect.html>`_. For limitations, see `Intersect Limitations <../../../concepts/intersect.html#limitations>`_. EXCEPT (DISTINCT/ALL) - For data set subtraction details and examples, see `Except <../../../concepts/except.html>`_. For limitations, see `Except Limitations <../../../concepts/except.html#limitations>`_. MERGE VIEWS - For a given set of `filtered views <../../../concepts/filtered_views.html>`_ on a single table, creates a single filtered view containing all of the unique records across all of the given filtered data sets. Non-charN 'string' and 'bytes' column types cannot be merged, nor can columns marked as `store-only <../../../concepts/types.html#data-handling>`_. Parameters: table_name (str) Name of the table to be created. Has the same naming restrictions as `tables <../../../concepts/tables.html>`_. table_names (list of str) The list of table names to merge. Must contain the names of one or more existing tables. The user can provide a single element (which will be automatically promoted to a list internally) or a list. input_column_names (list of lists of str) The list of columns from each of the corresponding input tables. The user can provide a single element (which will be automatically promoted to a list internally) or a list. output_column_names (list of str) The list of names of the columns to be stored in the output table. 
The user can provide a single element (which will be automatically promoted to a list internally) or a list. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **collection_name** -- Name of a collection which is to contain the output table. If the collection provided is non-existent, the collection will be automatically created. If empty, the output table will be a top-level table. The default value is ''. * **materialize_on_gpu** -- If *true*, then the columns of the output table will be cached on the GPU. Allowed values are: * true * false The default value is 'false'. * **mode** -- If *merge_views*, then this operation will merge the provided views. All input parameter *table_names* must be views from the same underlying base table. Allowed values are: * **union_all** -- Retains all rows from the specified tables. * **union** -- Retains all unique rows from the specified tables (synonym for *union_distinct*). * **union_distinct** -- Retains all unique rows from the specified tables. * **except** -- Retains all unique rows from the first table that do not appear in the second table (only works on 2 tables). * **except_all** -- Retains all rows(including duplicates) from the first table that do not appear in the second table (only works on 2 tables). * **intersect** -- Retains all unique rows that appear in both of the specified tables (only works on 2 tables). * **intersect_all** -- Retains all rows(including duplicates) that appear in both of the specified tables (only works on 2 tables). * **merge_views** -- Merge two or more views (or views of views) of the same base data set into a new view. If this mode is selected input parameter *input_column_names* AND input parameter *output_column_names* must be empty. 
The resulting view would match the results of a SQL OR operation, e.g., if filter 1 creates a view using the expression 'x = 20' and filter 2 creates a view using the expression 'x <= 10', then the merge views operation creates a new view using the expression 'x = 20 OR x <= 10'. The default value is 'union_all'. * **chunk_size** -- Indicates the chunk size to be used for this table. * **create_indexes** -- Comma-separated list of columns on which to create indexes on the output table. The columns specified must be present in input parameter *output_column_names*. * **ttl** -- Sets the `TTL <../../../concepts/ttl.html>`_ of the table specified in input parameter *table_name*. * **persist** -- If *true*, then the table specified in input parameter *table_name* will be persisted and will not expire unless a *ttl* is specified. If *false*, then the table will be an in-memory table and will expire unless a *ttl* is specified otherwise. Allowed values are: * true * false The default value is 'false'. * **view_id** -- view the output table will be a part of. The default value is ''. * **force_replicated** -- If *true*, then the table specified in input parameter *table_name* will be replicated even if the source tables are not. Allowed values are: * true * false The default value is 'false'. Returns: A read-only GPUdbTable object. Raises: GPUdbException -- Upon an error from the server. 
""" # Create a random table name if none is given table_name = table_name if table_name else GPUdbTable.random_name() # Normalize the input table names table_names = table_names if isinstance( table_names, list ) else [ table_names ] table_names = [ t.name if isinstance(t, GPUdbTable) else t for t in table_names ] # The given DB handle must be a GPUdb instance if not isinstance( db, GPUdb ): raise GPUdbException( "Argument 'db' must be a GPUdb object; " "given %s" % str( type( db ) ) ) response = db.create_union( table_name, table_names, input_column_names, output_column_names, options ) if not _Util.is_ok( response ): raise GPUdbException( _Util.get_error_msg( response ) ) return GPUdbTable( None, name = table_name, db = db )
# end create_union @staticmethod
def merge_records( db, table_name = None, source_table_names = None,
                   field_maps = None, options = {} ):
    """Create a new empty result table named *table_name* and insert all
    records from the *source_table_names* tables according to the column
    mappings in *field_maps* (the /merge/records endpoint).  For details
    and examples, see `Merge Records
    <../../../concepts/merge_records.html>`_; for limitations, see
    `Merge Records Limitations and Cautions
    <../../../concepts/merge_records.html#limitations-and-cautions>`_.
    *field_maps* must match one-to-one with *source_table_names*.

    Parameters:
        db (GPUdb)
            The database handle to issue the request through.
        table_name (str)
            The new result table name; must NOT be an existing table.  A
            random name is generated if omitted.
        source_table_names (list of str)
            Names (or GPUdbTable objects) of existing source tables.  A
            single element is promoted to a list internally.
        field_maps (list of dicts of str to str)
            One mapping per source table: target column names (keys) to
            source columns or column expressions (values).  All source
            columns merged into one target column must match in type.
        options (dict of str to str)
            Optional parameters, e.g. 'collection_name', 'is_replicated',
            'ttl', 'persist', 'chunk_size'.  Default {}.

    Returns:
        A read-only GPUdbTable object.

    Raises:
        GPUdbException -- If 'db' is not a GPUdb object, or upon an error
        from the server.
    """
    # Use a randomly generated name for the result table if none supplied
    if not table_name:
        table_name = GPUdbTable.random_name()

    # Promote a lone source table to a one-element list, then reduce any
    # GPUdbTable objects to their plain names
    if not isinstance( source_table_names, list ):
        source_table_names = [ source_table_names ]
    source_table_names = [ src.name if isinstance( src, GPUdbTable ) else src
                           for src in source_table_names ]

    # The given DB handle must be a GPUdb instance
    if not isinstance( db, GPUdb ):
        raise GPUdbException( "Argument 'db' must be a GPUdb object; "
                              "given %s" % str( type( db ) ) )

    response = db.merge_records( table_name, source_table_names,
                                 field_maps, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    return GPUdbTable( None, name = table_name, db = db )
# end merge_records
def aggregate_convex_hull( self, x_column_name = None, y_column_name = None,
                           options = {} ):
    """Calculate and return the convex hull for the (x, y) values in this
    table (the /aggregate/convexhull endpoint).

    Parameters:
        x_column_name (str)
            Name of the column containing the x coordinates of the points.
        y_column_name (str)
            Name of the column containing the y coordinates of the points.
        options (dict of str to str)
            Optional parameters.  Default {}.

    Returns:
        The response from the server, a dict with entries:

        x_vector (list of floats)
            x coordinates of the resulting convex set.
        y_vector (list of floats)
            y coordinates of the resulting convex set.
        count (int)
            Number of points in the convex set.
        is_valid (bool)

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Forward the request to the server through the DB handle
    response = self.db.aggregate_convex_hull( self.name,
                                              x_column_name,
                                              y_column_name,
                                              options )

    # Convert a server-side failure into an exception
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # The full response (x_vector, y_vector, count, is_valid) is returned
    return response
# end aggregate_convex_hull
def aggregate_group_by( self, column_names = None, offset = None,
                        limit = 1000, encoding = 'binary', options = None,
                        force_primitive_return_types = True,
                        get_column_major = True ):
    """Calculates unique combinations (groups) of values for the given
    columns in this table/view and computes aggregates on each unique
    combination; analogous to an SQL-style SELECT...GROUP BY.

    To group by columns 'x' and 'y' and count the objects within each
    group, use: column_names=['x','y','count(*)'].  Results can be paged
    via *offset*/*limit*.  If a *result_table* name is specified in
    *options*, the results are stored in a new table with that name and
    no records are returned in the response.

    Parameters:
        column_names (list of str)
            List of one or more column names, expressions, and aggregate
            expressions.
        offset (long)
            Number of initial results to skip (useful for paging).
            Minimum allowed value is 0.
        limit (long)
            Maximum number of results to return, or END_OF_SET (-9999)
            for all results.  The default value is 1000.
        encoding (str)
            Record encoding; 'binary' or 'json'.  Default 'binary'.
        options (dict of str to str)
            Optional parameters (e.g. 'expression', 'having',
            'sort_order', 'sort_by', 'result_table', 'ttl', ...).
            The default value of None is treated as an empty dict.
        force_primitive_return_types (bool)
            If True, `OrderedDict` objects are returned with string
            sub-type column values converted back to strings; if False,
            :class:`Record` objects are returned (faster).  Default True.
        get_column_major (bool)
            If True, decoded records are transposed to column-major;
            otherwise returned row-major.  Default True.

    Returns:
        A read-only GPUdbTable object if *options* has "result_table";
        otherwise the response dict from the server (including decoded
        records under both "records" and "data").

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default-argument pitfall; treat None as
    # "no options".
    if options is None:
        options = {}
    result_table = options.get( "result_table" )

    response = self.db.aggregate_group_by_and_decode(
        self.name, column_names, offset, limit, encoding, options,
        force_primitive_return_types = force_primitive_return_types,
        get_column_major = get_column_major )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Expose the decoded records under the conventional "data" alias too
    response["data"] = response["records"]

    if result_table:
        # Create a read-only table for the result table
        return self.create_view( result_table,
                                 response[ "total_number_of_records" ] )
    return response
# end aggregate_group_by
def aggregate_histogram( self, column_name = None, start = None, end = None,
                         interval = None, options = None ):
    """Performs a histogram calculation over a column of this table.

    *interval* produces bins of that size over [*start*, *end*]; for each
    bin the start value is inclusive and the end value exclusive (except
    the last bin, whose end is inclusive).  Each bin's value is its
    record count, unless a *value_column* option is given, in which case
    the sum of that column's values per bin is returned instead.  The
    total number of bins cannot exceed 10,000.

    Parameters:
        column_name (str)
            Column name or expression over which the histogram is
            calculated.
        start (float)
            Lower end value of the histogram interval, inclusive.
        end (float)
            Upper end value of the histogram interval, inclusive.
        interval (float)
            The size of each bin within start and end.
        options (dict of str to str)
            Optional parameters (e.g. 'value_column').  The default
            value of None is treated as an empty dict.

    Returns:
        The response dict from the server with entries 'counts',
        'start', and 'end'.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default-argument pitfall
    if options is None:
        options = {}
    response = self.db.aggregate_histogram( self.name, column_name, start,
                                            end, interval, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return response
# end aggregate_histogram
def aggregate_k_means( self, column_names = None, k = None, tolerance = None,
                       options = None ):
    """Runs the k-means clustering heuristic over the given columns of
    this table.

    Starts from a randomly selected set of k points and iteratively
    refines them to a local minimum of the mean squared distances.
    NOTE: the server must be running a CUDA (GPU-based) build to service
    this request.

    Parameters:
        column_names (list of str)
            Column names on which to operate; with n columns each of the
            k result points has n dimensions.  A single element is
            automatically promoted to a list.
        k (int)
            The number of mean points to be determined.
        tolerance (float)
            Stop iterating when the distances between successive points
            is less than this tolerance.
        options (dict of str to str)
            Optional parameters (e.g. 'whiten', 'max_iters',
            'num_tries').  The default value of None is treated as an
            empty dict.

    Returns:
        The response dict from the server with entries 'means',
        'counts', 'rms_dists', 'count', 'rms_dist', 'tolerance', and
        'num_iters'.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default-argument pitfall
    if options is None:
        options = {}
    response = self.db.aggregate_k_means( self.name, column_names, k,
                                          tolerance, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return response
# end aggregate_k_means
def aggregate_min_max( self, column_name = None, options = None ):
    """Calculates and returns the minimum and maximum values of a
    particular column in this table.

    Parameters:
        column_name (str)
            Name of a column or an expression of one or more columns on
            which the min-max will be calculated.
        options (dict of str to str)
            Optional parameters.  The default value of None is treated
            as an empty dict.

    Returns:
        The response dict from the server with entries 'min' and 'max'.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default-argument pitfall
    if options is None:
        options = {}
    response = self.db.aggregate_min_max( self.name, column_name, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return response
# end aggregate_min_max
def aggregate_min_max_geometry( self, column_name = None, options = None ):
    """Calculates and returns the minimum and maximum x- and
    y-coordinates of a particular geospatial geometry column in this
    table.

    Parameters:
        column_name (str)
            Name of a geospatial geometry column on which the min-max
            will be calculated.
        options (dict of str to str)
            Optional parameters.  The default value of None is treated
            as an empty dict.

    Returns:
        The response dict from the server with entries 'min_x', 'max_x',
        'min_y', and 'max_y'.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default-argument pitfall
    if options is None:
        options = {}
    response = self.db.aggregate_min_max_geometry( self.name, column_name,
                                                   options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return response
# end aggregate_min_max_geometry
def aggregate_statistics( self, column_name = None, stats = None,
                          options = None ):
    """Calculates the requested statistics of the given column(s) in
    this table.

    Available statistics include count, mean, stdv, variance, skew,
    kurtosis, sum, min, max, weighted_average, cardinality,
    estimated_cardinality, percentile and percentile_rank.  Additional
    columns can be folded into the calculation via the
    *additional_column_names* option; weighted_average requires a
    *weight_column_name* option.

    Parameters:
        column_name (str)
            Name of the primary column for which the statistics are to
            be calculated.
        stats (str)
            Comma-separated list of the statistics to calculate, e.g.
            "sum,mean".
        options (dict of str to str)
            Optional parameters (e.g. 'additional_column_names',
            'weight_column_name').  The default value of None is treated
            as an empty dict.

    Returns:
        The response dict from the server; entry 'stats' maps each
        requested statistic name to its double value (the total count is
        included by default).

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default-argument pitfall
    if options is None:
        options = {}
    response = self.db.aggregate_statistics( self.name, column_name, stats,
                                             options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return response
# end aggregate_statistics
def aggregate_statistics_by_range( self, select_expression = '',
                                   column_name = None,
                                   value_column_name = None, stats = None,
                                   start = None, end = None, interval = None,
                                   options = None ):
    """Divides this table into bins based on a binning column and
    calculates statistics of a value column within each bin.

    Bins are [start+interval*i, start+interval*(i+1)) over the
    binning-column, or explicit values via the *bin_values* option.  In
    addition to the requested statistics, the count of samples per bin
    is returned.  NOTE: the server must be running a CUDA (GPU-based)
    build to service this request.

    Parameters:
        select_expression (str)
            If non-empty, statistics are calculated only for records for
            which the expression is true.  The default value is ''.
        column_name (str)
            Name of the binning-column used to divide the set samples
            into bins.
        value_column_name (str)
            Name of the value-column for which statistics are computed.
        stats (str)
            Comma-separated list of statistics to calculate, e.g.
            'sum,mean'.
        start (float)
            The lower bound of the binning-column.
        end (float)
            The upper bound of the binning-column.
        interval (float)
            The interval of a bin.
        options (dict of str to str)
            Optional parameters (e.g. 'additional_column_names',
            'bin_values', 'weight_column_name', 'order_column_name').
            The default value of None is treated as an empty dict.

    Returns:
        The response dict from the server; entry 'stats' maps each
        requested statistic to a vector of per-bin values, plus a
        'count' histogram of the binning-column.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default-argument pitfall
    if options is None:
        options = {}
    response = self.db.aggregate_statistics_by_range(
        self.name, select_expression, column_name, value_column_name,
        stats, start, end, interval, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return response
# end aggregate_statistics_by_range
def aggregate_unique( self, column_name = None, offset = None,
                      limit = 10000, encoding = 'binary', options = None,
                      force_primitive_return_types = True,
                      get_column_major = True ):
    """Returns all the unique values from a particular column (or column
    expression) of this table.

    Results can be paged via *offset*/*limit*.  If a *result_table* name
    is specified in *options*, the results are stored in a new table
    with that name and no records are returned in the response.

    Parameters:
        column_name (str)
            Name of the column, or an expression containing one or more
            column names, on which the unique function is applied.
        offset (long)
            Number of initial results to skip (useful for paging).
            Minimum allowed value is 0.
        limit (long)
            Maximum number of results to return, or END_OF_SET (-9999)
            for all results.  The default value is 10000.
        encoding (str)
            Record encoding; 'binary' or 'json'.  Default 'binary'.
        options (dict of str to str)
            Optional parameters (e.g. 'expression', 'sort_order',
            'result_table', 'ttl', ...).  The default value of None is
            treated as an empty dict.
        force_primitive_return_types (bool)
            If True, `OrderedDict` objects are returned with string
            sub-type column values converted back to strings; if False,
            :class:`Record` objects are returned (faster).  Default True.
        get_column_major (bool)
            If True, decoded records are transposed to column-major;
            otherwise returned row-major.  Default True.

    Returns:
        A read-only GPUdbTable object if *options* has "result_table";
        otherwise the response dict from the server (including decoded
        records under both "records" and "data").

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default-argument pitfall; treat None as
    # "no options".
    if options is None:
        options = {}
    result_table = options.get( "result_table" )

    response = self.db.aggregate_unique_and_decode(
        self.name, column_name, offset, limit, encoding, options,
        force_primitive_return_types = force_primitive_return_types,
        get_column_major = get_column_major )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Expose the decoded records under the conventional "data" alias too
    response["data"] = response["records"]

    if result_table:
        # Create a read-only table for the result table
        return self.create_view( result_table )
    return response
# end aggregate_unique
def aggregate_unpivot( self, column_names = None,
                       variable_column_name = '', value_column_name = '',
                       pivoted_columns = None, encoding = 'binary',
                       options = None, force_primitive_return_types = True,
                       get_column_major = True ):
    """Rotates the column values of this table into row values
    (unpivot), normalizing tables built for cross-tabular reporting.

    A variable column, a value column, and all non-pivoted source
    columns are projected into the result; the variable and value
    columns hold the pivoted column name and value respectively.  If a
    *result_table* name is specified in *options*, the results are
    stored in a new table with that name and no records are returned in
    the response.

    Parameters:
        column_names (list of str)
            List of column names or expressions; '*' includes all
            non-pivoted columns from the source table.
        variable_column_name (str)
            The variable/parameter column name.  The default value is ''.
        value_column_name (str)
            The value column name.  The default value is ''.
        pivoted_columns (list of str)
            One or more values, typically column names of the input
            table; all must have the same data type.
        encoding (str)
            Record encoding; 'binary' or 'json'.  Default 'binary'.
        options (dict of str to str)
            Optional parameters (e.g. 'expression', 'order_by',
            'result_table', 'ttl', ...).  The default value of None is
            treated as an empty dict.
        force_primitive_return_types (bool)
            If True, `OrderedDict` objects are returned with string
            sub-type column values converted back to strings; if False,
            :class:`Record` objects are returned (faster).  Default True.
        get_column_major (bool)
            If True, decoded records are transposed to column-major;
            otherwise returned row-major.  Default True.

    Returns:
        A read-only GPUdbTable object if *options* has "result_table";
        otherwise the response dict from the server (including decoded
        records under both "records" and "data").

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default-argument pitfall; treat None as
    # "no options".
    if options is None:
        options = {}
    result_table = options.get( "result_table" )

    response = self.db.aggregate_unpivot_and_decode(
        self.name, column_names, variable_column_name, value_column_name,
        pivoted_columns, encoding, options,
        force_primitive_return_types = force_primitive_return_types,
        get_column_major = get_column_major )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Expose the decoded records under the conventional "data" alias too
    response["data"] = response["records"]

    if result_table:
        # Create a read-only table for the result table
        return self.create_view( result_table )
    return response
# end aggregate_unpivot
def alter_table( self, action = None, value = None, options = None ):
    """Apply a modification to this table, view, or collection.

    Available actions include creating/deleting a column index
    (*create_index* / *delete_index*), setting TTL (*ttl*), setting the
    global access mode (*set_global_access_mode*), changing protection
    (*protected*), managing columns (*add_column*, *change_column*,
    *delete_column*), compression (*set_column_compression*), foreign
    keys (*create_foreign_key* / *delete_foreign_key*), materialized
    view refresh settings, and renaming (*rename_table*).

    Parameters:
        action (str)
            Modification operation to apply (see above).
        value (str)
            The value of the modification; meaning depends on *action*
            (e.g. a column name, 'true'/'false', a TTL, or an access
            mode).
        options (dict of str to str)
            Optional parameters (e.g. *column_type*,
            *column_properties*, *compression_type*, *rename_column*).
            Default None, treated as {}.

    Returns:
        The response from the server, a dict containing *table_name*,
        *action*, *value*, and the (possibly new) type information
        (*type_id*, *type_definition*, *properties*, *label*).

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default pitfall.
    if options is None:
        options = {}

    response = self.db.alter_table( self.name, action, value, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Some actions (e.g. column changes) create a new type; keep this
    # object's cached type information in sync with the server.
    self.__save_table_type( response.type_definition, response.properties )

    # Track a rename so this object keeps pointing at the right table.
    if (action == "rename_table" ):
        self.name = value

    return response
# end alter_table
def append_records( self, source_table_name = None, field_map = None,
                    options = None ):
    """Append (insert) all records from a source table into this table.

    Parameters:
        source_table_name (str)
            Name of the existing source table to read records from.
        field_map (dict of str to str)
            Mapping of target-table column names (keys) to source-table
            column names or expressions (values); types must match.
        options (dict of str to str)
            Optional parameters (e.g. *offset*, *limit*, *expression*,
            *order_by*, *update_on_existing_pk*, *truncate_strings*).
            Default None, treated as {}.

    Returns:
        The response from the server, a dict containing *table_name*.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default pitfall.
    if options is None:
        options = {}

    response = self.db.append_records( self.name, source_table_name,
                                       field_map, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    return response
# end append_records
def clear( self, authorization = '', options = None ):
    """Clear (drop) this table; the operation is synchronous.

    Parameters:
        authorization (str)
            No longer used; pass an empty string.  Default ''.
        options (dict of str to str)
            Optional parameters (e.g. *no_error_if_not_exists*).
            Default None, treated as {}.

    Returns:
        The response from the server, a dict containing *table_name*.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default pitfall.
    if options is None:
        options = {}

    response = self.db.clear_table( self.name, authorization, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    return response
# end clear
def create_projection( self, column_names = None, options = None,
                       projection_name = None ):
    """Create a new `projection <../../../concepts/projections.html>`_
    of this table — a subset of its columns, possibly including derived
    columns and a different shard key.

    Parameters:
        column_names (list of str)
            Columns (or derived expressions, optionally aliased via
            'column_name as alias') to include in the projection.
        options (dict of str to str)
            Optional parameters (e.g. *collection_name*, *expression*,
            *order_by*, *limit*, *shard_key*, *persist*, *ttl*).
            Default None, treated as {}.
        projection_name (str)
            Name of the projection to create; same naming restrictions
            as tables.

    Returns:
        A read-only GPUdbTable object wrapping the new projection.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default pitfall.
    if options is None:
        options = {}

    # Apply any configured name prefixing/qualification.
    projection_name = self.__process_view_name( projection_name )

    response = self.db.create_projection( self.name, projection_name,
                                          column_names, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    return self.create_view( projection_name )
# end create_projection
def create_table_monitor( self, options = None ):
    """Create a monitor that forwards copies of newly inserted records
    in this table to subscribers via ZMQ.

    Subscribe to the returned *topic_id* on the ZMQ table monitor port
    (default 9002); the monitor runs until deactivated with
    :meth:`.clear_table_monitor`.

    Parameters:
        options (dict of str to str)
            Optional parameters.  Default None, treated as {}.

    Returns:
        The response from the server, a dict containing *topic_id*,
        *table_name*, and *type_schema* (JSON Avro schema for decoding
        published records).

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default pitfall.
    if options is None:
        options = {}

    response = self.db.create_table_monitor( self.name, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    return response
# end create_table_monitor
def delete_records( self, expressions = None, options = None ):
    """Delete record(s) matching the given criteria from this table.

    Selection is by one or more *expressions*, by a single *record_id*
    option, or all records via the *delete_all_records* option; the
    three are mutually exclusive.  Cannot be run on a collection or a
    view.  The operation is synchronous.

    Parameters:
        expressions (list of str)
            Delete predicates, one per select; see the server's
            `expressions <../../../concepts/expressions.html>`_ docs.
            A single element is promoted to a list internally.
        options (dict of str to str)
            Optional parameters (e.g. *global_expression*, *record_id*,
            *delete_all_records*).  Default None, treated as {}.

    Returns:
        The response from the server, a dict containing *count_deleted*
        and *counts_deleted* (per-expression counts).

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default pitfall.
    if options is None:
        options = {}

    response = self.db.delete_records( self.name, expressions, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    return response
# end delete_records
def filter( self, expression = None, options = None, view_name = '' ):
    """Filter this table by an expression, storing the result in a view.

    Parameters:
        expression (str)
            The select expression; see the server's
            `expressions <../../../concepts/expressions.html>`_ docs.
        options (dict of str to str)
            Optional parameters (e.g. *collection_name*, *view_id*,
            *ttl*).  Default None, treated as {}.
        view_name (str)
            Name of the view to hold the results; same naming
            restrictions as tables.  Default ''.

    Returns:
        A read-only GPUdbTable object wrapping the result view.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default pitfall.
    if options is None:
        options = {}

    # Apply any configured name prefixing/qualification.
    view_name = self.__process_view_name( view_name )

    response = self.db.filter( self.name, view_name, expression, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Wrap the view, seeding it with the matched-record count.
    return self.create_view( view_name, response[ "count" ] )
# end filter
def filter_by_area( self, x_column_name = None, x_vector = None,
                    y_column_name = None, y_vector = None,
                    options = None, view_name = '' ):
    """Select objects of this table that lie within a named area of
    interest (NAI/polygon); results go into a new view.  Synchronous.

    Parameters:
        x_column_name (str)
            Column containing the x values to be filtered.
        x_vector (list of floats)
            X coordinates of the polygon's vertices; a single element is
            promoted to a list internally.
        y_column_name (str)
            Column containing the y values to be filtered.
        y_vector (list of floats)
            Y coordinates of the polygon's vertices; a single element is
            promoted to a list internally.
        options (dict of str to str)
            Optional parameters (e.g. *collection_name*).  Default
            None, treated as {}.
        view_name (str)
            Name of the view to hold the results; same naming
            restrictions as tables.  Default ''.

    Returns:
        A read-only GPUdbTable object wrapping the result view.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default pitfall.
    if options is None:
        options = {}

    # Apply any configured name prefixing/qualification.
    view_name = self.__process_view_name( view_name )

    response = self.db.filter_by_area( self.name, view_name,
                                       x_column_name, x_vector,
                                       y_column_name, y_vector, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Wrap the view, seeding it with the matched-record count.
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_area
def filter_by_area_geometry( self, column_name = None, x_vector = None,
                             y_vector = None, options = None,
                             view_name = '' ):
    """Select geospatial geometry objects of this table that intersect a
    named area of interest (NAI/polygon); results go into a new view.
    Synchronous.

    Parameters:
        column_name (str)
            Name of the geospatial geometry column to be filtered.
        x_vector (list of floats)
            X coordinates of the polygon's vertices; a single element is
            promoted to a list internally.
        y_vector (list of floats)
            Y coordinates of the polygon's vertices; a single element is
            promoted to a list internally.
        options (dict of str to str)
            Optional parameters (e.g. *collection_name*).  Default
            None, treated as {}.
        view_name (str)
            Name of the view to hold the results; must not already
            exist.  Default ''.

    Returns:
        A read-only GPUdbTable object wrapping the result view.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default pitfall.
    if options is None:
        options = {}

    # Apply any configured name prefixing/qualification.
    view_name = self.__process_view_name( view_name )

    response = self.db.filter_by_area_geometry( self.name, view_name,
                                                column_name, x_vector,
                                                y_vector, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Wrap the view, seeding it with the matched-record count.
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_area_geometry
def filter_by_box( self, x_column_name = None, min_x = None, max_x = None,
                   y_column_name = None, min_y = None, max_y = None,
                   options = None, view_name = '' ):
    """Select objects of this table that lie within a rectangular box;
    results go into a new view.  Synchronous.

    Parameters:
        x_column_name (str)
            Numeric column for the box's x axis.
        min_x (float)
            Lower x bound; must be <= *max_x*.
        max_x (float)
            Upper x bound; must be >= *min_x*.
        y_column_name (str)
            Numeric column for the box's y axis.
        min_y (float)
            Lower y bound; must be <= *max_y*.
        max_y (float)
            Upper y bound; must be >= *min_y*.
        options (dict of str to str)
            Optional parameters (e.g. *collection_name*).  Default
            None, treated as {}.
        view_name (str)
            Name of the view to hold the results; same naming
            restrictions as tables.  Default ''.

    Returns:
        A read-only GPUdbTable object wrapping the result view.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared-mutable-default pitfall.
    if options is None:
        options = {}

    # Apply any configured name prefixing/qualification.
    view_name = self.__process_view_name( view_name )

    response = self.db.filter_by_box( self.name, view_name,
                                      x_column_name, min_x, max_x,
                                      y_column_name, min_y, max_y,
                                      options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )

    # Wrap the view, seeding it with the matched-record count.
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_box
def filter_by_box_geometry( self, column_name = None, min_x = None,
                            max_x = None, min_y = None, max_y = None,
                            options = None, view_name = '' ):
    """Calculates which geospatial geometry objects from a table
    intersect a rectangular box.  The operation is synchronous: the
    response is returned only once all matching objects are fully
    available.  If input parameter *view_name* is given, a new resultant
    view holding the matching records is also created.

    Parameters:
        column_name (str)
            Name of the geospatial geometry column to be filtered.
        min_x (float)
            Lower x bound of the box; must be <= *max_x*.
        max_x (float)
            Upper x bound of the box; must be >= *min_x*.
        min_y (float)
            Lower y bound of the box; must be <= *max_y*.
        max_y (float)
            Upper y bound of the box; must be >= *min_y*.
        options (dict of str to str)
            Optional parameters; may contain **collection_name**.  A
            value of None (the default) is treated as an empty dict.
        view_name (str)
            Optional name of the result view; must not be an already
            existing collection, table or view.  The default value is ''.

    Returns:
        A read-only GPUdbTable object.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    view_name = self.__process_view_name( view_name )
    response = self.db.filter_by_box_geometry( self.name, view_name,
                                               column_name, min_x, max_x,
                                               min_y, max_y, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_box_geometry
def filter_by_geometry( self, column_name = None, input_wkt = '',
                        operation = None, options = None, view_name = '' ):
    """Applies a geometry filter against a geospatial geometry column in
    this table, collection or view.  The filtering geometry is provided
    by input parameter *input_wkt*.

    Parameters:
        column_name (str)
            Name of the geospatial geometry column used in the filter.
        input_wkt (str)
            A geometry in WKT format used to filter the objects.  The
            default value is ''.
        operation (str)
            The geometric filtering operation to perform.  Allowed
            values: 'contains', 'crosses', 'disjoint', 'equals',
            'intersects', 'overlaps', 'touches', 'within'.
        options (dict of str to str)
            Optional parameters; may contain **collection_name**.  A
            value of None (the default) is treated as an empty dict.
        view_name (str)
            If provided, the name of the view containing the results;
            same naming restrictions as tables.  The default value is ''.

    Returns:
        A read-only GPUdbTable object.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    view_name = self.__process_view_name( view_name )
    response = self.db.filter_by_geometry( self.name, view_name,
                                           column_name, input_wkt,
                                           operation, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_geometry
def filter_by_list( self, column_values_map = None, options = None,
                    view_name = '' ):
    """Calculates which records from a table have values in the given
    list for the corresponding column.  The operation is synchronous;
    the response provides the count of the resulting set, and a new view
    is created if input parameter *view_name* is passed.

    A record matches only when every listed column's value appears in
    that column's list; values across lists are not paired positionally.
    E.g. with {"x":["10.1","2.3"], "y":["0.0","-31.5"]}, a record with
    x = 10.1 and y = -31.5 DOES match (each value is in its own list).

    Parameters:
        column_values_map (dict of str to lists of str)
            List of values for the corresponding column in the table.
        options (dict of str to str)
            Optional parameters; may contain **collection_name** and
            **filter_mode** ('in_list' or 'not_in_list'; default
            'in_list').  A value of None (the default) is treated as an
            empty dict.
        view_name (str)
            If provided, the name of the view containing the results;
            same naming restrictions as tables.  The default value is ''.

    Returns:
        A read-only GPUdbTable object.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    view_name = self.__process_view_name( view_name )
    response = self.db.filter_by_list( self.name, view_name,
                                       column_values_map, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_list
def filter_by_radius( self, x_column_name = None, x_center = None,
                      y_column_name = None, y_center = None, radius = None,
                      options = None, view_name = '' ):
    """Calculates which objects from a table lie within a circle with
    the given radius and center point (circular NAI).  The operation is
    synchronous; a view is created if *view_name* is passed.  For track
    data, all track points within the circle plus one point on either
    side (if the track goes beyond the circle) are included.

    Parameters:
        x_column_name (str)
            Column holding the x-coordinate (longitude) of each record.
        x_center (float)
            Longitude of the center; must be within [-180.0, 180.0].
        y_column_name (str)
            Column holding the y-coordinate (latitude) of each record.
        y_center (float)
            Latitude of the center; must be within [-90.0, 90.0].
        radius (float)
            Search radius in meters; must be a non-zero positive value
            (e.g. '42000' means 42 km).
        options (dict of str to str)
            Optional parameters; may contain **collection_name**.  A
            value of None (the default) is treated as an empty dict.
        view_name (str)
            If provided, the name of the view containing the results;
            same naming restrictions as tables.  The default value is ''.

    Returns:
        A read-only GPUdbTable object.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    view_name = self.__process_view_name( view_name )
    response = self.db.filter_by_radius( self.name, view_name,
                                         x_column_name, x_center,
                                         y_column_name, y_center,
                                         radius, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_radius
def filter_by_radius_geometry( self, column_name = None, x_center = None,
                               y_center = None, radius = None,
                               options = None, view_name = '' ):
    """Calculates which geospatial geometry objects from a table
    intersect a circle with the given radius and center point (circular
    NAI).  The operation is synchronous; a view is created if input
    parameter *view_name* is passed.

    Parameters:
        column_name (str)
            Name of the geospatial geometry column to be filtered.
        x_center (float)
            Longitude of the center; must be within [-180.0, 180.0].
        y_center (float)
            Latitude of the center; must be within [-90.0, 90.0].
        radius (float)
            Search radius in meters; must be a non-zero positive value
            (e.g. '42000' means 42 km).
        options (dict of str to str)
            Optional parameters; may contain **collection_name**.  A
            value of None (the default) is treated as an empty dict.
        view_name (str)
            If provided, the name of the view containing the results;
            must not be an already existing collection, table or view.
            The default value is ''.

    Returns:
        A read-only GPUdbTable object.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    view_name = self.__process_view_name( view_name )
    response = self.db.filter_by_radius_geometry( self.name, view_name,
                                                  column_name, x_center,
                                                  y_center, radius,
                                                  options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_radius_geometry
def filter_by_range( self, column_name = None, lower_bound = None,
                     upper_bound = None, options = None, view_name = '' ):
    """Calculates which objects from a table have a column value within
    [*lower_bound*, *upper_bound*] (inclusive).  The operation is
    synchronous and more efficient than the equivalent standard filter.
    For track objects, the count reflects how many individual points
    fall within the bounds.

    Parameters:
        column_name (str)
            Name of the column on which the operation is applied.
        lower_bound (float)
            Value of the lower bound (inclusive).
        upper_bound (float)
            Value of the upper bound (inclusive).
        options (dict of str to str)
            Optional parameters; may contain **collection_name**.  A
            value of None (the default) is treated as an empty dict.
        view_name (str)
            If provided, the name of the view containing the results;
            same naming restrictions as tables.  The default value is ''.

    Returns:
        A read-only GPUdbTable object.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    view_name = self.__process_view_name( view_name )
    response = self.db.filter_by_range( self.name, view_name, column_name,
                                        lower_bound, upper_bound, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_range
def filter_by_series( self, track_id = None, target_track_ids = None,
                      options = None, view_name = '' ):
    """Filters objects matching all points of the given track (track
    type data only).  Finds all other points in the table that fall
    within the specified spatial and temporal ranges of every point of
    the given track; optionally intersects with another track.  The
    operation is synchronous; matching points are stored in a newly
    created result view.

    Parameters:
        track_id (str)
            ID of the track acting as the filtering points; must be an
            existing track within the given table.
        target_track_ids (list of str)
            Up to one track ID to intersect with the "filter" track.  A
            single element is automatically promoted to a list.
        options (dict of str to str)
            Optional parameters; may contain **collection_name**,
            **spatial_radius** (meters, required),
            **time_radius** (seconds, required) and
            **spatial_distance_metric** ('euclidean' or 'great_circle';
            default 'euclidean').  A value of None (the default) is
            treated as an empty dict.
        view_name (str)
            If provided, the name of the view containing the results;
            same naming restrictions as tables.  The default value is ''.

    Returns:
        A read-only GPUdbTable object.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    view_name = self.__process_view_name( view_name )
    response = self.db.filter_by_series( self.name, view_name, track_id,
                                         target_track_ids, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_series
def filter_by_string( self, expression = None, mode = None,
                      column_names = None, options = None, view_name = '' ):
    """Calculates which objects from a table, collection, or view match
    a string expression for the given string columns.  The option
    'case_sensitive' modifies the behavior for all modes except
    'search'.

    Parameters:
        expression (str)
            The expression with which to filter the table.
        mode (str)
            The string filtering mode.  Allowed values: 'search' (full
            text search; no columns may be specified), 'equals' (exact
            whole-string match, accelerated), 'contains',
            'starts_with', 'regex' (the latter three not accelerated
            and may return 0 on large non-charN string columns).
        column_names (list of str)
            Columns on which to apply the filter; ignored for 'search'
            mode.  A single element is automatically promoted to a list.
        options (dict of str to str)
            Optional parameters; may contain **collection_name** and
            **case_sensitive** ('true'/'false'; default 'true').  A
            value of None (the default) is treated as an empty dict.
        view_name (str)
            If provided, the name of the view containing the results;
            same naming restrictions as tables.  The default value is ''.

    Returns:
        A read-only GPUdbTable object.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    view_name = self.__process_view_name( view_name )
    response = self.db.filter_by_string( self.name, view_name, expression,
                                         mode, column_names, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_string
def filter_by_table( self, column_name = None, source_table_name = None,
                     source_table_column_name = None, options = None,
                     view_name = '' ):
    """Filters objects in this table based on objects in another table.
    Matching column types must be specified for the two tables (names
    need not be the same).  The operation is synchronous; if input
    parameter *view_name* is specified, the filtered objects are placed
    in a newly created view and the result holds its count.

    Parameters:
        column_name (str)
            Column of this table by whose value the data is filtered.
        source_table_name (str)
            Name of the existing table whose data is compared against.
        source_table_column_name (str)
            Column in *source_table_name* whose values act as the
            filter; must be a geospatial geometry column in 'spatial'
            mode, otherwise must match the type of *column_name*.
        options (dict of str to str)
            Optional parameters; may contain **collection_name**,
            **filter_mode** ('in_table'/'not_in_table'; default
            'in_table'), **mode** ('normal'/'spatial'; default
            'normal'), and the spatial-mode-only keys **buffer**,
            **buffer_method**, **max_partition_size**,
            **max_partition_score**, **x_column_name**,
            **y_column_name**.  A value of None (the default) is
            treated as an empty dict.
        view_name (str)
            If provided, the name of the view containing the results;
            same naming restrictions as tables.  The default value is ''.

    Returns:
        A read-only GPUdbTable object.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    view_name = self.__process_view_name( view_name )
    response = self.db.filter_by_table( self.name, view_name, column_name,
                                        source_table_name,
                                        source_table_column_name, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_table
def filter_by_value( self, is_string = None, value = 0, value_str = '',
                     column_name = None, options = None, view_name = '' ):
    """Calculates which objects from a table have a particular value for
    a particular column.  Either a String or a Double valued column and
    a desired value may be specified.  The operation is synchronous and
    more efficient than the equivalent standard filter; a new result
    view is created when a view name is passed.

    Parameters:
        is_string (bool)
            Whether the value being searched for is string or numeric.
        value (float)
            The numeric value to search for.  The default value is 0.
        value_str (str)
            The string value to search for.  The default value is ''.
        column_name (str)
            Name of the column on which the filter is applied.
        options (dict of str to str)
            Optional parameters; may contain **collection_name**.  A
            value of None (the default) is treated as an empty dict.
        view_name (str)
            If provided, the name of the view containing the results;
            same naming restrictions as tables.  The default value is ''.

    Returns:
        A read-only GPUdbTable object.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    view_name = self.__process_view_name( view_name )
    response = self.db.filter_by_value( self.name, view_name, is_string,
                                        value, value_str, column_name,
                                        options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return self.create_view( view_name, response[ "count" ] )
# end filter_by_value
def lock_table( self, lock_type = 'status', options = None ):
    """Manages global access to this table's data.  By default a table
    has a *lock_type* of 'read_write' (all operations permitted).  A
    'read_only', 'write_only' or 'no_access' lock restricts operations
    accordingly until the lock is removed; 'status' queries the current
    lock state without changing it.

    Parameters:
        lock_type (str)
            The type of lock to apply.  Allowed values: 'status',
            'no_access', 'read_only', 'write_only', 'read_write'.  The
            default value is 'status'.
        options (dict of str to str)
            Optional parameters.  A value of None (the default) is
            treated as an empty dict.

    Returns:
        The response from the server, a dict containing:
            lock_type (str) -- the lock state of the table.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    response = self.db.lock_table( self.name, lock_type, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return response
# end lock_table
def revoke_permission_table( self, permission = None, table_name = None,
                             options = None ):
    """Revokes a table-level permission from a user or role.

    Parameters:
        permission (str)
            Permission to revoke.  Allowed values: 'table_admin',
            'table_insert', 'table_update', 'table_delete',
            'table_read'.
        table_name (str)
            Name of the table to which the permission grants access;
            must be an existing table, collection, or view.
        options (dict of str to str)
            Optional parameters.  A value of None (the default) is
            treated as an empty dict.

    Returns:
        The response from the server, a dict containing:
            name (str) -- value of input parameter *name*.
            permission (str) -- value of input parameter *permission*.
            table_name (str) -- value of input parameter *table_name*.

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    response = self.db.revoke_permission_table( self.name, permission,
                                                table_name, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return response
# end revoke_permission_table
def show_table( self, options = None ):
    """Retrieves detailed information about this table, view, or
    collection.  For a collection, the call can describe either the
    collection itself (*show_children* = 'false') or its contained
    tables and views (*show_children* = 'true').  If the *get_sizes*
    option is 'true', per-table sizes and totals are also returned.

    Parameters:
        options (dict of str to str)
            Optional parameters.  Allowed keys include
            **force_synchronous** ('true'/'false'; default 'true'),
            **get_sizes** ('true'/'false'; default 'false'),
            **show_children** ('true'/'false'; default 'true'),
            **no_error_if_not_exists** ('true'/'false'; default
            'false') and **get_column_info** ('true'/'false'; default
            'false').  A value of None (the default) is treated as an
            empty dict.

    Returns:
        The response from the server, a dict containing entries such as
        table_name, table_names, table_descriptions, type_ids,
        type_schemas, type_labels, properties, additional_info, sizes,
        full_sizes, join_sizes, total_size and total_full_size (size
        fields are populated only when *get_sizes* is 'true').

    Raises:
        GPUdbException -- Upon an error from the server.
    """
    # Avoid the shared mutable {} default argument; normalize here.
    if options is None:
        options = {}
    response = self.db.show_table( self.name, options )
    if not _Util.is_ok( response ):
        raise GPUdbException( _Util.get_error_msg( response ) )
    return response
# end show_table
[docs] def update_records( self, expressions = None, new_values_maps = None, records_to_insert = [], records_to_insert_str = [], record_encoding = 'binary', options = {} ): """Runs multiple predicate-based updates in a single call. With the list of given expressions, any matching record's column values will be updated as provided in input parameter *new_values_maps*. There is also an optional 'upsert' capability where if a particular predicate doesn't match any existing record, then a new record can be inserted. Note that this operation can only be run on an original table and not on a collection or a result view. This operation can update primary key values. By default only 'pure primary key' predicates are allowed when updating primary key values. If the primary key for a table is the column 'attr1', then the operation will only accept predicates of the form: "attr1 == 'foo'" if the attr1 column is being updated. For a composite primary key (e.g. columns 'attr1' and 'attr2') then this operation will only accept predicates of the form: "(attr1 == 'foo') and (attr2 == 'bar')". Meaning, all primary key columns must appear in an equality predicate in the expressions. Furthermore each 'pure primary key' predicate must be unique within a given request. These restrictions can be removed by utilizing some available options through input parameter *options*. Parameters: expressions (list of str) A list of the actual predicates, one for each update; format should follow the guidelines :meth:`here <.filter>`. new_values_maps (list of dicts of str to str and/or None) List of new values for the matching records. Each element is a map with (key, value) pairs where the keys are the names of the columns whose values are to be updated; the values are the new values. The number of elements in the list should match the length of input parameter *expressions*. records_to_insert (list of str) An *optional* list of new binary-avro encoded records to insert, one for each update. 
If one of input parameter *expressions* does not yield a matching record to be updated, then the corresponding element from this list will be added to the table. The default value is an empty list ( [] ). records_to_insert_str (list of str) An optional list of new json-avro encoded objects to insert, one for each update, to be added to the set if the particular update did not affect any objects. The default value is an empty list ( [] ). record_encoding (str) Identifies which of input parameter *records_to_insert* and input parameter *records_to_insert_str* should be used. Allowed values are: * binary * json The default value is 'binary'. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Allowed keys are: * **global_expression** -- An optional global expression to reduce the search space of the predicates listed in input parameter *expressions*. The default value is ''. * **bypass_safety_checks** -- When set to *true*, all predicates are available for primary key updates. Keep in mind that it is possible to destroy data in this case, since a single predicate may match multiple objects (potentially all of records of a table), and then updating all of those records to have the same primary key will, due to the primary key uniqueness constraints, effectively delete all but one of those updated records. Allowed values are: * true * false The default value is 'false'. * **update_on_existing_pk** -- Can be used to customize behavior when the updated primary key value already exists as described in :meth:`.insert_records`. Allowed values are: * true * false The default value is 'false'. * **use_expressions_in_new_values_maps** -- When set to *true*, all new values in input parameter *new_values_maps* are considered as expression values. When set to *false*, all new values in input parameter *new_values_maps* are considered as constants. 
NOTE: When *true*, string constants will need to be quoted to avoid being evaluated as expressions. Allowed values are: * true * false The default value is 'false'. * **record_id** -- ID of a single record to be updated (returned in the call to :meth:`.insert_records` or :meth:`.get_records_from_collection`). Returns: The response from the server which is a dict containing the following entries-- count_updated (long) Total number of records updated. counts_updated (list of longs) Total number of records updated per predicate in input parameter *expressions*. count_inserted (long) Total number of records inserted (due to expressions not matching any existing records). counts_inserted (list of longs) Total number of records inserted per predicate in input parameter *expressions* (will be either 0 or 1 for each expression). Raises: GPUdbException -- Upon an error from the server. """ response = self.db.update_records( self.name, expressions, new_values_maps, records_to_insert, records_to_insert_str, record_encoding, options, record_type = self.record_type ) if not _Util.is_ok( response ): raise GPUdbException( _Util.get_error_msg( response ) ) return response
# end update_records
[docs] def update_records_by_series( self, world_table_name = None, view_name = '', reserved = [], options = {} ): """Updates the view specified by input parameter *table_name* to include full series (track) information from the input parameter *world_table_name* for the series (tracks) present in the input parameter *view_name*. Parameters: world_table_name (str) Name of the table containing the complete series (track) information. view_name (str) Optional name of the view containing the series (tracks) which have to be updated. The default value is ''. reserved (list of str) The default value is an empty list ( [] ). The user can provide a single element (which will be automatically promoted to a list internally) or a list. options (dict of str to str) Optional parameters. The default value is an empty dict ( {} ). Returns: The response from the server which is a dict containing the following entries-- count (int) Raises: GPUdbException -- Upon an error from the server. """ response = self.db.update_records_by_series( self.name, world_table_name, view_name, reserved, options ) if not _Util.is_ok( response ): raise GPUdbException( _Util.get_error_msg( response ) ) return response
# end update_records_by_series def visualize_image_labels( self, x_column_name = None, y_column_name = None, x_offset = '', y_offset = '', text_string = None, font = '', text_color = '', text_angle = '', text_scale = '', draw_box = '', draw_leader = '', line_width = '', line_color = '', fill_color = '', leader_x_column_name = '', leader_y_column_name = '', filter = '', min_x = None, max_x = None, min_y = None, max_y = None, width = None, height = None, projection = 'PLATE_CARREE', options = {} ): response = self.db.visualize_image_labels( self.name, x_column_name, y_column_name, x_offset, y_offset, text_string, font, text_color, text_angle, text_scale, draw_box, draw_leader, line_width, line_color, fill_color, leader_x_column_name, leader_y_column_name, filter, min_x, max_x, min_y, max_y, width, height, projection, options ) if not _Util.is_ok( response ): raise GPUdbException( _Util.get_error_msg( response ) ) return response
# end visualize_image_labels # end class GPUdbTable # --------------------------------------------------------------------------- # GPUdbTableIterator - Iterator Class to iterate over records in a table # --------------------------------------------------------------------------- class GPUdbTableIterator( Iterator ): """Iterates over a chunk of records of a given table. Once the initial chunk of records have been iterated over, a new iterator object must be instantiated since there is no way to guarantee that getting another chunk would yield the 'next' set of records without duplicates or skipping over records. GPUdb does not guarantee any order or returned records via /get/records/\*. """ def __init__( self, table, offset = 0, limit = 10000, db = None ): """Initiate the iterator with the given table, offset, and limit. Parameters: table (GPUdbTable) A GPUdbTable object or a name of a table offset (int) An integer value greater than or equal to 0. limit (int) An integer value greater than or equal to 1. 
db (GPUdb) Optional GPUdb object """ # Validate and set the offset if not isinstance( offset, (int, long) ) or (offset < 0): raise GPUdbException( "Offset must be >= 0; given {0}" "".format( offset ) ) self.offset = offset if not isinstance( limit, (int, long) ) or (limit < 1): raise GPUdbException( "Limit must be >= 1; given {0}" "".format( limit ) ) self.limit = limit # Save the table name and the GPUdb object if isinstance( table, GPUdbTable ): self.table = table elif isinstance( table, (str, unicode) ): if not isinstance( db, GPUdb ): raise GPUdbException( "Argument 'db' must be a GPUdb object " "if 'table' is the table name; given " "{0}".format( type( db ) ) ) # Create the table object self.table = GPUdbTable( None, table, db = db ) else: raise GPUdbException( "Argument 'table' must be a GPUdbTable object" " or a string; given {0}".format( table ) ) self.cursor = 0 # Call /get/records to get the batch of records records = self.table.get_records( offset = self.offset, limit = self.limit ) self.records = records # end __init__ def __iter__( self ): return self def next( self ): return self.__next__() # end next def __next__( self ): # For python3 if (self.cursor == len( self.records ) ): raise StopIteration() cursor = self.cursor self.cursor += 1 return self.records[ cursor ] # end __next__ # end class GPUdbTableIterator # --------------------------------------------------------------------------- # GPUdbTableOptions - Class to handle GPUdb table creation options # ---------------------------------------------------------------------------
class GPUdbTableOptions(object):
    """
    Encapsulates the various options used to create a table.  The same
    object can be used on multiple tables and state modifications are
    chained together:

    ::

        opts = GPUdbTableOptions.default().collection_name('coll_name')
        table1 = Table( None, options = opts )
        table2 = Table( None, options = opts.is_replicated( True ) )
    """

    # Server-side option names (name-mangled to keep them private)
    __no_error_if_exists          = "no_error_if_exists"
    __collection_name             = "collection_name"
    __is_collection               = "is_collection"
    __disallow_homogeneous_tables = "disallow_homogeneous_tables"
    __is_replicated               = "is_replicated"
    __foreign_keys                = "foreign_keys"
    __foreign_shard_key           = "foreign_shard_key"
    __ttl                         = "ttl"
    __chunk_size                  = "chunk_size"
    __is_result_table             = "is_result_table"

    _supported_options = [ __no_error_if_exists,
                           __collection_name,
                           __is_collection,
                           __disallow_homogeneous_tables,
                           __is_replicated,
                           __foreign_keys,
                           __foreign_shard_key,
                           __ttl,
                           __chunk_size,
                           __is_result_table ]

    @staticmethod
    def default():
        """Return a GPUdbTableOptions object with all default option values."""
        return GPUdbTableOptions()

    @staticmethod
    def _parse_bool( option_name, val ):
        """Convert a boolean or a 'true'/'false' string to a bool.

        Parameters:
            option_name (str)
                The option's name, used only in the error message.
            val (bool or str)
                The value to convert.

        Raises:
            GPUdbException --
                If 'val' is neither a bool nor a 'true'/'false' string.
        """
        if isinstance( val, bool ):
            return val
        # Bug fix: the original validated against val.lower() but then
        # compared the raw value to "true", so inputs like 'True' or 'FALSE'
        # passed validation yet silently became False.  Compare the
        # lower-cased value throughout.
        if val.lower() in ["true", "false"]:
            return (val.lower() == "true")
        raise GPUdbException( "Value for '" + option_name + "' must be "
                              "boolean or one of ['true', 'false']; "
                              "given " + repr( val ) )
    # end _parse_bool

    def __init__(self, _dict = None):
        """Create a default set of options for create_table().

        Parameters:
            _dict (dict)
                Optional dictionary with options already loaded; each key
                must be one of the supported option names.

        Returns:
            A GPUdbTableOptions object.

        Raises:
            GPUdbException --
                If '_dict' is not a dict or contains unsupported options.
        """
        # Set default values
        self._no_error_if_exists          = False
        self._collection_name             = None
        self._is_collection               = False
        self._disallow_homogeneous_tables = False
        self._is_replicated               = False
        self._foreign_keys                = None
        self._foreign_shard_key           = None
        self._ttl                         = None
        self._chunk_size                  = None
        self._is_result_table             = None

        if (_dict is None):
            return # nothing to do

        if not isinstance( _dict, dict ):
            raise GPUdbException( "Argument '_dict' must be a dict; given '%s'."
                                  % type( _dict ) )

        # Reject any unrecognized option up front
        unsupported_options = set( _dict.keys() ).difference( self._supported_options )
        if unsupported_options:
            raise GPUdbException( "Invalid options: %s" % unsupported_options )

        # Route each (option, value) pair through its setter method so that
        # the values get validated
        for (key, val) in _dict.items():
            getattr( self, key )( val )
    # end __init__

    def as_json(self):
        """Return the options as a dict (for using directly in create_table());
        all values are rendered as strings, and unset (None) options are
        omitted entirely.
        """
        result = {}

        # Options serialized as 'true'/'false'
        bool_options = [ (self.__no_error_if_exists,          self._no_error_if_exists),
                         (self.__is_collection,               self._is_collection),
                         (self.__is_replicated,               self._is_replicated),
                         (self.__is_result_table,             self._is_result_table),
                         (self.__disallow_homogeneous_tables, self._disallow_homogeneous_tables) ]
        # Options serialized via str()
        str_options  = [ (self.__collection_name,   self._collection_name),
                         (self.__chunk_size,        self._chunk_size),
                         (self.__foreign_keys,      self._foreign_keys),
                         (self.__foreign_shard_key, self._foreign_shard_key),
                         (self.__ttl,               self._ttl) ]

        for (name, value) in bool_options:
            if value is not None:
                result[ name ] = "true" if value else "false"
        for (name, value) in str_options:
            if value is not None:
                result[ name ] = str( value )
        return result
    # end as_json

    def as_dict(self):
        """Return the options as a dict for using directly in create_table()"""
        return self.as_json()
    # end as_dict

    def no_error_if_exists(self, val):
        """When creating a new table, sets whether an already-existing table
        of the same name is silently accepted instead of raising an error.
        """
        self._no_error_if_exists = self._parse_bool( self.__no_error_if_exists, val )
        return self
    # end no_error_if_exists

    def collection_name(self, val):
        """When creating a new table, sets the name of the collection which
        is to contain the table.  If the collection specified is
        non-existent, the collection will be automatically created.  If not
        specified, the newly created table will be a top-level table.
        """
        if (val and not isinstance( val, basestring )):
            raise GPUdbException( "'collection_name' must be a string value; given '%s'" % val )
        self._collection_name = val
        return self
    # end collection_name

    def is_collection(self, val):
        """When creating a new entity, sets whether the entity is a
        collection or a table (the default).
        """
        self._is_collection = self._parse_bool( self.__is_collection, val )
        return self
    # end is_collection

    def disallow_homogeneous_tables(self, val):
        """When creating a new collection, sets whether the collection
        prohibits containment of multiple tables of exactly the same type.
        """
        self._disallow_homogeneous_tables = self._parse_bool( self.__disallow_homogeneous_tables, val )
        return self
    # end disallow_homogeneous_tables

    def is_replicated(self, val):
        """When creating a new table, sets whether the table is replicated
        or not (the default).
        """
        self._is_replicated = self._parse_bool( self.__is_replicated, val )
        return self
    # end is_replicated

    def is_result_table(self, val):
        """When creating a new table, sets whether the table is an in-memory
        table or not (the default).  An in-memory table cannot contain
        *store-only*, *text-searchable*, or unrestricted length string
        columns; and it will not be retained if the server is restarted.
        """
        self._is_result_table = self._parse_bool( self.__is_result_table, val )
        return self
    # end is_result_table

    def foreign_keys(self, val):
        """Sets the foreign keys option; the value is passed through to the
        server verbatim (no client-side validation).
        """
        self._foreign_keys = val
        return self
    # end foreign_keys

    def foreign_shard_key(self, val):
        """Sets the foreign shard key option; the value is passed through to
        the server verbatim (no client-side validation).
        """
        self._foreign_shard_key = val
        return self
    # end foreign_shard_key

    def ttl(self, val):
        """Sets the table's TTL option; the value is passed through to the
        server verbatim (no client-side validation).
        """
        self._ttl = val
        return self
    # end ttl

    def chunk_size(self, val):
        """Sets the table's chunk size option; the value is passed through
        to the server verbatim (no client-side validation).
        """
        self._chunk_size = val
        return self
    # end chunk_size

# end class GPUdbTableOptions