##############################################
# Autogenerated GPUdb Python API file.
#
# *****Do NOT modify this file*****
#
##############################################
# ---------------------------------------------------------------------------
# gpudb.py - The Python API to interact with a GPUdb server.
#
# Copyright (c) 2018 Kinetica DB Inc.
# ---------------------------------------------------------------------------
from __future__ import print_function
try:
from builtins import DeprecationWarning
except:
import functools
from typing import List
import warnings
try:
from io import BytesIO
except:
from cStringIO import StringIO as BytesIO
try:
import httplib
except:
import http.client as httplib
try:
#python2
from urllib import urlencode
except ImportError:
#python3
from urllib.parse import urlencode
import base64
import copy
import os
import inspect
import json
import logging
import random
import re
import ssl
import sys
import time
import traceback
import uuid
from decimal import Decimal
# We'll need to do python 2 vs. 3 things in many places
IS_PYTHON_3 = (sys.version_info[0] >= 3) # checking the major component
IS_PYTHON_27_OR_ABOVE = sys.version_info >= (2, 7)
if IS_PYTHON_3:
long = int
basestring = str
class unicode:
"""Ensure python 3 doesn't complain about use of unicode."""
pass
from collections.abc import Iterator
else:
from collections import Iterator
# end if
# ---------------------------------------------------------------------------
# The absolute path of this gpudb.py module for importing local packages
gpudb_module_path = os.path.dirname(os.path.abspath(__file__))
# Search for our modules first, probably don't need imp or virt envs.
for gpudb_path in [gpudb_module_path, gpudb_module_path + "/packages"]:
if gpudb_path not in sys.path:
sys.path.append(gpudb_path)
# ---------------------------------------------------------------------------
# Local imports
# C-extension classes for avro encoding/decoding
from .protocol import RecordType
from .protocol import Record
from .protocol import Schema
try: # Installed
from gpudb.packages.avro import schema, datafile, io
import gpudb.packages.enum34 as enum
except ImportError:
from packages.avro import schema, datafile, io
import packages.enum34 as enum
if IS_PYTHON_27_OR_ABOVE:
import collections
else:
import ordereddict as collections # a separate package
# Override some python3 avro things
if IS_PYTHON_3:
schema.parse = schema.Parse
schema.RecordSchema.fields_dict = schema.RecordSchema.field_map
HAVE_SNAPPY = False
try:
import snappy
HAVE_SNAPPY = True
except ImportError:
HAVE_SNAPPY = False
try: # Installed
from gpudb.packages.kinetica_tabulate import tabulate
except ImportError: # Local
from packages.kinetica_tabulate import tabulate
# -----------------------------------------------------------------
# Logging -- Add a trace method
# -----------------------------
logging.TRACE = 9
logging.addLevelName( logging.TRACE, "TRACE" )
def trace( self, message, *args, **kws ):
if self.isEnabledFor( logging.TRACE ):
# Yes, logger takes its '*args' as 'args'
self._log( logging.TRACE, message, args, **kws )
# end if
# end def trace
logging.Logger.trace = trace
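# Illustrative sketch (not part of the generated API): how the TRACE level and
# the trace() method attached above might be used. The logger name below is
# hypothetical.
def _example_trace_logging():
    example_logger = logging.getLogger( "gpudb.example" )
    example_logger.setLevel( logging.TRACE )
    # trace() only logs when the logger's effective level is TRACE (9) or lower
    example_logger.trace( "Verbose detail: %s", "payload dump" )
# end _example_trace_logging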
# -----------------------------------------------------------------
if IS_PYTHON_3:
import functools
import warnings
def deprecated(func):
"""This is a decorator which can be used to mark functions
as deprecated. It will result in a warning being emitted
when the function is used."""
@functools.wraps(func)
def new_func(*args, **kwargs):
warnings.simplefilter('always', DeprecationWarning) # turn off filter
warnings.warn("Call to deprecated function {}.".format(func.__name__),
category=DeprecationWarning,
stacklevel=2)
warnings.simplefilter('default', DeprecationWarning) # reset filter
return func(*args, **kwargs)
return new_func
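# Illustrative sketch (not part of the generated API): applying the 'deprecated'
# decorator defined above (available on Python 3 only). The helper name below is
# hypothetical.
def _example_deprecated_usage():
    @deprecated
    def old_helper():
        return 42
    # Calling the wrapped function emits a DeprecationWarning via warnings.warn
    return old_helper()
# end _example_deprecated_usage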
# Some string constants used throughout the program
class C:
"""Some string constants used throughout the program."""
_fields = "fields"
# Some endpoints frequently used internally
_ENDPOINT_SHOW_SYSTEM_STATUS = "/show/system/status"
_ENDPOINT_SHOW_SYSTEM_PROPERTIES = "/show/system/properties"
# Some common endpoint strings
_collection_name = "collection_name"
# /show/table response
_table_descriptions = "table_descriptions"
_collection = "COLLECTION"
_view = "VIEW"
_replicated = "REPLICATED"
_join = "JOIN"
_result_table = "RESULT_TABLE"
_total_full_size = "total_full_size"
_additional_info = "additional_info"
_collection_names = "collection_names"
# /show/system/properties response
_property_map = "property_map"
_gaia_version = "version.gpudb_core_version"
_enable_ha = "conf.enable_ha"
_ha_ring_head_nodes = "conf.ha_ring_head_nodes_full"
# Special error messages coming from the database
_DB_HM_OFFLINE_ERROR_MESSAGE = "System is offline"
_DB_EXITING_ERROR_MESSAGE = "Kinetica is exiting"
_DB_OFFLINE_ERROR_MESSAGE = "Kinetica is offline"
_DB_SYSTEM_LIMITED_ERROR_MESSAGE = "system-limited-fatal"
_DB_CONNECTION_REFUSED = "Connection refused"
_DB_CONNECTION_RESET = "Connection reset"
_DB_EOF_FROM_SERVER_ERROR_MESSAGE = "Unexpected end of file from server"
# Some pre-fixes used in creating error messages
_FAILED_CONNECTION_HAS_HA = "Connection failed; all clusters in the HA ring have been tried! Error encountered: "
_FAILED_CONNECTION_NO_HA = "Connection failed (no HA available); Error encountered: "
# Internally used headers
_HEADER_ACCEPT = "Accept"
_HEADER_AUTHORIZATION = "Authorization"
_HEADER_CONTENT_TYPE = "Content-type"
_HEADER_HA_SYNC_MODE = "X-Kinetica-Group"
# Internally used header values
_REQUEST_ENCODING_JSON = "application/json"
_REQUEST_ENCODING_OCTET = "application/octet-stream"
_REQUEST_ENCODING_SNAPPY = "application/x-snappy"
# Connection constants
_REQUEST_GET = "GET"
_REQUEST_POST = "POST"
# Constants used in endpoint requests
_ENCODING_BINARY = "BINARY"
_ENCODING_JSON = "JSON"
_ENCODING_SNAPPY = "SNAPPY"
# Constants used in endpoint responses
_SHOW_SYSTEM_STATUS_RESPONSE_SYSTEM = "system"
_SHOW_SYSTEM_STATUS_RESPONSE_STATUS = "status"
_SHOW_SYSTEM_STATUS_RESPONSE_RUNNING = "running"
_SHOW_SYSTEM_STATUS_RESPONSE_TRUE = "true"
_SYSTEM_PROPERTIES_RESPONSE_ENABLE_HTTPD = "conf.enable_httpd_proxy"
_SYSTEM_PROPERTIES_RESPONSE_NUM_HOSTS = "conf.number_of_hosts"
_SYSTEM_PROPERTIES_RESPONSE_USE_HTTPS = "conf.use_https"
_SYSTEM_PROPERTIES_RESPONSE_HEAD_NODE_URLS = "conf.ha_ring_head_nodes_full"
_SYSTEM_PROPERTIES_RESPONSE_SERVER_URLS = "conf.worker_http_server_urls"
_SYSTEM_PROPERTIES_RESPONSE_SERVER_VERSION = "version.gpudb_core_version"
_SYSTEM_PROPERTIES_RESPONSE_TRUE = "TRUE"
# Other constants
_KINETICA_IS_RUNNING = "Kinetica is running!"
# end class C
# ---------------------------------------------------------------------------
# GPUdbException - Exception for Generic GPUdb Issues
# ---------------------------------------------------------------------------
class GPUdbException( Exception ):
    # We need to handle the given exception differently for different pythons
if IS_PYTHON_3:
def __init__( self, value, had_connection_failure = False ):
self.value = value
self.__had_connection_failure = had_connection_failure
if isinstance(value, (basestring, unicode)):
# We got a message only
self.message = value
self.traceback_msg = ""
elif isinstance(value, Exception):
# Preserve the message and also the stack trace
self.message = value.args[0]
self.traceback_msg = "".join( traceback.format_exception( sys.exc_info()[0],
sys.exc_info()[1],
sys.exc_info()[2] ) )
# end __init__
else:
def __init__( self, value, had_connection_failure = False ):
self.value = value
self.__had_connection_failure = had_connection_failure
if isinstance(value, (basestring, unicode)):
# We got a message only
self.message = value
self.traceback_msg = ""
elif isinstance(value, Exception):
# Preserve the message and also the stack trace
self.message = value.message
self.traceback_msg = "".join( traceback.format_exception( sys.exc_info()[0],
sys.exc_info()[1],
sys.exc_info()[2] ) )
# end __init__
# end if
def __str__( self ):
return repr( self.value )
# end __str__
    def get_formatted_traceback( self ):
        """Return the saved traceback for the wrapped exception, if any."""
        return self.traceback_msg
    # end get_formatted_traceback
    def is_connection_failure( self ):
"""Returns:
True if the error is related to a connection failure; False
otherwise.
"""
if ( ( C._FAILED_CONNECTION_NO_HA in self.message )
or ( C._FAILED_CONNECTION_HAS_HA in self.message )
or ( "Connection refused" in self.message )
or ( "Connection reset" in self.message ) ):
# if ( ( C._FAILED_CONNECTION_NO_HA in self.message )
# or ( C._FAILED_CONNECTION_HAS_HA in self.message ) ):
return True
return False
# end is_connection_failure
def had_connection_failure( self ):
return self.__had_connection_failure
# end had_connection_failure
    @staticmethod
def stringify_exception( ex ):
"""Given any exception, return a stringified representation.
If an error message is available, return that. If no error message,
        then return the exception's representation using the repr() method.
Note that this method should probably only be used for Python exceptions
that are not derived from GPUdbException (or itself). All GPUdbException
objects (and its children) ought to have a proper error message; so this
extra check should not be needed.
Parameters:
ex (Exception)
Python exception object.
Returns:
A string representing the exception.
"""
if str(ex):
return str(ex)
else:
return repr(ex)
# end stringify_exception
# end class GPUdbException
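# Illustrative sketch (not part of the generated API): one way calling code
# might examine a GPUdbException. The callable passed in is hypothetical.
def _example_gpudb_exception_handling( db_call ):
    try:
        return db_call()
    except GPUdbException as ex:
        if ex.is_connection_failure():
            # A connection-level problem; e.g. retry against another cluster
            raise GPUdbException( "Connection problem: " + ex.message )
        # Some other database error; re-raise as-is
        raise
# end _example_gpudb_exception_handling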
# ---------------------------------------------------------------------------
# GPUdbConnectionException - Exception for HTTP Issues
# ---------------------------------------------------------------------------
class GPUdbConnectionException( GPUdbException ):
def __init__( self, value ):
self.value = value
self.message = value
# end __init__
def __str__( self ):
return repr( self.value )
# end __str__
# end class GPUdbConnectionException
# ---------------------------------------------------------------------------
# GPUdbDecodingException - Exception for HTTP Issues
# ---------------------------------------------------------------------------
class GPUdbDecodingException( GPUdbException ):
def __init__( self, value ):
self.value = value
self.message = value
# end __init__
def __str__( self ):
return repr( self.value )
# end __str__
# end class GPUdbDecodingException
# ---------------------------------------------------------------------------
# GPUdbExitException - Exception for when Kinetica is quitting
# ---------------------------------------------------------------------------
class GPUdbExitException( GPUdbException ):
def __init__( self, value ):
self.value = value
self.message = value
# end __init__
def __str__( self ):
return repr( self.value )
# end __str__
# end class GPUdbExitException
# ---------------------------------------------------------------------------
# GPUdbFailoverDisabledException - Exception to indicate that failover is
# disabled
# ---------------------------------------------------------------------------
class GPUdbFailoverDisabledException( GPUdbException ):
def __init__( self, value ):
self.value = value
self.message = value
# end __init__
def __str__( self ):
return repr( self.value )
# end __str__
# end class GPUdbFailoverDisabledException
# ---------------------------------------------------------------------------
# GPUdbHAUnavailableException - Exception for when HA failover failed
# ---------------------------------------------------------------------------
class GPUdbHAUnavailableException( GPUdbException ):
def __init__( self, value ):
self.value = value
self.message = value
# end __init__
def __str__( self ):
return repr( self.value )
# end __str__
# end class GPUdbHAUnavailableException
# ---------------------------------------------------------------------------
# GPUdbHostnameRegexFailureException - Exception for when a hostname regex
# match fails
# ---------------------------------------------------------------------------
class GPUdbHostnameRegexFailureException( GPUdbException ):
def __init__( self, value ):
self.value = value
self.message = value
# end __init__
def __str__( self ):
return repr( self.value )
# end __str__
# end class GPUdbHostnameRegexFailureException
# ---------------------------------------------------------------------------
# GPUdbUnauthorizedAccessException - Exception for authorization problems
# ---------------------------------------------------------------------------
class GPUdbUnauthorizedAccessException( GPUdbException ):
"""Thrown when there is an authorization problem (bad username or password
given) or an SSL certificate verification failure occurred.
"""
def __init__( self, value ):
self.value = value
self.message = value
# end __init__
def __str__( self ):
return repr( self.value )
# end __str__
# end class GPUdbUnauthorizedAccessException
# ---------------------------------------------------------------------------
# _ConnectionToken - Private wrapper class to manage connection logic
# ---------------------------------------------------------------------------
class _ConnectionToken(object):
"""Internal wrapper class to handle multiple server logic."""
def __init__(self, host, port = 9191, host_manager_port = 9300, connection = "http"):
if not isinstance(host, (basestring, unicode)):
raise GPUdbException( "Expected a string host address, got: '"+str(host)+"'" )
# host may take the form of :
# - "https://user:password@domain.com:port/path/"
# Extract the protocol (default is HTTP)
connection = connection.upper()
url_protocol_split = host.split('://')
if len(url_protocol_split) > 2:
# Can't have :// in the URL multiple times
raise GPUdbException( "Can have '://' in the URL at most once; "
" bad URL: '{}'".format( host ) )
elif len(url_protocol_split) == 2:
# Found protocol separated by ://
# Remove the protocol from the host
host = url_protocol_split[ 1 ]
# Validate the protocol
connection = url_protocol_split[ 0 ]
connection = connection.upper()
if connection not in ["HTTP", "HTTPS"]:
raise GPUdbException( "Expected protocol to be 'HTTP' or "
"'HTTPS'; got '{}'"
"".format( connection ) )
# end if
# Parse the username and password, if supplied.
host_at_sign_pos = host.find('@')
if host_at_sign_pos != -1 :
user_pass = host[:host_at_sign_pos]
host = host[host_at_sign_pos+1:]
user_pass_list = user_pass.split(':')
username = user_pass_list[0]
if len(user_pass_list) > 1 :
password = user_pass_list[1]
url_path = ""
# Find the URL /path/ and remove it to get the ip address.
host_path_pos = host.find('/')
if host_path_pos != -1:
url_path = host[host_path_pos:]
if url_path[-1] == '/':
url_path = url_path[:-1]
host = host[:host_path_pos]
# Override default port if specified in ip address
host_port_pos = host.find(':')
if host_port_pos != -1 :
port = host[host_port_pos+1:]
host = host[:host_port_pos]
# Port does not have to be provided if using standard HTTP(S) ports.
if (port == None) or len(str(port)) == 0:
if connection == 'HTTP' :
port = 80
elif connection == 'HTTPS' :
port = 443
# Validate the head node port
try :
port = int( port )
except:
raise GPUdbException( "Expected a numeric port, got: '" + str(port) + "'" )
# Validate the host manager port
try :
host_manager_port = int( host_manager_port )
except:
raise GPUdbException( "Expected a numeric host manager port, got: '" + str(host_manager_port) + "'" )
# Port values must be within (0, 65536)
if ( (port <= 0) or (port >= 65536) ):
raise GPUdbException( "Expected a valid port (1-65535), got: '"+str(port)+"'" )
if ( (host_manager_port <= 0) or (host_manager_port >= 65536) ):
raise GPUdbException( "Expected a valid host manager port (1-65535), got: '"+str(host_manager_port)+"'" )
# Must have at least one host
if not (len(host) > 0):
raise GPUdbException( "Expected a valid host address, got an empty string." )
# Valid protocols: http and https
if connection not in ["HTTP", "HTTPS"]:
raise GPUdbException( "Expected connection to be 'HTTP' or 'HTTPS', got: '"+str(connection)+"'" )
# Construct the full URL
full_url = ( "{protocol}://{ip}:{port}{path}"
"".format( protocol = connection.lower(),
ip = host,
port = port,
path = url_path ) )
self._host = str( host )
self._port = int( port )
self._host_manager_port = int( host_manager_port )
self._connection = str( connection )
self._gpudb_url_path = str( url_path )
self._gpudb_full_url = str( full_url )
# end __init__
def __eq__( self, other ):
if isinstance(other, self.__class__):
if ( self._host != other._host ):
return False
if ( self._port != other._port ):
return False
if ( self._host_manager_port != other._host_manager_port ):
return False
if ( self._connection != other._connection ):
return False
if ( self._gpudb_url_path != other._gpudb_url_path ):
return False
if ( self._gpudb_full_url != other._gpudb_full_url ):
return False
return True
else:
return False
# end __eq__
def __ne__(self, other):
return not self.__eq__(other)
# end __ne__
# end class _ConnectionToken
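# Illustrative sketch (not part of the generated API): the pieces that
# _ConnectionToken extracts from a full URL. The host, credentials, port, and
# path below are hypothetical.
def _example_connection_token():
    token = _ConnectionToken( "https://user:secret@db.example.com:8082/gpudb-0" )
    # After parsing:
    #   token._connection     == "HTTPS"
    #   token._host           == "db.example.com"
    #   token._port           == 8082
    #   token._gpudb_url_path == "/gpudb-0"
    #   token._gpudb_full_url == "https://db.example.com:8082/gpudb-0"
    return token
# end _example_connection_token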
# ---------------------------------------------------------------------------
# Utility Functions
# ---------------------------------------------------------------------------
class _Util(object):
@staticmethod
def is_ok( response_object ):
"""Returns True if the response object's status is OK."""
return (response_object['status_info']['status'] == 'OK')
# end is_ok
@staticmethod
def get_error_msg( response_object ):
"""Returns the error message for the query, if any. None otherwise."""
if (response_object['status_info']['status'] != 'ERROR'):
return None
return response_object['status_info']['message']
# end get_error_msg
@staticmethod
def is_list_or_dict( arg ):
"""Returns whether the given argument either a list or a dict
(or an OrderedDict).
"""
return isinstance( arg, ( list, dict, collections.OrderedDict ) )
# end is_list_or_dict
if IS_PYTHON_3:
# Declaring the python 3 version of this static method
@staticmethod
def str_to_bytes(value):
return bytes( ord(b) for b in value )
# end str_to_bytes
else:
# Declaring the python 2 version of this static method
@staticmethod
def str_to_bytes(value):
if isinstance(value, unicode):
data = bytes()
for c in value:
data += chr(ord(c))
return data
else:
# The python 2 output
return value
# end str_to_bytes
# end py 2 vs. 3
@staticmethod
def ensure_bytes(value):
if isinstance(value, bytes) and not isinstance(value, str):
return value
elif isinstance(value, basestring):
return _Util.str_to_bytes(value)
else:
raise Exception("Unhandled data type: " + str(type(value)))
# end ensure_bytes
@staticmethod
def bytes_to_str(value):
return ''.join([chr(b) for b in value])
# end bytes_to_str
@staticmethod
def ensure_str(value):
if isinstance(value, basestring):
if ( ( not isinstance(value, unicode) )
and (not IS_PYTHON_3) ): # Python 2
return unicode( value, 'utf-8' )
# Python 3
return value
elif isinstance(value, bytes):
return _Util.bytes_to_str(value)
else:
raise Exception("Unhandled data type: " + str(type(value)))
# end ensure_str
@staticmethod
def convert_dict_bytes_to_str(value):
for key in list(value):
val = value[key]
if isinstance(val, bytes) and not isinstance(val, str):
value[key] = ''.join([chr(b) for b in val])
elif isinstance(val, dict):
value[key] = _Util.convert_dict_bytes_to_str(val)
return value
# end convert_dict_bytes_to_str
@staticmethod
def decode_binary_data( SCHEMA, encoded_data ):
"""Given a schema and binary encoded data, decode it.
"""
encoded_data = _Util.ensure_bytes( encoded_data )
output = BytesIO( encoded_data )
bd = io.BinaryDecoder( output )
reader = io.DatumReader( SCHEMA )
out = reader.read( bd ) # read, give a decoder
return out
# end decode_binary_data
@staticmethod
def encode_binary_data( SCHEMA, raw_data, encoding = "binary" ):
"""Given a schema and raw data, encode it.
"""
output = BytesIO()
be = io.BinaryEncoder( output )
# Create a 'record' (datum) writer
writer = io.DatumWriter( SCHEMA )
writer.write( raw_data, be )
result = None
if encoding.lower() == 'json':
result = _Util.ensure_str( output.getvalue() )
else:
result = output.getvalue()
return result
# end encode_binary_data
@staticmethod
def encode_binary_data_cext( SCHEMA, raw_data, encoding = "binary" ):
"""Given a schema and raw data, encode it.
"""
result = None
if encoding.lower() == 'json':
result = _Util.ensure_str( raw_data )
else:
if not isinstance( raw_data, (dict, collections.OrderedDict, Record) ):
raise GPUdbException( "Argument 'raw_data' must be a dict, "
"OrderedDict, or a Record object; given "
"{} ".format( str(type(raw_data)) ) )
# end if
result = SCHEMA.encode( raw_data )
return result
# end encode_binary_data_cext
# Regular expression needed for converting records to protocol.Record objects
    re_datetime_full = re.compile(r"^\d{4}-\d{2}-\d{2}\s+\d{1,2}:\d{2}:\d{2}\.\d{1,3}\Z")
    re_datetime_noMS = re.compile(r"^\d{4}-\d{2}-\d{2}\s+\d{1,2}:\d{2}:\d{2}\Z")
    re_date_only = re.compile(r"^\d{4}-\d{2}-\d{2}\Z")
    re_time_only_ms = re.compile(r"^\d{1,2}:\d{2}:\d{2}\.\d{1,3}\Z")
    re_time_only_noMS = re.compile(r"^\d{1,2}:\d{2}:\d{2}\Z")
@staticmethod
def convert_binary_data_to_cext_records( db, table_name, records, record_type = None ):
"""Given a list of objects, convert them to either bytes or Record objects.
If the records are already of type Record, do nothing. If not, then, if the record
type is given, convert the records into Record objects.
If the associated RecordType is not given, then it is assumed that they have already
been encoded using the python avro package.
Each record can be a list of values (in that case, it is assumed that the values
are given in order of column declaration), a dict, or an OrderedDict.
Parameters:
db (GPUdb)
A client handle for the connection to the database.
table_name (str)
The name of the table to which the records are associated,
must be the name of an existing table.
records (list of Records, lists, dicts, or OrderedDicts)
A list of records. Each record can be a list of values,
a dict, an OrderedDict, or a Record.
record_type (RecordType)
The type for the records. If not given, then it will be
deduced by invoking :meth:`GPUdb.show_table`. Default None.
Returns:
A tuple the first element of which is a boolean indicating whether
the records are encoded into the c-extension Record objects, and the
second element is the list of encoded records. If avro encoding is used,
then the encoded records are simply bytes. If the c-extension avro
package is used, then the encoded records are Record objects.
"""
if not records: # empty list; nothing to do
return (True, records)
# If all the objects are of type Record, no conversion is necessary
if all( [ isinstance(r, Record) for r in records ] ):
# True == the records of c-extension Record type
return (True, records)
if not record_type:
# False == the records were NOT converted to c-extension Record objects
# (it is assumed that the python avro package has been used to encode
# the records)
return (False, records)
# If the record type is given, ensure that it is a RecordType
if not isinstance( record_type, RecordType):
raise GPUdbException( "Argument 'record_type' must be a RecordType object; "
"given {}".format( str(type( record_type )) ) )
# Now convert each record object into Record
converted_records = []
try:
for obj in records:
# Each record object's type will be individually assessed and the
# relevant conversion be applied
if isinstance( obj, Record ):
# Already a Record
converted_records.append( obj )
continue # skip to the next object
elif isinstance( obj, GPUdbRecord ):
# A GPUdbRecord ; get the (column name, column value) pairs
obj = obj.data
elif isinstance( obj, list ):
                    # A list is given; create (col name, col value) pairs (using the
                    # dict constructor to support python 2.6)
obj = dict( [ (record_type[ i ].name, col_val) for (i, col_val) in enumerate( obj ) ] )
# obj = { record_type[ i ].name: col_val for (i, col_val) in enumerate( obj ) }
elif not isinstance( obj, (dict, collections.OrderedDict)):
raise GPUdbException( "Unrecognized format for record (accepted: "
"Record, GPUdbRecord, list, dict, OrderedDict): "
+ str(type( obj )) )
# end if
# Create a Record object based on the column values
record = Record( record_type )
for column in record_type:
col_name = column.name
col_value = obj[ col_name ]
# Handle nulls
if col_value is None:
record[ col_name ] = col_value
continue
# end if
# Get column data type
col_data_type = column.data_type
# Handle unicode
if (col_data_type.lower() == "string"):
if (not IS_PYTHON_3):
col_value = _Util.ensure_str( col_value )
elif (col_data_type.lower() == "decimal"): # Handle decimal
raise GPUdbException("TODO: *********type 'decimal' not supported yet*********")
elif (col_data_type.lower() == "ipv4"): # Handle IPv4
raise GPUdbException("TODO: *********type 'ipv4' not supported yet*********")
elif (col_data_type.lower() == "bytes"):
col_value = _Util.ensure_bytes( col_value )
# NO NEED TO CHECK DATE & TIME FORMATS DUE TO "init_with_now";
# but keeping it around in case the C-module code changes again.
# # Handle datetime
# elif (col_data_type == "datetime"):
# # Conversion needed if it is NOT already a datetime struct
# if not isinstance( col_value, datetime.datetime ):
# # Better be a string if not a datetime object
# if not isinstance( col_value, basestring ):
# raise GPUdbException( "'datetime' type column value must be a datetime "
# "object or a string, given {}".format( str( type( col_value ) ) ) )
# col_value = col_value.strip()
# if _Util.re_datetime_full.match( col_value ):
# # Full datetime with time (including milliseconds)
# col_value = datetime.datetime.strptime( col_value, "%Y-%m-%d %H:%M:%S.%f" )
# elif _Util.re_datetime_noMS.match( col_value ):
# # Date and time, but no milliseconds
# col_value = datetime.datetime.strptime( col_value, "%Y-%m-%d %H:%M:%S" )
# elif _Util.re_date_only.match( col_value ):
# # Date only (no time)
# col_value = datetime.datetime.strptime( col_value, "%Y-%m-%d" )
# else:
# raise GPUdbException( "Could not convert value to datetime pattern ('YYYY-MM-DD [HH:MM:SS[.mmm]]'); "
# "given '{}'".format( col_value ) )
# # end if
# # end if
# elif (col_data_type == "date"): # Handle date
# # Conversion needed if it is NOT already a date struct
# if not isinstance( col_value, datetime.date ):
# # Better be a string if not a date object
# if not isinstance( col_value, basestring ):
# raise GPUdbException( "'date' type column value must be a datetime.date "
# "object or a string, given {}".format( str( type( col_value ) ) ) )
# col_value = col_value.strip()
# # Check that it matches the date pattern
# if _Util.re_date_only.match( col_value ):
# col_value = datetime.datetime.strptime( col_value, "%Y-%m-%d" ).date()
# else:
# raise GPUdbException( "Could not convert value to date pattern ('YYYY-MM-DD'); "
# "given '{}'".format( col_value ) )
# # end if
# # end if
# elif (col_data_type == "time"): # Handle time
# # Conversion needed if it is NOT already a time struct
# if not isinstance( col_value, datetime.time ):
# # Better be a string if not a time object
# if not isinstance( col_value, basestring ):
# raise GPUdbException( "'time' type column value must be a datetime.time "
# "object or a string, given {}".format( str( type( col_value ) ) ) )
# col_value = col_value.strip()
# # Check that it matches the allowed time patterns
# if _Util.re_time_only_ms.match( col_value ):
# # Time with milliseconds
# col_value = datetime.datetime.strptime( col_value, "%H:%M:%S.%f" ).time()
# elif _Util.re_time_only_noMS.match( col_value ):
# # Time without milliseconds
# col_value = datetime.datetime.strptime( col_value, "%H:%M:%S" ).time()
# else:
# raise GPUdbException( "Could not convert value to time pattern ('HH:MM:SS[.mmm]'); "
# "given '{}'".format( col_value ) )
# # end if
# # end if
# end handling special data type conversions
record[ col_name ] = col_value
# end inner loop
converted_records.append( record )
# end loop
except GPUdbException as e:
raise
except KeyError as e:
raise GPUdbException( "Missing column value for '{}'"
"".format( GPUdbException.stringify_exception( e ) ) )
except:
raise GPUdbException( str( sys.exc_info()[1] ) )
# True == the records were converted to c-extension Record objects
return (True, converted_records)
# end convert_binary_data_to_cext_records
# ----------- Begin override of strftime ------------------
# Override datetime's strftime which in python does not accept
# years before 1900--how annoying!
    # remove the unsupported "%s" command. But don't
# do it if there's an even number of %s before the s
# because those are all escaped. Can't simply
# remove the s because the result of
# %sY
# should be %Y if %s isn't supported, not the
# 4 digit year.
_illegal_s = re.compile(r"((^|[^%])(%%)*%s)")
@staticmethod
def __findall(text, substr):
# Also finds overlaps
sites = []
i = 0
while 1:
j = text.find(substr, i)
if j == -1:
break
sites.append(j)
i=j+1
return sites
# end __findall
# Every 28 years the calendar repeats, except through century leap
# years where it's 6 years. But only if you're using the Gregorian
# calendar. ;)
@staticmethod
def strftime(dt, fmt):
if _Util._illegal_s.search(fmt):
raise TypeError("This strftime implementation does not handle %s")
if dt.year > 1900:
return dt.strftime(fmt)
# Handle the microsecond, if desired in the format
microsecond = None
if ".%f" in fmt:
# Zero-padded six-digit microseconds
microsecond = ( "."
+ ("{f}".format( f = dt.microsecond )).rjust( 6, '0' ) )
# Remove .%f from the format
fmt = fmt.replace(".%f", "")
# end if
year = dt.year
# For every non-leap year century, advance by
# 6 years to get into the 28-year repeat cycle
delta = 2000 - year
off = 6*(delta // 100 + delta // 400)
year = year + off
# Move to around the year 2000
year = year + ((2000 - year)//28)*28
timetuple = dt.timetuple()
s1 = time.strftime(fmt, (year,) + timetuple[1:])
sites1 = _Util.__findall(s1, str(year))
s2 = time.strftime(fmt, (year+28,) + timetuple[1:])
sites2 = _Util.__findall(s2, str(year+28))
sites = []
for site in sites1:
if site in sites2:
sites.append(site)
s = s1
syear = "%4d" % (dt.year,)
for site in sites:
s = s[:site] + syear + s[site+4:]
# end loop
if microsecond:
s += microsecond
return s
# end strftime
# ----------- end override ------------------
@staticmethod
def convert_cext_records_to_ordered_dicts( records ):
"""Given a list of Record objects, convert them to OrderedDicts if the
record type contains any date, time, datetime types. Otherwise,
the records (of Record type) will be returned without
any conversion since they are equivalent to OrderedDicts.
If the records are already of type GPUdbRecord or OrderedDicts, do
nothing (return those)
Parameters:
records (list of Records, lists, dicts, or OrderedDicts)
A list of records. Each record can be a list of values,
a dict, an OrderedDict, or a Record.
Returns:
If the record type contains any date, time, datetime, then they will
be converted to strings and a list of OrderedDicts will be returned.
Otherwise, the records (of Record type) will be returned without
any conversion since they are equivalent to OrderedDicts.
"""
if not records: # empty list
return records
# If all the objects are OrderedDicts or GPUdbRecords, no conversion is necessary
if isinstance( records[0], (GPUdbRecord, collections.OrderedDict) ):
return records
# If a conversion is necessary, make sure that all objects are Records
if not all( [ isinstance(r, Record) for r in records ] ):
raise GPUdbException( "Either all records must be Record objects or none; "
"a mix is given." )
# Check if the record contains any date, time, and datetime types
types_needing_conversion = ["datetime", "date", "time", "decimal", "ipv4"]
record_type = records[ 0 ].type
columns_needing_conversion = [ column for column in record_type
if (column.data_type in types_needing_conversion) ]
if not columns_needing_conversion:
return records
# Create OrderedDict objects with the special column values converted
# to strings
converted_records = []
for obj in records:
# Create an OrderedDict object based on the record
record = collections.OrderedDict( map( list, obj.items() ) )
# We only need to convert the special columns
for column in columns_needing_conversion:
col_name = column.name
col_value = record[ col_name ]
# Handle nulls
if col_value is None:
record[ col_name ] = col_value
continue
# end if
# Get column data type
col_data_type = column.data_type
                # For now, all datetime formats are just simple strings; so no
                # need to do the following checks anymore; but keeping them
                # around in case the C-module code changes again.
# if (col_data_type == "datetime"):
# col_value = _Util.strftime( col_value, "%Y-%m-%d %H:%M:%S.%f" )[ : -3 ]
# elif (col_data_type == "date"): # Handle date
# col_value = _Util.strftime( col_value, "%Y-%m-%d" )
# elif (col_data_type == "time"): # Handle time
# col_value = col_value.strftime( "%H:%M:%S.%f" )[ : -3 ]
# Handle decimal and IPv4
if (col_data_type.lower() == "decimal"): # Handle decimal
raise GPUdbException("TODO: *********type 'decimal' not supported yet*********")
elif (col_data_type.lower() == "ipv4"): # Handle IPv4
raise GPUdbException("TODO: *********type 'ipv4' not supported yet*********")
# end handling special data type conversions
record[ col_name ] = col_value
# end inner loop
converted_records.append( record )
# end loop
return converted_records
# end convert_cext_records_to_ordered_dicts
# end class _Util
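# Illustrative sketch (not part of the generated API): a binary avro
# encode/decode round trip using the _Util helpers above. The record schema
# and values below are hypothetical.
def _example_util_avro_round_trip():
    example_schema = schema.parse(
        '{"type": "record", "name": "example_record", "fields": ['
        '{"name": "x", "type": "int"}, {"name": "name", "type": "string"}]}' )
    datum = collections.OrderedDict( [ ("x", 7), ("name", "point A") ] )
    encoded = _Util.encode_binary_data( example_schema, datum )
    decoded = _Util.decode_binary_data( example_schema, encoded )
    return decoded  # a dict equivalent to 'datum'
# end _example_util_avro_round_trip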
# ---------------------------------------------------------------------------
# Utility Classes
# ---------------------------------------------------------------------------
class AttrDict(dict):
"""Converts a dictionary into a class object such that the entries in the
dict can be accessed using dot '.' notation.
"""
def __init__(self, *args, **kwargs):
super(AttrDict, self).__init__(*args, **kwargs)
self.__dict__ = self
# end init
def is_ok( self ):
"""Returns True if the response object's status is OK."""
try:
return (self.__dict__['status_info']['status'] == 'OK')
except KeyError as ex:
raise GPUdbException( "Unknown wrapped object; could not find "
" the following key: {}"
"".format( GPUdbException.stringify_exception( ex ) ) )
# end is_ok
def get_error_msg( self ):
"""Returns the error message for the query, if any. None otherwise."""
try:
if (self.__dict__['status_info']['status'] != 'ERROR'):
return None
return self.__dict__['status_info']['message']
except KeyError as ex:
raise GPUdbException( "Unknown wrapped object; could not find "
" the following key: {}"
"".format( GPUdbException.stringify_exception( ex ) ) )
# end get_error_msg
# end class AttrDict
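# Illustrative sketch (not part of the generated API): AttrDict allows dot
# access to response dictionaries. The keys mirror the standard 'status_info'
# layout; the values below are hypothetical.
def _example_attr_dict():
    response = AttrDict( { "status_info": { "status": "OK", "message": "" },
                           "table_name": "example_table" } )
    # Keys are reachable both as attributes and as dictionary entries
    assert response.table_name == response[ "table_name" ]
    return response.is_ok()  # True, because status_info.status is "OK"
# end _example_attr_dict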
# ---------------------------------------------------------------------------
# GPUdbColumnProperty - Class to Handle GPUdb Column Properties
# ---------------------------------------------------------------------------
class GPUdbColumnProperty(object):
"""Column properties used for GPUdb record data types. The properties
are class-level read-only properties, so the user can use them as such::
GPUdbColumnProperty.prop_name
"""
DATA = "data"
"""str: Default property for all numeric and string type columns; makes the
column available for GPU queries.
"""
TEXT_SEARCH = "text_search"
"""str: Valid only for select 'string' columns. Enables full text search--see
`Full Text Search <../../../../concepts/full_text_search/>`__ for details
and applicable string column types. Can be set independently of *data* and
*store_only*.
"""
STORE_ONLY = "store_only"
"""str: Persist the column value but do not make it available to queries (e.g.
:meth:`GPUdb.filter`)-i.e. it is mutually exclusive to the *data* property.
Any 'bytes' type column must have a *store_only* property. This property
reduces system memory usage.
"""
DISK_OPTIMIZED = "disk_optimized"
"""str: Works in conjunction with the *data* property for string columns. This
property reduces system disk usage by disabling reverse string lookups.
Queries like :meth:`GPUdb.filter`, :meth:`GPUdb.filter_by_list`, and
:meth:`GPUdb.filter_by_value` work as usual but
:meth:`GPUdb.aggregate_unique` and :meth:`GPUdb.aggregate_group_by` are not
allowed on columns with this property.
"""
TIMESTAMP = "timestamp"
"""str: Valid only for 'long' columns. Indicates that this field represents a
timestamp and will be provided in milliseconds since the Unix epoch:
00:00:00 Jan 1 1970. Dates represented by a timestamp must fall between
the year 1000 and the year 2900.
"""
ULONG = "ulong"
"""str: Valid only for 'string' columns. It represents an unsigned long
integer data type. The string can only be interpreted as an unsigned long
data type with minimum value of zero, and maximum value of
18446744073709551615.
"""
UUID = "uuid"
"""str: Valid only for 'string' columns. It represents an uuid data type.
Internally, it is stored as a 128-bit integer.
"""
DECIMAL = "decimal"
"""str: Valid only for 'string' columns. It represents a SQL type NUMERIC(19,
4) data type. There can be up to 15 digits before the decimal point and up
to four digits in the fractional part. The value can be positive or
negative (indicated by a minus sign at the beginning). This property is
mutually exclusive with the *text_search* property.
"""
DATE = "date"
"""str: Valid only for 'string' columns. Indicates that this field represents
a date and will be provided in the format 'YYYY-MM-DD'. The allowable
range is 1000-01-01 through 2900-01-01. This property is mutually
exclusive with the *text_search* property.
"""
TIME = "time"
"""str: Valid only for 'string' columns. Indicates that this field represents
a time-of-day and will be provided in the format 'HH:MM:SS.mmm'. The
allowable range is 00:00:00.000 through 23:59:59.999. This property is
mutually exclusive with the *text_search* property.
"""
DATETIME = "datetime"
"""str: Valid only for 'string' columns. Indicates that this field represents
a datetime and will be provided in the format 'YYYY-MM-DD HH:MM:SS.mmm'.
The allowable range is 1000-01-01 00:00:00.000 through 2900-01-01
23:59:59.999. This property is mutually exclusive with the *text_search*
property.
"""
CHAR1 = "char1"
"""str: This property provides optimized memory, disk and query performance
for string columns. Strings with this property must be no longer than 1
character.
"""
CHAR2 = "char2"
"""str: This property provides optimized memory, disk and query performance
for string columns. Strings with this property must be no longer than 2
characters.
"""
CHAR4 = "char4"
"""str: This property provides optimized memory, disk and query performance
for string columns. Strings with this property must be no longer than 4
characters.
"""
CHAR8 = "char8"
"""str: This property provides optimized memory, disk and query performance
for string columns. Strings with this property must be no longer than 8
characters.
"""
CHAR16 = "char16"
"""str: This property provides optimized memory, disk and query performance
for string columns. Strings with this property must be no longer than 16
characters.
"""
CHAR32 = "char32"
"""str: This property provides optimized memory, disk and query performance
for string columns. Strings with this property must be no longer than 32
characters.
"""
CHAR64 = "char64"
"""str: This property provides optimized memory, disk and query performance
for string columns. Strings with this property must be no longer than 64
characters.
"""
CHAR128 = "char128"
"""str: This property provides optimized memory, disk and query performance
for string columns. Strings with this property must be no longer than 128
characters.
"""
CHAR256 = "char256"
"""str: This property provides optimized memory, disk and query performance
for string columns. Strings with this property must be no longer than 256
characters.
"""
BOOLEAN = "boolean"
"""str: This property provides optimized memory and query performance for int
    columns. Ints with this property must be between 0 and 1 (inclusive).
"""
INT8 = "int8"
"""str: This property provides optimized memory and query performance for int
columns. Ints with this property must be between -128 and +127 (inclusive)
"""
INT16 = "int16"
"""str: This property provides optimized memory and query performance for int
columns. Ints with this property must be between -32768 and +32767
(inclusive)
"""
IPV4 = "ipv4"
"""str: This property provides optimized memory, disk and query performance
for string columns representing IPv4 addresses (i.e. 192.168.1.1). Strings
with this property must be of the form: A.B.C.D where A, B, C and D are in
the range of 0-255.
"""
WKT = "wkt"
"""str: Valid only for 'string' and 'bytes' columns. Indicates that this field
contains geospatial geometry objects in Well-Known Text (WKT) or Well-Known
Binary (WKB) format.
"""
PRIMARY_KEY = "primary_key"
"""str: This property indicates that this column will be part of (or the
entire) `primary key <../../../../concepts/tables/#primary-keys>`__.
"""
SHARD_KEY = "shard_key"
"""str: This property indicates that this column will be part of (or the
entire) `shard key <../../../../concepts/tables/#shard-keys>`__.
"""
NULLABLE = "nullable"
"""str: This property indicates that this column is nullable. However,
setting this property is insufficient for making the column nullable. The
user must declare the type of the column as a union between its regular
type and 'null' in the avro schema for the record type in input parameter
*type_definition*. For example, if a column is of type integer and is
nullable, then the entry for the column in the avro schema must be: ['int',
'null'].
The C++, C#, Java, and Python APIs have built-in convenience for bypassing
setting the avro schema by hand. For those languages, one can use this
property as usual and not have to worry about the avro schema for the
record.
"""
DICT = "dict"
"""str: This property indicates that this column should be `dictionary encoded
<../../../../concepts/dictionary_encoding/>`__. It can only be used in
conjunction with restricted string (charN), int, long or date columns.
Dictionary encoding is best for columns where the cardinality (the number
of unique values) is expected to be low. This property can save a large
amount of memory.
"""
INIT_WITH_NOW = "init_with_now"
"""str: For 'date', 'time', 'datetime', or 'timestamp' column types, replace
empty strings and invalid timestamps with 'NOW()' upon insert.
"""
INIT_WITH_UUID = "init_with_uuid"
"""str: For 'uuid' type, replace empty strings and invalid UUID values with
randomly-generated UUIDs upon insert.
"""
# end class GPUdbColumnProperty
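# Illustrative sketch (not part of the generated API): the property constants
# above used in shorthand column definitions (see GPUdbRecordType below). The
# column names are hypothetical.
def _example_column_properties():
    columns = [
        [ "id",   "long",   GPUdbColumnProperty.PRIMARY_KEY ],
        [ "name", "string", GPUdbColumnProperty.CHAR64, GPUdbColumnProperty.NULLABLE ],
        [ "ts",   "long",   GPUdbColumnProperty.TIMESTAMP ],
    ]
    return columns
# end _example_column_properties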
# ---------------------------------------------------------------------------
# GPUdbRecordColumn - Class to Handle GPUdb Record Column Data Types
# ---------------------------------------------------------------------------
class GPUdbRecordColumn(object):
"""Represents a column in a GPUdb record object (:class:`.GPUdbRecordType`).
"""
class _ColumnType(object):
"""A class acting as an enum for the data types allowed for a column."""
INT = "int"
LONG = "long"
FLOAT = "float"
DOUBLE = "double"
STRING = "string"
BYTES = "bytes"
# end class _ColumnType
    # The allowed data types
_allowed_data_types = [ _ColumnType.INT,
_ColumnType.LONG,
_ColumnType.FLOAT,
_ColumnType.DOUBLE,
_ColumnType.STRING,
_ColumnType.BYTES
]
# All non-numeric data types
_non_numeric_data_types = [ _ColumnType.STRING,
_ColumnType.BYTES
]
# All allowed numeric data types
_numeric_data_types = [ _ColumnType.INT,
_ColumnType.LONG,
_ColumnType.FLOAT,
_ColumnType.DOUBLE
]
# All allowed integral numeric data types
_numeric_integral_data_types = [ _ColumnType.INT,
_ColumnType.LONG
]
# All allowed decimal numeric data types
_numeric_decimal_data_types = [ _ColumnType.FLOAT,
_ColumnType.DOUBLE
]
def __init__( self, name, column_type, column_properties = None, is_nullable = False ):
"""Construct a GPUdbRecordColumn object.
Parameters:
name (str)
The name of the column, must be a non-empty string.
column_type (str)
The data type of the column. Must be one of int, long,
float, double, string, bytes.
column_properties (list)
Optional list of properties for the column.
is_nullable (bool)
Optional boolean flag indicating whether the column is
nullable.
"""
# Validate and save the stringified name
if (not name):
raise GPUdbException( "The name of the column must be a non-empty "
"string; given " + repr(name) )
self._name = name
# Validate and save the data type
if not isinstance( column_type, (basestring, unicode) ):
raise GPUdbException( "Data type must be a string, given {}"
"".format( str(type( column_type )) ) )
# Allow all casing by normalizing to lower case internally here
column_type = column_type.lower()
if column_type not in self._allowed_data_types:
raise GPUdbException( "Data type must be one of "
+ str(self._allowed_data_types)
+ "; given " + str(column_type) )
self._column_type = column_type
# Validate and save the column properties
if not column_properties: # it's ok to not have any
column_properties = []
if not isinstance( column_properties, list ):
raise GPUdbException( "'column_properties' must be a list; given "
+ str(type(column_properties)) )
if not all( isinstance( prop, (basestring, unicode) )
for prop in column_properties ):
raise GPUdbException( "'column_properties' must contain strings only; "
"given {}"
"".format( [str(type(p))
for p in column_properties] ) )
# Normalize the properties by turning them all into lower case
column_properties = [prop.lower() for prop in column_properties]
# Sort and stringify the column properties so that the order for a given
# set of properties is always the same--handy for equivalency checks
self._column_properties = sorted( column_properties, key = lambda x : str(x[0]) )
# Check for nullability
self._is_nullable = False # default value
if (GPUdbColumnProperty.NULLABLE in self.column_properties):
self._is_nullable = True
# Check the optional 'is_nullable' argument
if is_nullable not in [True, False]:
raise GPUdbException( "'is_nullable' must be a boolean value; given " + repr(type(is_nullable)) )
if (is_nullable == True):
self._is_nullable = True
            # Enter the 'nullable' property into the list of properties, even though
            # GPUdb doesn't actually use it (make sure not to make duplicates)
if (GPUdbColumnProperty.NULLABLE not in self._column_properties):
self._column_properties.append( GPUdbColumnProperty.NULLABLE )
# Re-sort for equivalency tests down the road
self._column_properties = sorted( self._column_properties, key = lambda x : str(x[0]) )
# end inner if
# end if
# end __init__
@property
def name(self): # read-only name
"""The name of the column."""
return self._name
# end name
@property
def column_type(self): # read-only column_type
"""The data type of the column."""
return self._column_type
# end column_type
@property
def column_properties(self): # read-only column_properties
"""The properties of the column."""
return self._column_properties
# end column_properties
@property
def is_nullable(self): # read-only is_nullable
"""The nullability of the column."""
return self._is_nullable
# end is_nullable
def __eq__( self, other ):
if isinstance(other, self.__class__):
if ( self._name != other.name ):
return False
if ( self._column_type != other.column_type ):
return False
if ( self._is_nullable != other.is_nullable ):
return False
if ( self._column_properties == other.column_properties ):
return True
# The column properties are tricky; need to disregard
# 'data' and 'text_search'
disregarded_props = [ GPUdbColumnProperty.TEXT_SEARCH, GPUdbColumnProperty.DATA ]
LHS_column_properties = [ prop for prop in self._column_properties \
if prop not in disregarded_props ]
RHS_column_properties = [ prop for prop in other.column_properties \
if prop not in disregarded_props ]
if (LHS_column_properties == RHS_column_properties):
return True
return False # Column properties did not match
else:
return False
# end __eq__
def __ne__(self, other):
return not self.__eq__(other)
# end __ne__
# end class GPUdbRecordColumn
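# Illustrative sketch (not part of the generated API): constructing a
# GPUdbRecordColumn directly. The column name and properties are hypothetical.
def _example_record_column():
    column = GPUdbRecordColumn( "price", "double",
                                column_properties = [ GPUdbColumnProperty.DATA ],
                                is_nullable = True )
    # The 'nullable' property is appended to the property list automatically
    return ( column.name, column.column_type, column.column_properties )
# end _example_record_column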
# ---------------------------------------------------------------------------
# GPUdbRecordType - Class to Handle GPUdb Record Data Types
# ---------------------------------------------------------------------------
class GPUdbRecordType(object):
"""Represent the data type for a given record in GPUdb. Has convenience
functions for creating the type in GPUdb (among others).
"""
def __init__( self, columns = None, label = "",
schema_string = None, column_properties = None ):
"""Create a GPUdbRecordType object which represents the data type for
a given record for GPUdb.
Parameters:
columns (list)
A list of :class:`.GPUdbRecordColumn` objects. Either this argument
or the schema_string argument must be given.
label (str)
                Optional string label for the record type.
schema_string (str)
The JSON string containing the schema for the type.
Either this argument or the columns argument must
be given.
column_properties (dict)
Optional dict that lists the properties for the
columns of the type. Meant to be used in conjunction
with schema_string only; will be ignored if
columns is given.
"""
# Validate and save the label
if not isinstance( label, basestring ):
raise GPUdbException( "Column label must be a string; given " + str(type( label )) )
self._label = label
# The server always uses this hardcoded name and trumps any label
self.name = "type_name"
# Either columns or schema_string must be given, but not both!
if ((columns == None) and (schema_string == None)):
raise GPUdbException( "Either columns or schema_string must be given, but none is!" )
elif ((columns != None) and (schema_string != None)):
raise GPUdbException( "Either columns or schema_string must be given, but not both!" )
# Construct the object from the given columns
try:
if (columns != None):
self.__initiate_from_columns( columns )
else:
self.__initiate_from_schema_string( schema_string, column_properties )
except Exception as ex:
raise GPUdbException( ex )
# The type hasn't been registered with GPUdb yet
self._type_id = None
# end __init__
def __initiate_from_columns( self, columns ):
"""Private method that constructs the object using the given columns.
Parameters:
columns (list)
A list of GPUdbRecordColumn objects or a list with the following
format: [name, type, ...] where ... is optional properties. For
example, ['x', 'int', 'int8']
"""
# Validate the columns
if not columns: # Must NOT be empty
raise GPUdbException( "Non-empty list of columns must be given. Given none." )
if not isinstance( columns, list ): # Must be a list
raise GPUdbException( "Non-empty list of columns must be given. Given " + str(type( columns )) )
# Check if the list contains only GPUdbRecordColumns, then nothing to do
if all( isinstance( x, GPUdbRecordColumn ) for x in columns ):
self._columns = columns
else: # unroll the information contained within
# If the caller provided one list of arguments, wrap it into a list of lists so we can
# properly iterate over
columns = columns if all( isinstance( elm, list ) for elm in columns ) else [ columns ]
# Unroll the information about the column(s) and create GPUdbRecordColumn objects
self._columns = []
for col_info in columns:
# Arguments 3 and beyond--these are properties--must be combined into one list argument
if len( col_info ) > 2:
self._columns.append( GPUdbRecordColumn( col_info[0], col_info[1], col_info[2:] ) )
elif len( col_info ) < 2:
# Need at least two elements: the name and the type
raise GPUdbException( "Need a list with the column name, type, and optional properties; "
"given '%s'" % col_info )
else:
self._columns.append( GPUdbRecordColumn( *col_info ) )
# end if-else
# Save the column names
self._column_names = [col.name for col in self._columns]
# Column property container
self._column_properties = {}
# Avro schema string field container
fields = []
# Validate each column and deduce its properties
for col in self._columns:
# Check that each element is a GPUdbRecordColumn object
if not isinstance( col, GPUdbRecordColumn ):
raise GPUdbException( "columns must contain only GPUdbRecordColumn objects. Given " + str(type( col )) )
# Extract the column's properties, if any
if col.column_properties:
self._column_properties[ col.name ] = sorted( col.column_properties, key = lambda x : str(x[0]) )
# done handling column props
# Create the field for the schema string
field_type = '"{_type}"'.format( _type = col.column_type )
# Handle nullable fields
if col.is_nullable:
field_type = ('[{_type}, "null"]'.format( _type = field_type ))
field = ('{{"name": "{_name}", "type": {_type} }}'.format( _name = col.name, _type = field_type ))
fields.append( field )
# end for loop
# Put the fields together
fields = ", ".join( fields )
# Generate the avro schema string
schema_string = """{{
"type" : "record",
"name" : "{_label}",
"fields" : [ {_fields} ]}}
""".format( _label = self.name,
_fields = fields )
schema_string = schema_string.replace( "\t", "" ).replace( "\n", "" )
# Generate the avro schema and save it
self._record_schema = schema.parse( schema_string )
# Save this version of the schema string so that it is standard
self._schema_string = json.dumps( self._record_schema.to_json() )
# Create and save a RecordType object
self._record_type = RecordType.from_type_schema( "",
self._schema_string,
self._column_properties )
# end __initiate_from_columns
def __initiate_from_schema_string( self, schema_string, column_properties = None ):
"""Private method that constructs the object using the given schema string.
Parameters:
schema_string (str)
The schema string for the record type.
column_properties (dict)
An optional dict containing property information for
some or all of the columns.
"""
# Validate the schema string
if not schema_string: # Must NOT be empty!
raise GPUdbException( "A schema string must be given. Given none." )
# Try to parse the schema string, this would also help us validate it
self._record_schema = schema.parse( schema_string )
# Rename the schema with a generic name just like the database
self._record_schema._props[ "name" ] = self.name
# If no exception was thrown above, then save the schema string
self._schema_string = json.dumps( self._record_schema.to_json() )
# Save the column properties, if any
self._column_properties = column_properties if column_properties else {}
# Now, deduce the columns from the schema string
schema_json = self._record_schema.to_json()
columns = []
for field in schema_json["fields"]:
# Get the field's type
field_type = field["type"]
# Is the type nullable?
is_nullable = False
if ( isinstance( field_type, list )
and ("null" in field_type) ):
is_nullable = True
# Then, also get the scalar type of the field
field_type = field_type[ 0 ]
# end if
field_name = field["name"]
# Get any properties for the column
col_props = None
if (self._column_properties and (field_name in self._column_properties)):
col_props = column_properties[ field_name ]
# end if
            # Create the column object and add it to the list
column = GPUdbRecordColumn( field["name"], field_type, col_props,
is_nullable = is_nullable )
columns.append( column )
# end for
# Save the columns
self._columns = columns
# Save the column names
self._column_names = [col.name for col in columns]
# Create and save a RecordType object
self._record_type = RecordType.from_type_schema( "", self._schema_string,
self._column_properties )
# end __initiate_from_schema_string
@property
def columns(self): # read-only columns
"""A list of columns for the record type."""
return self._columns
# end columns
@property
def column_names(self): # read-only column_names
"""A list of the names of the columns for the record type."""
return self._column_names
# end column_names
@property
def label(self): # read-only label
"""A label for the record type."""
return self._label
# end label
@property
def schema_string(self): # read-only schema string
"""The schema string for the record type."""
return self._schema_string
# end schema_string
@property
def record_schema(self): # read-only avro schema
"""The avro schema for the record type."""
return self._record_schema
# end record_schema
@property
def record_type(self): # read-only RecordType object
"""The RecordType object for the record type."""
return self._record_type
    # end record_type
@property
def column_properties(self): # read-only column properties
"""The properties for the type's columns."""
return self._column_properties
# end column_properties
@property
def type_id(self): # read-only ID for the type
"""The ID for the type, if it has already been registered
with GPUdb."""
if not self._type_id:
raise GPUdbException( "The record type has not been registered with GPUdb yet." )
return self._type_id
# end type_id
    def create_type( self, gpudb, options = None ):
"""Create the record type in GPUdb so that users can create
tables using this type.
Parameters:
gpudb (GPUdb)
A GPUdb object to connect to a GPUdb server.
            options (dict)
Optional dictionary containing options for the /create/type call.
Returns:
The type ID.
"""
# Validate the GPUdb handle
if not isinstance( gpudb, GPUdb ):
raise GPUdbException( "'gpudb' must be a GPUdb object; given " + str(type( gpudb )) )
if not options:
options = {}
response = gpudb.create_type( self._schema_string, self._label, self._column_properties, options )
if not _Util.is_ok( response ): # problem creating the type
raise GPUdbException( _Util.get_error_msg( response ) )
self._type_id = response[ "type_id" ]
return self._type_id
# end create_type
def __eq__( self, other ):
if isinstance(other, self.__class__):
# Compare the schema strings of the two types
if (self._schema_string != other.schema_string):
return False
# Now compare the properties (need to disregard 'data' and 'text_search')
disregarded_props = [ GPUdbColumnProperty.TEXT_SEARCH, GPUdbColumnProperty.DATA ]
# Get the sanitized column properties
lhs_col_props = {}
for name, props in self._column_properties.items():
sanitized_props = [ prop for prop in props if (prop not in disregarded_props) ]
if sanitized_props:
lhs_col_props[ name ] = sorted( sanitized_props )
# end loop
# Get the sanitized column properties
rhs_col_props = {}
for name, props in other.column_properties.items():
sanitized_props = [ prop for prop in props if (prop not in disregarded_props) ]
if sanitized_props:
rhs_col_props[ name ] = sorted( sanitized_props )
# end loop
if (lhs_col_props == rhs_col_props):
return True # distilled props matched
return False # properties did not match
else:
return False
# end __eq__
def __ne__(self, other):
return not self.__eq__(other)
# end __ne__
def get_column( self, column_id ):
"""Return the desired column; fetch by name or index.
Parameters:
column_id (str or int)
If string, then the name of the column. If an integer,
then the index of the column. Must be a valid column name
or be within bounds.
Returns:
The GPUdbRecordColumn object pertaining to the desired column.
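For example, a brief sketch (the column names are hypothetical):
::
record_type = gpudb.GPUdbRecordType( [ ["x", "int"], ["name", "string"] ] )
col_by_name  = record_type.get_column( "name" )  # fetch by name
col_by_index = record_type.get_column( 0 )       # fetch by index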
"""
if not isinstance( column_id, (basestring, unicode, int) ):
raise GPUdbException( "Parameter 'column_id' must be a string "
"or an integer; given '{}'"
"".format( str( type( column_id ) ) ) )
# Handle strings--column name
if isinstance( column_id, (basestring, unicode) ):
try:
return self._columns[ self._column_names.index( column_id ) ]
except ValueError as ex:
raise GPUdbException( "Given key '{}' does not match any column"
"in the type!".format( column_id ) )
# end if
# Not a string, so it must be an integer; ensure it's within limits
if ( (column_id < 0)
or (column_id >= len( self._columns )) ):
raise GPUdbException( "Given index {} is out of bounds; # of "
"columns: {}".format( column_id,
len( self._columns ) ) )
return self._columns[ column_id ]
# end get_column
# end class GPUdbRecordType
# ---------------------------------------------------------------------------
# GPUdbRecord - Class to Handle GPUdb Record Data
# ---------------------------------------------------------------------------
class GPUdbRecord( object ):
"""Represent the data for a given record in GPUdb. Has convenience
functions for encoding/decoding the data.
"""
@staticmethod
def decode_binary_data( record_type, binary_data ):
"""Decode binary encoded data (generally returned by GPUdb) using
the schema for the data. Return the decoded data.
Parameters:
record_type (str or RecordType)
If string, then the schema string for the record type, or
a :class:`RecordType` object representing the type.
binary_data (obj or list)
The binary encoded data. Could be a single object or
a list of data.
Returns:
The decoded data (a single object or a list)
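For example, a hedged sketch decoding the payload of a raw /get/records
response (assuming an existing :class:`GPUdb` handle ``db``; the table
name is hypothetical):
::
response = db.get_records( table_name = "my_table", encoding = "binary" )
records = gpudb.GPUdbRecord.decode_binary_data( response[ "type_schema" ],
response[ "records_binary" ] )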
"""
# Convert a single data object to a list
if not isinstance( binary_data, list ):
binary_data = [ binary_data ]
# end if
decoded_data = []
# Using the in-house c-extension for avro encoding and decoding
if isinstance( record_type, RecordType ):
# Decode the list of data
for binary_datum in binary_data:
decoded_data.append( record_type.decode_records( binary_datum )[0] )
# end for
else: # use the python avro package to decode the data
# Create an avro schema from the schema string
record_type = schema.parse( record_type )
# Get an avro data reader
data_reader = io.DatumReader( record_type )
# Decode the list of data
for binary_datum in binary_data:
decoded_data.append( _Util.decode_binary_data( record_type, binary_datum ) )
# end for
# end if
return decoded_data
# end decode_binary_data
@staticmethod
def decode_dynamic_binary_data( record_type, binary_data ):
"""Decode binary encoded data (generally returned by GPUdb) using
the schema for the data. Return the decoded data.
Parameters:
record_type (str or RecordType)
If string, then the schema string for the record type, or
a :class:`RecordType` object representing the type.
binary_data (obj or list)
The binary encoded data. Could be a single object or
a list of data.
Returns:
The decoded data (a single object or a list)
"""
# Convert a single data object to a list
if not isinstance( binary_data, list ):
binary_data = [ binary_data ]
# end if
decoded_data = []
# Using the in-house c-extension for avro encoding and decoding
if isinstance( record_type, RecordType ):
# Decode the list of data
for binary_datum in binary_data:
decoded_data.append( record_type.decode_records( binary_datum )[0] )
# end for
else: # use the python avro package to decode the data
# Create an avro schema from the schema string
record_type = schema.parse( record_type )
# Decode the list of data
for binary_datum in binary_data:
decoded_data.append( _Util.decode_binary_data( record_type, binary_datum ) )
# end for
# end if
return decoded_data
# end decode_dynamic_binary_data
@staticmethod
def decode_json_string_data( json_string_data ):
"""Decode binary encoded data in string form (generally returned by GPUdb).
Return the decoded data.
Parameters:
json_string_data (str)
The stringified JSON encoded data. Could be
a single object or a list of data.
Returns:
The decoded data (a single object or a list)
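For example:
::
datum = gpudb.GPUdbRecord.decode_json_string_data( '{"x": 1, "name": "abc"}' )
# datum == {"x": 1, "name": "abc"}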
"""
# Decode the single data object
if not isinstance( json_string_data, list ):
json_string_data = _Util.ensure_str( json_string_data )
decoded_datum = json.loads( json_string_data )
return decoded_datum
# end if
# Decode the list of data
decoded_data = []
for json_datum in json_string_data:
json_datum = _Util.ensure_str( json_datum )
decoded_datum = json.loads( json_datum,
object_pairs_hook = collections.OrderedDict )
decoded_data.append( decoded_datum )
# end for
return decoded_data
# end decode_json_string_data
@staticmethod
def decode_dynamic_json_data_column_major( dynamic_json_data, dynamic_schema ):
"""Decode JSON encoded data (generally returned by GPUdb) using
the embedded dynamic schema for the data. Return the decoded data.
Parameters:
dynamic_json_data (dict)
The JSON encoded data with a dynamic schema.
dynamic_schema (str)
The schema string for the data
Returns:
The decoded data (a single object or a list)
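For example, a hedged sketch using a raw /get/records/bycolumn response
(assuming ``import gpudb`` and an existing :class:`GPUdb` handle ``db``;
the table and column names are hypothetical):
::
import json
response = db.get_records_by_column( table_name = "my_table",
column_names = ["x", "y"],
offset = 0, limit = 10,
encoding = "json" )
data = gpudb.GPUdbRecord.decode_dynamic_json_data_column_major(
json.loads( response[ "json_encoded_response" ] ),
response[ "response_schema_str" ] )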
"""
# Convert the dynamic schema to an Avro schema
dynamic_schema = schema.parse( dynamic_schema )
decoded_data = collections.OrderedDict()
column_names = dynamic_json_data['column_headers']
for i, column_name in enumerate( column_names ):
column_index_name = "column_{}".format( i+1 )
# Double/float conversion here
#get the datatype of the underlying data
column_type = dynamic_schema.fields_dict[ column_index_name ].type.items.type
if ( (column_type.lower() == 'double') or (column_type.lower() == 'float') ):
decoded_data[ column_name ] = [float(x) for x in dynamic_json_data[ column_index_name ] ]
else:
decoded_data[ column_name ] = dynamic_json_data[column_index_name]
return decoded_data
# end decode_dynamic_json_data_column_major
@staticmethod
def decode_dynamic_json_data_row_major( dynamic_json_data, dynamic_schema ):
"""Decode JSON encoded data (generally returned by GPUdb) using
the embedded dynamic schema for the data. Return the decoded data.
Parameters:
dynamic_json_data (dict)
The JSON encoded data with a dynamic schema.
dynamic_schema (str)
The schema string for the data
Returns:
The decoded data in row-format (a single object or a list).
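For example, a hedged sketch using the same raw /get/records/bycolumn
response shape shown for the column-major variant above:
::
records = gpudb.GPUdbRecord.decode_dynamic_json_data_row_major(
json.loads( response[ "json_encoded_response" ] ),
response[ "response_schema_str" ] )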
"""
# Convert the dynamic schema to an Avro schema
dynamic_schema = schema.parse( dynamic_schema )
decoded_records = []
# Get the actual column names
column_names = dynamic_json_data['column_headers']
# Get the index-based column names
idx_column_names = [ name for name in dynamic_json_data.keys()
if name not in ['column_headers', 'column_datatypes'] ]
# Get the column types
column_types = [ dynamic_schema.fields_dict[ n ].type.items.type for n in idx_column_names ]
# How many records in total do we have?
num_records = len( dynamic_json_data["column_1"] )
# Create all the records
for i in list( range(0, num_records) ):
record = collections.OrderedDict()
# Create a single record
for (col_name, col_idx_name, col_type) in zip(column_names, idx_column_names, column_types):
# Get the column value
col_val = dynamic_json_data[ col_idx_name ][ i ]
# Convert double/float
if ( (col_type.lower() == 'double') or (col_type.lower() == 'float') ):
col_val = float( col_val )
record[ col_name ] = col_val
# end inner loop
# Add this record to the list
decoded_records.append( record )
# end loop
return decoded_records
# end decode_dynamic_json_data_row_major
@staticmethod
def convert_data_col_major_to_row_major( col_major_data, col_major_schema_str ):
"""Given some column major data, convert it to row major data.
Parameters:
col_major_data (OrderedDict)
An OrderedDict of arrays containing the data by column names.
col_major_schema_str (str)
A JSON schema string describing the column major data.
Returns:
A list of GPUdbRecord objects.
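For example, a minimal sketch with hand-built column-major data (the
column names and schema below are hypothetical):
::
import collections, json
col_data = collections.OrderedDict( [ ("x", [1, 2]), ("name", ["a", "b"]) ] )
schema_str = json.dumps( { "type": "record", "name": "t", "fields": [
{ "name": "column_1", "type": { "type": "array", "items": "int"    } },
{ "name": "column_2", "type": { "type": "array", "items": "string" } } ] } )
records = gpudb.GPUdbRecord.convert_data_col_major_to_row_major( col_data, schema_str )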
"""
if not isinstance( col_major_data, collections.OrderedDict ):
raise GPUdbException( "Argument 'col_major_data' must be an OrderedDict;"
" given %s" % str( type( col_major_data ) ) )
try:
schema_json = json.loads( col_major_schema_str )
except Exception as ex:
raise GPUdbException( "Could not parse 'col_major_schema_str': "
"{}".format( GPUdbException.stringify_exception( ex ) ) )
# Create the schema for each record from the column-major format's schema
columns = []
for col_name, field in zip(col_major_data.keys(), schema_json[ C._fields ]):
field_type = field[ "type" ][ "items" ]
if isinstance( field_type, (str, unicode) ):
columns.append( [ col_name, field_type ] )
elif (isinstance( field_type, list ) and ("null" in field_type )):
# The column is nullable
columns.append( [ col_name, field_type[0], GPUdbColumnProperty.NULLABLE ] )
else:
raise GPUdbException( "Unknown column type: {0}".format( field_type ) )
# end loop
# Create a record type
record_type = GPUdbRecordType( columns )
# Create the records
records = []
for record in zip( *col_major_data.values() ):
records.append( GPUdbRecord( record_type, list( record ) ) )
# end loop
return records
# end convert_data_col_major_to_row_major
@staticmethod
def transpose_data_to_col_major( row_major_data ):
"""Given some row major data, convert it to column major data.
Parameters:
row_major_data (list of :class:`Record` or collections.OrderedDicts)
A list of :class:`Record` or collections.OrderedDicts objects
containing the data.
Returns:
A dict of lists where the keys are column names and the values are
lists (containing the values for the pertinent column of all the records)
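For example:
::
import collections
rows = [ collections.OrderedDict( [ ("x", 1), ("name", "a") ] ),
collections.OrderedDict( [ ("x", 2), ("name", "b") ] ) ]
cols = gpudb.GPUdbRecord.transpose_data_to_col_major( rows )
# cols == OrderedDict( [ ("x", [1, 2]), ("name", ["a", "b"]) ] )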
"""
if not row_major_data: # Handle empty/none etc.
return row_major_data
# Turn a single record into a list, if applicable
row_major_data = [ row_major_data ] if not isinstance( row_major_data, list ) else row_major_data
# Get the record type
if isinstance( row_major_data[ 0 ], Record ):
column_names = row_major_data[ 0 ].type.keys()
column_values = map( list, zip( *row_major_data ) )
# Need to use the dict constructor to be python 2.6 compatible
transposed_data = collections.OrderedDict( zip( column_names, column_values ) )
else:
column_names = row_major_data[ 0 ].keys()
# column_values = zip([ record.values() for record in row_major_data ])
# Transpose the data
transposed_data = collections.OrderedDict()
for col_name in column_names:
column_values = [ record[ col_name ] for record in row_major_data ]
transposed_data[ col_name ] = column_values
# end loop
# end if
return transposed_data
# end transpose_data_to_col_major
def __init__( self, record_type, column_values ):
"""Create a GPUdbRecord object which holds the data for
a given record.
Parameters:
record_type (GPUdbRecordType)
A :class:`.GPUdbRecordType` object that describes the columns
of this record.
column_values (dict or list)
Either a dict or a list that contains the values for
the columns. In either case, must contain values for
ALL columns. If a list, then the columns must be in the
correct order.
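For example (the column names are hypothetical):
::
record_type = gpudb.GPUdbRecordType( [ ["x", "int"], ["name", "string"] ] )
rec_from_list = gpudb.GPUdbRecord( record_type, [ 42, "abc" ] )
rec_from_dict = gpudb.GPUdbRecord( record_type, { "x": 42, "name": "abc" } )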
"""
# Validate and save the record type
if not isinstance( record_type, GPUdbRecordType ):
raise GPUdbException( "'record_type' must be a GPUdbRecordType; given " + str(type( record_type )) )
self._record_type = record_type
if isinstance( column_values, GPUdbRecord ):
# A GPUdbRecord was passed in; __init__ cannot return a value, so copy its state instead
self._column_values = copy.deepcopy( column_values.column_values )
self._binary_encoded_data = column_values.binary_data
return
# Validate the column values
if not _Util.is_list_or_dict( column_values ):
# Must be a list or a dict
raise GPUdbException( "Columns must be one of the following: list, dict, OrderedDict. "
"Given " + str(type( column_values )) )
if not column_values: # Must NOT be empty
raise GPUdbException( "Column values must be given. Given none." )
# The column values must be saved in the order they're declared in the type
self._column_values = collections.OrderedDict()
# Get the expected number of columns based on the data type provided
num_columns = len( self._record_type.columns )
# Check that there are correct number of values
if (len( column_values ) != num_columns ):
raise GPUdbException( "Given list of column values does not have the correct (%d) "
"number of values; it has %d" % (num_columns, len( column_values )) )
# Check and save the column values
# --------------------------------
# Case 1: The values are given in a list
if isinstance( column_values, list ):
# Check that the order of the columns is ok
# (we can only check string vs. numeric types, really;
# we can also check for nulls)
for i in list( range(0, num_columns) ):
column_name = self._record_type.columns[ i ].name
# The given value for this column
column_val = column_values[ i ]
# Validate the column value and save it (if valid)
( is_valid,
column_value ) = self.__validate_column_value( column_val,
self._record_type.columns[ i ] )
if is_valid:
self._column_values[ column_name ] = column_value
# end for loop
else: # the values are given either in a dict or an OrderedDict
# Check that the column names given match those of the record's type
given_column_names = set( column_values.keys() )
record_type_column_names = set( [c.name for c in self._record_type.columns] )
if ( given_column_names != record_type_column_names ):
if (given_column_names - record_type_column_names):
raise GPUdbException( "Given column names do not match that of the record type. "
"Extra column names are: " + str( (given_column_names - record_type_column_names) ))
else:
raise GPUdbException( "Given column names do not match that of the record type. "
"Missing column names are: " + str( (record_type_column_names - given_column_names) ))
# end if
# We will disregard the order in which the column values were listed
# in column_values (this should help the user somewhat)
for i in list( range(0, num_columns) ):
column_name = self._record_type.columns[ i ].name
column_val = column_values[ column_name ]
# Validate the column value and save it (if valid)
( is_valid,
column_value ) = self.__validate_column_value( column_val,
self._record_type.columns[ i ] )
if is_valid:
self._column_values[ column_name ] = column_value
# end checking and saving column values
# Encode the record into binary and save it
# -----------------------------------------
self._binary_encoded_data = _Util.encode_binary_data( self._record_type.record_schema,
self._column_values )
# end __init__
@property
def record_type(self): # read-only record type
"""The type for this record."""
return self._record_type
# end record_type
@property
def column_values(self): # read-only column_values
"""The values for this record."""
return self._column_values
# end column_values
@property
def data(self): # read-only column_values, just a convenient name
"""The values for this record."""
return self._column_values
# end data
@property
def binary_data(self): # read-only binary_data
"""The binary encoded values for this record."""
return self._binary_encoded_data
# end binary_data
@property
def json_data_string(self): # JSON encoded column_values in a string
"""The stringified JSON encoded values for this record."""
return json.dumps( _Util.convert_dict_bytes_to_str(self._column_values) )
# end json_data_string
def keys( self ):
"""Return a list of the column names of the record.
"""
return self.data.keys()
# end keys
def values( self ):
"""Return a list of the values of the record.
"""
return self.data.values()
# end values
def insert_record( self, gpudb, table_name, encoding = "binary", options = None ):
"""Insert this record into GPUdb.
Parameters:
gpudb (GPUdb)
A :class:`.GPUdb` client handle.
table_name (str)
The name of the table into which we need to insert the record.
encoding (str)
Optional encoding with which to perform the insertion. Default
is binary encoding.
options (dict)
Optional parameter. If given, use the options for the insertion
function.
Returns:
The response from GPUdb.
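For example, a brief sketch (assuming an existing :class:`GPUdb` handle
``db`` and a hypothetical table that already uses this record's type):
::
response = record.insert_record( db, "my_table", encoding = "json" )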
"""
# Validate the GPUdb handle
if not isinstance( gpudb, GPUdb ):
raise GPUdbException( "'gpudb' must be a GPUdb object; given " + str( type( gpudb ) ) )
if not options:
options = {}
# Based on the encoding, format the data appropriately
if (encoding.lower() == "binary"):
data = [ self._binary_encoded_data ]
elif (encoding.lower() == "json"):
data = [ json.dumps( _Util.convert_dict_bytes_to_str( self._column_values ) ) ]
else:
raise GPUdbException( "Unknown encoding: " + str( encoding ) )
# Insert the record
response = gpudb.insert_records( table_name = table_name,
data = data,
list_encoding = encoding,
options = options )
return response
# end insert_record
def __validate_column_value( self, column_value, column, do_throw = True ):
"""Private function that validates the given value for a column.
Parameters:
column_value
The value for the given column
column (GPUdbRecordColumn)
A :class:`.GPUdbRecordColumn` object that has information about
the column. This is used to validate the column value.
do_throw (bool)
Throw an exception for invalid columns
Returns:
A tuple where the first element is True if the value can be
validated, False otherwise. The second element is the validated
value (might have been transformed).
"""
if not isinstance( column, GPUdbRecordColumn ):
raise GPUdbException( "'column' must be a GPUdbRecordColumn object; given "
+ str(type( column )) )
# Check that the value is of the given type
# -----------------------------------------
is_valid = True
column_type = column.column_type
if (column_value is None): # Handle null values
if not column.is_nullable: # but the column is not nullable
if do_throw:
raise GPUdbException( "Non-nullable column '%s' given a null value" % column.name )
else:
is_valid = False
elif column_type in GPUdbRecordColumn._numeric_decimal_data_types:
# Floating or double
try:
column_value = float( column_value )
except:
if do_throw:
raise GPUdbException( ("Column '%s' must be a floating point"
" type (float or double); "
"given " % column.name )
+ str(type( column_value )) )
else:
is_valid = False
elif column_type == GPUdbRecordColumn._ColumnType.INT:
# Integer
try:
column_value = int( column_value )
except:
if do_throw:
raise GPUdbException( ("Column '%s' must be an integer; "
"given " % column.name )
+ str(type( column_value )) )
else:
is_valid = False
elif column_type == GPUdbRecordColumn._ColumnType.LONG:
# Long
try:
column_value = long( column_value )
except:
if do_throw:
raise GPUdbException( ("Column '%s' must be a long; "
"given " % column.name )
+ str(type( column_value )) )
else:
is_valid = False
else: # string/bytes type
if not isinstance( column_value, (str, Decimal, unicode, bytes) ):
if do_throw:
raise GPUdbException( ("Column '%s' must be string or bytes; given " % column.name)
+ str(type( column_value )) )
else:
is_valid = False
# end if-else checking type-correctness
# The value checks out; it is valid
return (is_valid, column_value)
# end __validate_column_value
def __eq__( self, other ):
if isinstance(other, self.__class__):
return self.__dict__ == other.__dict__
else:
return False
# end __eq__
def __ne__(self, other):
return not self.__eq__(other)
# end __ne__
# end class GPUdbRecord
# ---------------------------------------------------------------------------
# GPUdb - Lightweight client class to interact with a GPUdb server.
# ---------------------------------------------------------------------------
class GPUdb(object):
"""
This is the main class to be used to provide the client functionality to
interact with the server.
Usage patterns
* Secured setup (Default)
The code given below will set up a secured connection. The property 'skip_ssl_cert_verification'
defaults to 'False', so the SSL certificate check will be enforced.
::
options = GPUdb.Options()
options.username = "user"
options.password = "password"
options.logging_level = "debug"
gpudb = GPUdb(host='https://your_server_ip_or_FQDN:8082/gpudb-0', options=options )
* Unsecured setup
The code given below will set up an unsecured connection to the server. The property 'skip_ssl_cert_verification'
has been set explicitly to 'True', so all certificate checks will be bypassed regardless of whether an SSL setup
is in place.
::
options = GPUdb.Options()
options.username = "user"
options.password = "password"
options.skip_ssl_cert_verification = True
options.logging_level = "debug"
gpudb = GPUdb(host='https://your_server_ip_or_FQDN:8082/gpudb-0', options=options )
Another way of setting up an unsecured connection is shown in the code below. In this case, the URL
is not a secured one, so no SSL setup comes into play.
::
options = GPUdb.Options()
options.username = "user"
options.password = "password"
options.logging_level = "debug"
gpudb = GPUdb(host='http://your_server_ip_or_FQDN:9191', options=options )
"""
# Logging related string constants
# Note that the millisecond is put in the message format due to a
# shortcoming of the Python datetime format
_LOG_MESSAGE_FORMAT = "%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s"
_LOG_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
# Headers that are protected and cannot be overridden by users
_protected_headers = [
C._HEADER_ACCEPT,
C._HEADER_AUTHORIZATION,
C._HEADER_CONTENT_TYPE,
C._HEADER_HA_SYNC_MODE
]
class HASynchronicityMode( enum.Enum ):
"""Inner enumeration class to represent the high-availability
synchronicity override mode that is applied to each endpoint call.
Available enumerations are:
* DEFAULT -- No override; defer to the HA process for synchronizing
endpoints (which has different logic for different
endpoints). This is the default mode.
* NONE -- Do not replicate the endpoint calls to the backup cluster.
* SYNCHRONOUS -- Synchronize all endpoint calls
* ASYNCHRONOUS -- Do NOT synchronize any endpoint call
"""
# No override; defer to the HA process for synchronizing
# endpoints (which has different logic for different endpoints)
# For this default mode, we don't put any header in the endpoint call
DEFAULT = "default"
# Do not replicate the endpoint calls at all
NONE = "REPL_NONE"
# Synchronize all endpoint calls
SYNCHRONOUS = "REPL_SYNC"
# Do NOT synchronize any endpoint call
ASYNCHRONOUS = "REPL_ASYNC"
# end inner class HASynchronicityMode
class HAFailoverOrder( enum.Enum ):
"""Inner enumeration class to represent the high-availability
failover order that is applied to ring resiliency or inter-cluster
failover. The order dictates in which pattern backup clusters will
be chosen when a failover needs to happen in the client API.
Available enumerations are:
* RANDOM -- Randomly choose the backup cluster from the available
clusters. This is the default mode.
* SEQUENTIAL -- Choose the cluster sequentially from the list of
clusters (the union of the user given clusters and
auto-discovered clusters).
"""
# Randomly choose the backup cluster from the available
# clusters. This is the default mode.
RANDOM = "RANDOM"
# Choose the cluster sequentially from the list of
# clusters (the union of the user given clusters and
# auto-discovered clusters).
SEQUENTIAL = "SEQUENTIAL"
# end inner class HAFailoverOrder
class Options(object):
"""
Encapsulates the various options used to create a :class:`GPUdb`
object. The same options object can be used for multiple GPUdb client handles,
and state modifications carry over from one use to the next:
For backward compatibility, we will support the following options from the
7.0 GPUdb keyword arguments and map them to the following properties:
* connection -> protocol
* no_init_db_contact -> disable_auto_discovery
::
opts = GPUdb.Options.default()
opts.disable_failover = True
db1 = gpudb.GPUdb( host = "http://1.2.3.4:9191",
options = opts )
opts.primary_host = "http://7.8.9.0:9191"
db2 = gpudb.GPUdb( host = "http://1.2.3.4:9191",
options = opts )
"""
# Names of properties
__disable_auto_discovery_str = "_Options__disable_auto_discovery"
__disable_failover_str = "_Options__disable_failover"
__encoding_str = "_Options__encoding"
__ha_failover_order_str = "_Options__ha_failover_order"
__host_manager_port_str = "_Options__host_manager_port"
__hostname_regex_str = "_Options__hostname_regex"
__http_headers_str = "_Options__http_headers"
__server_connection_timeout_str = "_Options__server_connection_timeout"
__initial_connection_attempt_timeout_str = "_Options__initial_connection_attempt_timeout"
__logging_level_str = "_Options__logging_level"
__password_str = "_Options__password"
__primary_host_str = "_Options__primary_host"
__protocol_str = "_Options__protocol"
__skip_ssl_cert_verification_str = "_Options__skip_ssl_cert_verification"
__timeout_str = "_Options__timeout"
__username_str = "_Options__username"
_supported_options = [ __disable_auto_discovery_str,
__disable_failover_str,
__encoding_str,
__ha_failover_order_str,
__host_manager_port_str,
__hostname_regex_str,
__http_headers_str,
__server_connection_timeout_str,
__initial_connection_attempt_timeout_str,
__logging_level_str,
__password_str,
__primary_host_str,
__protocol_str,
__skip_ssl_cert_verification_str,
__timeout_str,
__username_str
]
@staticmethod
def default():
return GPUdb.Options()
def __init__(self, options = None):
"""Create a default set of options for GPUdb object creation.
Parameters:
options (dict or :class:`GPUdb.Options`)
Optional dictionary with options already loaded. If a
:class:`GPUdb.Options` object, then this will work like
a copy constructor and make a full copy of the input
argument.
Returns:
An :class:`Options` object.
"""
# Set default values
self.__disable_auto_discovery = False
self.__disable_failover = False
self.__encoding = C._ENCODING_BINARY
self.__ha_failover_order = GPUdb.HAFailoverOrder.RANDOM
self.__host_manager_port = GPUdb._DEFAULT_HOST_MANAGER_PORT
self.__hostname_regex = None
self.__http_headers = {}
self.__initial_connection_attempt_timeout = 0
self.__server_connection_timeout = GPUdb._DEFAULT_SERVER_CONNECTION_TIMEOUT
self.__logging_level = None
self.__password = None
self.__primary_host = None
self.__protocol = None
self.__skip_ssl_cert_verification = False
self.__timeout = None # means indefinite wait
self.__username = None
if (options is None):
return # nothing to do
# Work like a copy constructor; get the dict out of an Options object
if isinstance( options, GPUdb.Options ):
options = options.__dict__
elif isinstance( options, dict ): # Handle kwargs which is a dict
options = self.__prepend_class_prefix_to_keys(options, '_Options__')
# end if
if not isinstance( options, dict ):
raise GPUdbException( "Argument 'options' must be a dict; given '%s'."
% type( options ) )
# end if
# For backward compatibility, we will support the following options
# and map them to the new names:
# * connection -> protocol
# * no_init_db_contact -> disable_auto_discovery
if "_Options__connection" in options:
options[ self.__protocol_str ] = options.pop( "_Options__connection" )
if "_Options__no_init_db_contact" in options:
options[ self.__disable_auto_discovery_str ] = options.pop( "_Options__no_init_db_contact" )
if "_Options__cluster_reconnect_count" in options:
del options["_Options__cluster_reconnect_count"]
if "_Options__intra_cluster_failover_timeout" in options:
del options["_Options__intra_cluster_failover_timeout"]
# Check for invalid options
unsupported_options = set( options.keys() ).difference( self._supported_options )
if unsupported_options:
raise GPUdbException( "Invalid options: %s" % unsupported_options )
# Extract and save each option
for (key, val) in options.items():
setattr( self, key, val.upper() if key == '_Options__encoding' else val )
# end __init__
def __prepend_class_prefix_to_keys(self, d, prefix):
return {prefix + key if not key.startswith(prefix) else key: value for key, value in d.items()}
def __exposed_fields(self):
return ['_Options__disable_auto_discovery',
'_Options__disable_failover',
'_Options__encoding',
'_Options__ha_failover_order',
'_Options__host_manager_port',
'_Options__hostname_regex',
'_Options__http_headers',
'_Options__server_connection_timeout',
'_Options__logging_level',
'_Options__primary_host',
'_Options__protocol',
'_Options__skip_ssl_cert_verification',
'_Options__timeout',
'_Options__username',
]
def __str__( self ):
"""String representation of the cluster.
"""
field_list = [s[len('_Options__'):] if s.startswith('_Options__') else s for s in self.__dict__.keys() if s in self.__exposed_fields()]
value_list = [value for key, value in vars(self).items() if key in self.__exposed_fields()]
final_list = list(zip(field_list, value_list))
return '\n'.join(str(x) for x in final_list)
# end __str__
def __eq__( self, other ):
"""Override the equality operator.
"""
if self is other:
return True
# Check the type of the other object
if not isinstance( other, GPUdb.Options ):
return False
# Check member count
if len(vars(self)) != len(vars(other)):
return False
# Compare each member
for key in vars(self):
if getattr(self, key) != getattr(other, key):
return False
# The primary host equivalence is only strict for given hosts;
# None and empty string are considered to be equivalent to each
# other
if not self.primary_host:
if other.primary_host:
return False
# end inner if
elif ( self.primary_host != other.primary_host ):
return False
return True
# end __eq__
def __ne__( self, other ):
"""Override the inequality operator.
"""
return not self.__eq__( other )
# end __ne__
def as_json(self):
"""Return the options as a dictionary. Will stringify parameters as
needed. For example, GPUdb.URL and GPUdb.HAFailoverOrder objects
will be stringified.
"""
result = self.__dict__.copy()
# Special handling of some properties is required
if self.__primary_host:
result[ self.__primary_host_str ] = str(self.__primary_host)
else:
result[ self.__primary_host_str ] = None
# end if
if self.__ha_failover_order:
result[ self.__ha_failover_order_str ] = str(self.__ha_failover_order)
else:
result[ self.__ha_failover_order_str ] = None
# end if
return result
# end as_json
@property
def username(self):
"""Gets the password to be used for authentication to GPUdb."""
return self.__username
@username.setter
def username(self, value):
self.__username = value
@property
def password(self):
"""Gets the password to be used for authentication to GPUdb."""
return self.__password
@password.setter
def password(self, value):
self.__password = value
@property
def skip_ssl_cert_verification(self):
"""Gets the password to be used for authentication to GPUdb."""
return self.__skip_ssl_cert_verification
@skip_ssl_cert_verification.setter
def skip_ssl_cert_verification(self, value):
self.__skip_ssl_cert_verification = value
@property
def cluster_reconnect_count(self):
"""Gets the number of times the API tries to reconnect to the
same cluster (when a failover event has been triggered),
before actually failing over to any available backup
cluster. Does not apply when only a single cluster is
available.
This method is now deprecated.
"""
return 1
@cluster_reconnect_count.setter
def cluster_reconnect_count(self, value):
"""Sets the number of times the API tries to reconnect to the
same cluster (when a failover event has been triggered),
before actually failing over to any available backup
cluster. Does not apply when only a single cluster is
available. Must be an integer value greater than or equal to zero.
The default is 1.
This method is now deprecated.
"""
try:
value = int( value )
except:
raise GPUdbException( "Property 'cluster_reconnect_count' must be numeric; "
"given {}".format( str(type(value)) ) )
# Must be >= 0
if (value < 0):
raise GPUdbException( "Property 'cluster_reconnect_count' must be "
"greater than or equal to 0; given {}"
"".format( str(value) ) )
# end setter
@property
def disable_auto_discovery(self):
"""Gets the property indicating whether to disable automatic
discovery of backup clusters or worker rank URLs. If set to true,
then the GPUdb object will not connect to the database at initialization
time, and will only work with the URLs given.
"""
return self.__disable_auto_discovery
@disable_auto_discovery.setter
def disable_auto_discovery(self, value):
"""Sets the property indicating whether to disable automatic
discovery of backup clusters or worker rank URLs. If set to true,
then the GPUdb object will not connect to the database at initialization
time, and will only work with the URLs given. Must be a boolean value.
Default is False.
"""
if not isinstance( value, bool ):
raise GPUdbException( "Property 'disable_auto_discovery' must be "
"boolean; given '{}' type {}"
"".format( value, str(type(value)) ) )
self.__disable_auto_discovery = value
# end setter
@property
def disable_failover(self):
"""Gets the whether failover upon failures is to be completely disabled.
"""
return self.__disable_failover
@disable_failover.setter
def disable_failover(self, value):
"""Sets the whether failover upon failures is to be completely disabled.
Default is False.
"""
if not isinstance( value, bool ):
raise GPUdbException( "Property 'disable_failover' must be boolean;"
" given '{}' type {}"
"".format( value, str(type(value)) ) )
self.__disable_failover = value
# end setter
@property
def encoding(self):
"""Gets the encoding used by the client. Supported values are:
* **binary**
* **snappy**
* **json**
"""
return self.__encoding
@encoding.setter
def encoding(self, value):
"""Sets the encoding used by the client. Supported values are:
* **binary**
* **snappy**
* **json**
The default is *binary*.
"""
if not isinstance( value, (basestring, unicode) ):
raise GPUdbException( "Option 'encoding' must be a string; given "
"'{}' type {}"
"".format( value, str(type(value)) ) )
# Handle all cases
value = value.upper()
if (value not in [C._ENCODING_BINARY, C._ENCODING_JSON, C._ENCODING_SNAPPY]):
raise GPUdbException( "Expected encoding to be either 'BINARY', "
"'JSON' or 'SNAPPY'; got '{}' "
"".format( value ) )
self.__encoding = value
# end encoding setter
@property
def ha_failover_order(self):
"""Gets the current inter-cluster failover order. This indicates
in which order--sequential or random--the backup clusters would be
used when an inter-cluster failover event happens. Default is RANDOM.
"""
return self.__ha_failover_order
@ha_failover_order.setter
def ha_failover_order(self, value):
"""Sets the inter-cluster failover order. This indicates
in which order--sequential or random--the backup clusters would be
used when an inter-cluster failover event happens. Default is RANDOM.
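For example:
::
options = GPUdb.Options()
options.ha_failover_order = GPUdb.HAFailoverOrder.SEQUENTIAL
# or, equivalently, the string form:
options.ha_failover_order = "sequential"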
"""
if isinstance( value, (basestring, unicode) ):
# Need to convert to all upper case for normalization
value = value.upper()
# Get a list of the values of the enums
accepted_values = [x.value
for x in GPUdb.HAFailoverOrder.__members__.values()]
if (value not in accepted_values):
raise GPUdbException( "Expected 'ha_failover_order' to be one of "
"{}; got '{}' "
"".format( accepted_values, value ) )
# end inner if
# Choose the appropriate enum based on the value
value = GPUdb.HAFailoverOrder._value2member_map_[ value ]
elif not isinstance( value, GPUdb.HAFailoverOrder ):
# Has to be either our enum or an acceptable string value
raise GPUdbException( "Option 'ha_failover_order' must be a string "
" or a GPUdb.HAFailoverOrder object; given "
"'{}' type {}"
"".format( value, str(type(value)) ) )
self.__ha_failover_order = value
# end setter
@property
def host_manager_port(self):
"""Gets the host manager port number. Some endpoints are supported only
at the host manager, rather than the head node of the database.
"""
return self.__host_manager_port
@host_manager_port.setter
def host_manager_port(self, value):
"""Sets the host manager port number. Some endpoints are supported only
at the host manager, rather than the head node of the database.
"""
try:
value = int( value )
except:
raise GPUdbException( "Property 'host_manager_port' must be numeric; "
"given '{}' type {}".format( value, str(type(value)) ) )
# Must be a valid port number
if ( (value <= 0) or (value >= 65536) ):
raise GPUdbException( "Host manager port must be in the range "
"[1, 65535]; given {}".format( value ) )
self.__host_manager_port = value
# end setter
@property
def hostname_regex(self):
"""Gets the regex pattern to be used to filter URLs of the servers.
If null, then the first URL encountered per rank will be used.
Returns a compiled regex object or None if no regex is being used.
"""
return self.__hostname_regex
@hostname_regex.setter
def hostname_regex(self, value):
"""Sets the IP address or hostname regex against which the server's
rank URLs would be matched when auto-discovering them. If None, then
the first URL for any given rank would be used (if multiple are
available in the system properties).
Note that the regex MUST NOT have the protocol or the port; it
should be a regular expression ONLY for the hostname/IP address.
Note also that the regex must match all servers in all clusters of
the ring, as there is only one in use per connection.
Allowed types are string, a compiled regex object, or None (to
indicate no regex is to be used for URL matching--just take the
first one from a list of URLs). The default is None.
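For example (the subnet pattern below is hypothetical):
::
options = GPUdb.Options()
options.hostname_regex = "172\\.17\\.0\\..*"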
"""
if value is None:
self.__hostname_regex = value
return
# end if
# Otherwise, must be a valid regex string or an already compiled
# regex object
if isinstance( value, (basestring, unicode) ):
# Save the compiled regex
try:
self.__hostname_regex = re.compile( value )
except Exception as ex:
raise GPUdbException( "Property 'hostname_regex' must be a "
"valid regex; given '{}'; error: {}"
"".format( value, str( ex ) ) )
elif isinstance( value, type( re.compile('') ) ):
# No extra processing is needed since we're given a compiled regex
self.__hostname_regex = value
else:
raise GPUdbException( "Property 'hostname_regex' must be a regex"
" given either as a string or as a compiled"
" regex pattern object; given '{}'"
"".format( str(type( value )) ) )
# end setter
@property
def http_headers(self):
"""Gets the custom HTTP headers that will be used per HTTP endpoint
submission by the :class:`GPUdb` to the server. The header keys
and values must be strings. Returns a deep copy.
"""
return copy.deepcopy( self.__http_headers )
@http_headers.setter
def http_headers(self, value):
"""Sets the custom HTTP headers that will be used per HTTP endpoint
submission by the :class:`GPUdb` to the server. The header keys
and values must be strings. Also, the following headers are
protected, and cannot be overridden by the user:
* "Accept"
* "Authorization"
* "Content-type"
* "X-Kinetica-Group"
Parameters:
value (dict of str to str)
The headers to set.
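For example (the header name and value below are hypothetical):
::
options = GPUdb.Options()
options.http_headers = { "X-Custom-Header": "some-value" }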
"""
if value is None:
self.__http_headers = None
return
# end if
# Otherwise, must be a dictionary with all strings for keys and
# headers
if not isinstance( value, (dict, collections.OrderedDict) ):
raise GPUdbException( "Property 'http_headers' must be a dict;"
" given '{}'"
"".format( str(type( value )) ) )
# end if
# Validate that each of the keys and values are actually strings
are_keys_strings = [ isinstance(key, (basestring, unicode))
for key in list(value.keys()) ]
are_vals_strings = [ isinstance(val, (basestring, unicode))
for val in list(value.values()) ]
if ( (not all( are_keys_strings ))
or
(not all( are_vals_strings )) ):
raise GPUdbException( "Property 'http_headers' must be a dict "
"with all keys and values being strings;"
" given '{}'"
"".format( value ) )
# end if
# Ensure that none of the protected headers are being overridden
for header in list( value.keys() ):
if header in GPUdb._protected_headers:
raise GPUdbException( "Header '{}' is protected and cannot "
"be overridden by the user"
"".format( header ) )
# end for
self.__http_headers = value
# end setter
# end add_http_header
@property
def initial_connection_attempt_timeout(self):
"""Gets the timeout used when trying to establish a connection to the
database at GPUdb initialization. The value is given in milliseconds
and the default is 0. 0 indicates no retry will be done; instead,
the user given URLs will be stored without further discovery.
If multiple URLs are given by the user, then the API will try all of them
once before retrying or giving up. When this timeout is set
to a non-zero value, and the first attempt failed, then
the API will wait (sleep) for a certain amount of time and
try again. Upon consecutive failures, the sleep amount
will be doubled. So, before the first retry (i.e. the second
attempt), the API will sleep for one minute. Before the second
retry, the API will sleep for two minutes, the next sleep interval
would be four minutes, and onward.
"""
return self.__initial_connection_attempt_timeout
@initial_connection_attempt_timeout.setter
def initial_connection_attempt_timeout(self, value):
"""Sets the timeout used when trying to establish a connection to the
database at GPUdb initialization. The value is given in milliseconds
and the default is 0. 0 indicates no retry will be done; instead,
the user given URLs will be stored without further discovery.
If multiple URLs are given by the user, then the API will try all of them
once before retrying or giving up. When this timeout is set
to a non-zero value, and the first attempt failed, then
the API will wait (sleep) for a certain amount of time and
try again. Upon consecutive failures, the sleep amount
will be doubled. So, before the first retry (i.e. the second
attempt), the API will sleep for one minute. Before the second
retry, the API will sleep for two minutes, the next sleep interval
would be four minutes, and onward.
"""
try:
value = int( value )
except:
raise GPUdbException( "Property 'initial_connection_attempt_timeout' "
"must be numeric; "
"given {}".format( str(type(value)) ) )
# Must be >= 0
if (value < 0):
raise GPUdbException( "Property 'initial_connection_attempt_timeout' "
"must be greater than or equal to zero; "
"given {}".format( value ) )
self.__initial_connection_attempt_timeout = value
# end setter
@property
def server_connection_timeout(self):
"""Gets the timeout used when trying to establish a connection to the
database at GPUdb initialization. The value is given in milliseconds
and the default is 0. 0 indicates no retry will be done; instead,
the user given URLs will be stored without further discovery.
If multiple URLs are given by the user, then the API will try all of them
once before retrying or giving up. When this timeout is set
to a non-zero value, and the first attempt failed, then
the API will wait (sleep) for a certain amount of time and
try again. Upon consecutive failures, the sleep amount
will be doubled. So, before the first retry (i.e. the second
attempt), the API will sleep for one minute. Before the second
retry, the API will sleep for two minutes, the next sleep interval
would be four minutes, and onward.
"""
return self.__server_connection_timeout
@server_connection_timeout.setter
def server_connection_timeout(self, value):
"""Sets the timeout used when trying to establish a connection to the
database at GPUdb initialization. The value is given in milliseconds
and the default is 0. 0 indicates no retry will be done; instead,
the user given URLs will be stored without further discovery.
If multiple URLs are given by the user, then the API will try all of them
once before retrying or giving up. When this timeout is set
to a non-zero value, and the first attempt failed, then
the API will wait (sleep) for a certain amount of time and
try again. Upon consecutive failures, the sleep amount
will be doubled. So, before the first retry (i.e. the second
attempt), the API will sleep for one minute. Before the second
retry, the API will sleep for two minutes, the next sleep interval
would be four minutes, and onward.
"""
try:
value = int( value )
except:
raise GPUdbException( "Property 'initial_connection_attempt_timeout' "
"must be numeric; "
"given {}".format( str(type(value)) ) )
# Must be >= 0
if (value < 0):
raise GPUdbException( "Property 'initial_connection_attempt_timeout' "
"must be greater than or equal to zero; "
"given {}".format( value ) )
self.__server_connection_timeout = value
# end setter
@property
def intra_cluster_failover_timeout(self):
"""Gets the timeout used when trying to recover from an intra-cluster
failover event. The value is given in seconds. The default is
equivalent to 5 minutes.
This method is now deprecated.
"""
return 0
@intra_cluster_failover_timeout.setter
def intra_cluster_failover_timeout(self, value):
"""Sets the timeout used when trying to recover from an intra-cluster
failover event. The value is given in seconds. The default is
equivalent to 5 minutes.
This method is now deprecated.
"""
try:
value = int( value )
except:
raise GPUdbException( "Property 'intra_cluster_failover_timeout' "
"must be numeric; "
"given {}".format( str(type(value)) ) )
# Must be >= 0
if (value < 0):
raise GPUdbException( "Property 'intra_cluster_failover_timeout' "
"must be greater than or equal to zero; "
"given {}".format( value ) )
# end setter
@property
def logging_level(self):
"""Gets the logging level that will be used by the API. By default,
logging is set by the root logger (possibly set by the end user
application). If the user sets the logging level explicitly via
this options class, then the programmatically set level will be used
instead.
"""
return self.__logging_level
@logging_level.setter
def logging_level(self, value):
"""Sets the logging level for the :class:`GPUdb` object. Allowed
values are:
Level -- Numeric Value
--------------------------
CRITICAL -- 50
ERROR -- 40
WARNING -- 30
INFO -- 20
DEBUG -- 10
TRACE -- 9
NOTSET -- 0
Both the string values (in any case) and the numeric values are
acceptable. Also, None is acceptable (then the module will honor
the root logger's log level).
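For example:
::
options = GPUdb.Options()
options.logging_level = "debug"   # or a numeric value such as logging.DEBUG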
"""
if value is not None:
# Validate the value by actually trying to set it to a dummy
# logger object
if isinstance(value, (basestring, unicode)):
value = value.upper()
else:
value = int(value)
try:
dummy_logger = logging.getLogger( "dummy_test_logger" )
dummy_logger.setLevel( value )
except (ValueError, TypeError, Exception) as ex:
raise GPUdbException("Invalid log level value: '{}'"
"".format( GPUdbException.stringify_exception( ex ) ))
# end if
self.__logging_level = value
# end setter
@property
def password(self):
"""Gets the password to be used for authentication to GPUdb."""
return self.__password
@password.setter
def password(self, value):
"""Sets the password to be used for authentication to GPUdb."""
if not isinstance( value, (basestring, unicode, type(None)) ):
raise GPUdbException( "Property 'password' must be a string or "
"None; given '{}' type {}"
"".format( value, str(type(value)) ) )
self.__password = value
# end setter
@property
def primary_host(self):
"""Gets the hostname of the primary cluster of the HA environment."""
return self.__primary_host
@primary_host.setter
def primary_host(self, value):
"""Identifies the primary cluster of the HA environment (by setting
the hostname). Can be either a string or a :class:`GPUdb.URL` object.
Can be a fully qualified URL, e.g. "https://1.2.3.4:9191", or just a
hostname (in which case, it needs to be a string only). Will save
the hostname only (if a fully qualified URL is given).
"""
# Handle none or empty primary hosts
if not value:
self.__primary_host = ""
return
# end - Handle none or empty primary hosts
if isinstance( value, (basestring, unicode) ):
# Convert the string to a URL object and keep the hostname only
try:
value = GPUdb.URL( value ).host
except GPUdbException as ex:
raise GPUdbException( "Problem parsing string value '{}' for"
" property 'primary_host' as a URL: {}"
"".format( value, str(ex) ) )
elif isinstance( value, GPUdb.URL ):
# Keep the hostname only
value = value.host
else:
raise GPUdbException( "Property 'primary_host' must be a string"
" or a GPUdb.URL object; given '{}' type "
"{}".format( value, str(type(value)) ) )
self.__primary_host = value
# end primary_host setter
@property
def protocol(self):
"""Gets the protocol being used by the client.
"""
return self.__protocol
@protocol.setter
def protocol(self, value):
"""Sets the protocol being used by the client. Allowed values are:
* HTTP
* HTTPS
The default is HTTP.
.. note:
If the *host* argument of the GPUdb constructor contains fully
qualified URLs, then this protocol will not be used.
"""
# None is a valid value
if value is None:
self.__protocol = value
return
# end if
if not isinstance( value, (basestring, unicode) ):
raise GPUdbException( "Option 'protocol' must be a string; "
"given '{}' type {}"
"".format( value, str(type(value)) ) )
# Handle all cases
value = value.upper()
if (value not in ["HTTP", "HTTPS"]):
raise GPUdbException( "Expected protocol to be either 'HTTP' or "
"'HTTPS' got: '{}'".format( value ) )
self.__protocol = value
# end setter
@property
def skip_ssl_cert_verification(self):
"""Gets the value of the property indicating whether to verify the SSL
certificate for HTTPS connections.
"""
return self.__skip_ssl_cert_verification
@skip_ssl_cert_verification.setter
def skip_ssl_cert_verification(self, value):
"""Sets the value of the property indicating whether to verify the SSL
certificate for HTTPS connections.
"""
if not isinstance( value, bool ):
raise GPUdbException( "Property 'skip_ssl_cert_verification' must "
"be boolean; given '{}' type {}"
"".format( value, str(type(value)) ) )
self.__skip_ssl_cert_verification = value
# end setter
@property
def timeout(self):
"""Gets the timeout value, in milliseconds, after which a lack of
response from the GPUdb server will result in requests being aborted.
A timeout of zero is interpreted as an infinite timeout. Note that
this applies independently to various stages of communication, so
overall a request may run for longer than this without being aborted.
"""
return self.__timeout
@timeout.setter
def timeout(self, value):
"""Sets the timeout value, in milliseconds, after which a lack of
response from the GPUdb server will result in requests being aborted.
A timeout of zero is interpreted as an infinite timeout. Note that
this applies independently to various stages of communication, so
overall a request may run for longer than this without being aborted.
The default is zero.
"""
# None is a valid value
if value is None:
self.__timeout = value
return
# end if
try:
value = int( value )
except:
raise GPUdbException( "Property 'timeout' must be numeric; "
"given {}".format( str(type(value)) ) )
# Must be >= 0
if (value < 0):
raise GPUdbException( "Property 'timeout' must be greater than "
"or equal to zero; given "
"{}".format( value ) )
self.__timeout = value
# end setter
@property
def username(self):
"""Gets the username to be used for authentication to GPUdb."""
return self.__username
@username.setter
def username(self, value):
"""Sets the username to be used for authentication to GPUdb."""
if not isinstance( value, (basestring, unicode, type(None) ) ):
raise GPUdbException( "Property 'username' must be a string or "
"None; given '{}' type {}"
"".format( value, str(type(value)) ) )
self.__username = value
# end setter
# end class Options
class Version( object ):
"""An internal class to handle Kinetica Version (client API or server).
"""
def __init__(self, version_str ):
"""Takes in a string containing a Kinetica version and creates a
:class:`GPUdb.Version` object from it.
Parameters:
version_str (str)
A string containing the Kinetica version (client or
server). Expect at least four components separated
by a period (.). There may be additional parts after
the fourth component that will be discarded.
"""
# Handle a Version object
if isinstance(version_str, self.__class__):
self.__first = version_str.first
self.__second = version_str.second
self.__third = version_str.third
self.__fourth = version_str.fourth
self.__version_str = version_str.__version_str
return
# Parse the string into four components of the version
# If not a Version object, must be a string
if not isinstance( version_str, (basestring, unicode) ):
raise GPUdbException( "Expected a string version; got: '{}', "
"type {}".format( version_str,
str(type(version_str)) ) )
# Extract the four components from the string
components = version_str.split( "." )
if len( components ) < 4:
# Better have all four components!
raise GPUdbException( "Expected four components of the version; "
"did not get them. Got str '{}'"
"".format( version_str ) )
# end if
# Note that we disregard anything beyond the four components
major = components[ 0 ]
minor = components[ 1 ]
revision = components[ 2 ]
abi_version = components[ 3 ]
# Check that all the components are integers
try:
major = int( major )
minor = int( minor )
revision = int( revision )
abi_version = int( abi_version )
except Exception as ex:
raise GPUdbException( "Need integers for all four components of "
"the version; got '{}' (type '{}'), "
"'{}' (type '{}'), '{}' (type '{}'), "
"'{}' (type '{}')"
"".format( major, str(type(major)),
minor, str(type(minor)),
revision, str(type(revision)),
abi_version, str(type(abi_version)) ) )
# end try
# Save the components
self.__first = major
self.__second = minor
self.__third = revision
self.__fourth = abi_version
self.__version_str = "{}.{}.{}.{}".format( major, minor, revision,
abi_version )
# end __init__
def __eq__( self, other ):
if isinstance(other, self.__class__):
if ( self.__first != other.__first ):
return False
if ( self.__second != other.__second ):
return False
if ( self.__third != other.__third ):
return False
if ( self.__fourth != other.__fourth ):
return False
return True
else:
return False
# end __eq__
def __ne__(self, other):
return not self.__eq__(other)
# end __ne__
def __gt__( self, other ):
"""Checks if the this version is greater than the other version
(not equal to).
"""
if isinstance(other, self.__class__):
if ( self.__first > other.__first ):
# Example: _7_.2.1.0 is newer/greater than _5_.0.0.0
return True
if ( (self.__first == other.__first)
and ( self.__second > other.__second ) ):
# Example: 6._2_.0.0 is newer/greater than 6._1_.1.0, but
# 7._2_.0.0 is NOT newer/greater than 8._1_.0.0
return True
if ( (self.__first == other.__first)
and ( self.__second == other.__second )
and ( self.__third > other.__third ) ):
# Example: 6.2._2_.0 is newer/greater than 6.2._1_.0, but
# 7.7._3_.0 is NOT newer/greater than 9.9._0_.0
return True
if ( (self.__first == other.__first)
and ( self.__second == other.__second )
and ( self.__third == other.__third )
and ( self.__fourth > other.__fourth ) ):
# Example: 6.2.1._3_ is newer than 6.2.1._1_, but
# 7.7.3._1_ is NOT newer than 9.9.0._0_
return True
return False
else:
raise GPUdbException( "Comparing a non-compatible object of "
"type {} with a Version object."
"".format( str(type( other )) ) )
# end __gt__
def __lt__( self, other ):
"""Checks if the this version is greater than the other version
(not equal to).
"""
if isinstance(other, self.__class__):
if ( self.__first < other.__first ):
# Example: _6_.2.1.0 is older than _7_.0.0.0
return True
if ( (self.__first == other.__first)
and ( self.__second < other.__second ) ):
# Example: 6._1_.1.0 is older than 6._2_.0.0, but
# 8._1_.0.0 is NOT older than 7._2_.0.0
return True
if ( (self.__first == other.__first)
and ( self.__second == other.__second )
and ( self.__third < other.__third ) ):
# Example: 6.2._1_.0 is older than 6.2._2_.0, but
# 9.9._0_.0 is NOT older than 7.7._3_.0
return True
if ( (self.__first == other.__first)
and ( self.__second == other.__second )
and ( self.__third == other.__third )
and ( self.__fourth < other.__fourth ) ):
# Example: 6.2.1._1_ is older than 6.2.1._3_, but
# 9.9.0._0_ is NOT older than 7.7.3._1_
return True
return False
else:
raise GPUdbException( "Comparing a non-compatible object of "
"type {} with a Version object."
"".format( str(type( other )) ) )
# end __lt__
def __str__(self):
"""String representation of the URL."""
return self.__version_str
# end __str__
def __hash__(self):
"""Hash the object."""
return self.__version_str.__hash__()
# end __hash__
@property
def first(self):
"""Read-only property--first component of the version."""
return self.__first
@property
def second(self):
"""Read-only property--second component of the version."""
return self.__second
@property
def third(self):
"""Read-only property--third component of the version."""
return self.__third
@property
def fourth(self):
"""Read-only property--fourth component of the version."""
return self.__fourth
def is_version_compatible( self, other ):
"""Given another version, are the two compatible based on just the
first two components taken into account? We don't take the 3rd
and 4th components into account since the server and the API
ought to work as long as the first two components match.
TODO: Possibly add another optional parameter for taking how many
components to take into account when checking for
compatibility.
Parameters:
other( :class:`GPUdb.Version` )
The other version object.
Returns:
True if the two are compatible, False otherwise.
"""
# Check the other version's type first
if not isinstance(other, self.__class__):
raise GPUdbException( "Need another GPUdb.Version object for "
"comparison; got {}"
"".format( str( type( other ) ) ) )
# end if
# For now, we only care about the first two components matching
# exactly. Anything else is irrelevant as far as compatibility
# goes.
return ( (self.first == other.first)
and (self.second == other.second) )
# end is_version_compatible
# end class Version
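# A minimal usage sketch for the Version class above (illustrative values
# only; not part of the original source):
#
#   v1 = GPUdb.Version( "7.1.10.1" )
#   v2 = GPUdb.Version( "7.1.0.0" )
#   v1 > v2                         # True: third component 10 > 0
#   v1.is_version_compatible( v2 )  # True: first two components match
#   v1.is_version_compatible( GPUdb.Version( "7.2.0.0" ) )  # False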
class ValidateUrl(object):
"""An internal class to handle connection URL parsing
"""
@staticmethod
def validate_url(url=None):
"""Takes in a string URL, validates it, adds defaults where
necessary, and returns a tuple with the URL components.
Parameters:
url (str)
A string containing a Kinetica connection URL.
Returns:
A two-part tuple: the first element indicates whether the URL
could be parsed, and the second is a 7-part tuple containing the
parsed URL and its components:
* Full URL
* Protocol (HTTP,HTTPS)
* Hostname
* Port
* Path
* Username (if specified in the URL)
* Password (if specified in the URL)
"""
if IS_PYTHON_3:
from urllib.parse import urlparse
else:
from urlparse import urlparse
parsed_url = '' if url is None else url
if parsed_url.count('://') > 1:
# Malformed URL
return False, None
if '://' not in parsed_url:
# If the URL doesn't start with any protocol
# we prepend the default 'http'
parsed_url = 'http://' + parsed_url
parsed_url = urlparse(parsed_url)
scheme = parsed_url.scheme
if scheme not in ['http', 'https']:
# We don't deal with any protocol other than these
return False, None
hostname = parsed_url.hostname
# Default hostname
if hostname is None:
hostname = "127.0.0.1"
# Check if port is out-of-bounds
try:
port = parsed_url.port
except:
return False, None
path = parsed_url.path.rstrip('/')
# Default port; for HTTP, default port only assigned if no path
if port is None:
if scheme == 'https':
port = 443
else:
if path == '':
port = 9191
# Don't list 443 in the URL if using HTTPS or no port given
if (scheme == 'https' and port == 443) or port is None:
port_suffix = ''
else:
port_suffix = ':{}'.format(port)
# Later parts of the API rely on protocol being uppercase
protocol = scheme.upper()
# Construct the full URL
full_url = ("{protocol}://{ip}{port_suffix}{path}"
"".format(protocol=scheme.lower(),
ip=hostname,
port_suffix=port_suffix,
path=path))
ret_val = True, (full_url, protocol, hostname, port, path, parsed_url.username, parsed_url.password)
return ret_val
# End class ValidateUrl
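# A hedged sketch of ValidateUrl.validate_url in action (hypothetical
# hostnames; return values follow the defaulting rules implemented above):
#
#   ok, parts = GPUdb.ValidateUrl.validate_url( "kinetica.example.com" )
#   # ok    -> True
#   # parts -> ('http://kinetica.example.com:9191', 'HTTP',
#   #           'kinetica.example.com', 9191, '', None, None)
#
#   ok, parts = GPUdb.ValidateUrl.validate_url( "https://u:p@host.example.com/gpudb-0" )
#   # parts -> ('https://host.example.com/gpudb-0', 'HTTPS',
#   #           'host.example.com', 443, '/gpudb-0', 'u', 'p')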
class URL(object):
"""An internal class to handle URLs. Stores the hostname/IP address,
port, protocol, path, and the full URL (as a string).
"""
def __init__(self, url=None, port=None, protocol=None, accept_full_urls_only=False):
"""Takes in a string containing a full URL, or another :class:`URL`
object, and creates a :class:`URL` object from it.
Parameters:
url (str or GPUdb.URL)
Either a hostname/URL string or another GPUdb.URL object
to create this object for. Note that the port is not a
mandatory part of the URL.
port (int)
Optional port. If specified, will be appended to any host
specified and will override the port of any URL specified.
protocol (str)
Optional protocol. If specified, will be prepended to any
host specified and will override the protocol of any URL
specified.
accept_full_urls_only (bool)
Optional argument. If False, then be flexible in the parsing;
for example, if no port is given, use the default port. If
True, then accept only fully qualified URLs. Default is False.
"""
# Handle a URL object
if isinstance(url, self.__class__):
self.__host = url.host
self.__port = url.port
self.__protocol = url.protocol
self.__url_path = url.path
self.__full_url = url.url
self.__username = url.username
self.__password = url.password
return
# end if
# If not a URL object, must be a string or None
if url is not None and not isinstance(url, (basestring, unicode)):
raise GPUdbException("Expected a string URL; got: '{}', "
"type {}".format(url,
str(type(url))))
full_url = None
# Use the ValidateUrl class to validate the URL
url_valid = GPUdb.ValidateUrl.validate_url(url)
if not url_valid[0]:
raise GPUdbException("Failed to parse given url '{}'"
"".format(url))
full_url, protocol_, hostname, port_, path, username, password = url_valid[1]
if protocol is None and port is None:
self.__username = username
self.__password = password
else:
# If port and/or protocol were specified separately, re-init URL
# with those injected into the URL
protocol_ = protocol if protocol is not None else protocol_
port_ = port if port is not None else port_
# Only embed credentials when they were present in the parsed URL;
# otherwise the literal text "None:None@" would end up in the URL
credentials = ("{}:{}@".format( username, password )
               if username is not None else "")
full_url = (
"{protocol}://{credentials}{ip}:{port}{path}".format(
protocol = protocol_.lower(),
credentials = credentials,
ip = hostname,
port = port_,
path = path
))
self.__init__(full_url)
return
self.__using_default_protocol = protocol_.lower() == 'http'
self.__using_default_port = str(port_) == '9191'
self.__host = hostname
self.__port = int(port_) if port_ else None
self.__protocol = protocol_
self.__url_path = path
self.__full_url = full_url
# end __init__
def __eq__(self, other):
if isinstance(other, self.__class__):
if (self.__host != other.__host):
return False
if (self.__port != other.__port):
return False
if (self.__protocol != other.__protocol):
return False
if (self.__url_path != other.__url_path):
return False
if (self.__full_url != other.__full_url):
return False
if (self.__username != other.__username):
return False
if (self.__password != other.__password):
return False
return True
else:
return False
# end __eq__
def __ne__(self, other):
return not self.__eq__(other)
# end __ne__
def __str__(self):
"""String representation of the URL."""
return self.url
# end __str__
def __hash__(self):
"""Hash the object."""
return self.url.__hash__()
# end __hash__
@property
def host(self):
"""Read-only property--hostname or IP address."""
return self.__host
@property
def port(self):
"""Read-only property--port."""
return self.__port
@property
def using_default_port(self):
"""Read-only property--boolean indicating if we're using
a default port, or using the user given port (or the lack
thereof)."""
return self.__using_default_port
@property
def protocol(self):
"""Read-only property--protocol (HTTP or HTTPS)."""
return self.__protocol
@property
def using_default_protocol(self):
"""Read-only property--boolean indicating if we're using
a default protocol, or using the user given protocol."""
return self.__using_default_protocol
@property
def path(self):
"""Read-only property--URL path."""
return self.__url_path
@property
def url(self):
"""Read-only property--fully qualified URL."""
return self.__full_url
@property
def username(self):
"""Read-only property--username in URL, if present."""
return self.__username
@property
def password(self):
"""Read-only property--password in URL, if present."""
return self.__password
# end class URL
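# A small usage sketch for the URL class (hypothetical address):
#
#   u = GPUdb.URL( "http://1.2.3.4:9191/gpudb-0" )
#   u.host       # '1.2.3.4'
#   u.port       # 9191
#   u.protocol   # 'HTTP'
#   u.path       # '/gpudb-0'
#   str( u )     # 'http://1.2.3.4:9191/gpudb-0'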
class ClusterAddressInfo( object ):
"""Inner class to keep track of all relevant information for a given
Kinetica cluster. It mostly keeps track of URLs and hostnames, with
some additional information like whether the cluster is primary or not.
"""
def __init__( self, head_rank_url,
worker_rank_urls = None,
host_names = None,
host_manager_url = None,
host_manager_port = None,
is_primary_cluster = None,
server_version = None,
logging_level = None ):
"""Creates a :class:`ClusterAddressInfo` object with the given
information.
Parameters:
head_rank_url (str or :class:`GPUdb.URL`)
Only required argument. Must be a full URL string or
:class:`GPUdb.URL` object. E.g. "http://1.2.3.4:8082/gpudb-0".
worker_rank_urls (list of str or :class:`GPUdb.URL`)
Optional argument. Must be a list of fully qualified URLs.
These URLs correspond to the worker ranks' addresses.
host_names (list of str)
Optional argument. Must be a list of strings. These strings
contain hostnames or IP addresses for all the nodes/hosts in
the cluster. May contain the protocol (e.g. "http://host0").
host_manager_url (str or :class:`GPUdb.URL`)
Optional argument, mutually exclusive with `host_manager_port`.
If given, must be a fully qualified URL for the host manager
of this cluster.
host_manager_port (int)
Optional argument, mutually exclusive with `host_manager_url`.
If given, must be an integer in the range [1, 65535].
is_primary_cluster (bool)
Optional boolean argument. Indicates if this cluster is to
be treated as the primary cluster. Default is False.
server_version (str or :class:`GPUdb.Version`)
Optional string containing the server version. If given,
will be parsed as a :class:`GPUdb.Version` object. Default
is None.
logging_level (int)
Optional level at which logs should be output. Default is
None.
"""
self.__construct( head_rank_url,
worker_rank_urls,
host_names,
host_manager_url,
host_manager_port,
is_primary_cluster,
server_version,
logging_level )
# end __init__
def __construct( self, head_rank_url,
worker_rank_urls = None,
host_names = None,
host_manager_url = None,
host_manager_port = None,
is_primary_cluster = None,
server_version = None,
logging_level = None ):
"""Constructs a :class:`GPUdb.ClusterAddressInfo` object.
Parameters:
head_rank_url (str or :class:`GPUdb.URL`)
Only required argument. Must be a full URL string or
:class:`GPUdb.URL` object. E.g. "http://1.2.3.4:8082/gpudb-0".
worker_rank_urls (list of str or :class:`GPUdb.URL`)
Optional argument. Must be a list of fully qualified URLs.
These URLs correspond to the worker ranks' addresses.
host_names (list of str)
Optional argument. Must be a list of strings. These strings
contain hostnames or IP addresses for all the nodes/hosts in
the cluster. May contain the protocol (e.g. "http://host0").
host_manager_url (str or :class:`GPUdb.URL`)
Optional argument, mutually exclusive with `host_manager_port`.
If given, must be a fully qualified URL for the host manager
of this cluster.
host_manager_port (int)
Optional argument, mutually exclusive with `host_manager_url`.
If given, must be an integer in the range [1, 65535].
is_primary_cluster (bool)
Optional boolean argument. Indicates if this cluster is to
be treated as the primary cluster. Default is False.
server_version (str or :class:`GPUdb.Version`)
Optional string containing the server version. If given,
will be parsed as a :class:`GPUdb.Version` object. Default
is None.
logging_level (int)
Optional level at which logs should be output. Default is
None.
"""
# Class level logger so that setting it for one instance doesn't
# set it for ALL instances after that change (even if it is
# outside of the scope of the first instance whose log level was
# changed)
self.log = logging.getLogger( "gpudb.GPUdb.ClusterAddressInfo_instance_"
+ str( uuid.uuid4() ) )
# Handlers need to be instantiated only ONCE for a given module
# (i.e. not per class instance)
handler = logging.StreamHandler()
formatter = logging.Formatter( fmt = GPUdb._LOG_MESSAGE_FORMAT,
datefmt = GPUdb._LOG_DATETIME_FORMAT )
handler.setFormatter( formatter )
self.log.addHandler( handler )
if logging_level:
self.log.setLevel(logging_level)
# Prevent logging statements from being duplicated
self.log.propagate = False
# Set default values
self.__worker_rank_urls = []
self.__host_names = []
self.__host_manager_url = None
self.__is_primary_cluster = False
self.__server_version = None
# Validate all the input arguments and save them.
# Note that we are using the property setters, rather than directly
# setting the values of the private members. This ensures that we
# validate the values appropriately.
self.head_rank_url = head_rank_url
# Check if a host manager URL or port was given (both can't be given)
if ( (host_manager_url is not None)
and (host_manager_port is not None) ):
raise GPUdbException("User can provide either 'host_manager_url' "
"or 'host_manager_port', not both; both given! "
" URL: {}, port: {}"
"".format( host_manager_url,
host_manager_port ) )
# Validate the host manager port, if given
if (host_manager_port is not None):
try :
port = int( host_manager_port )
except :
raise GPUdbException( "Argument 'host_manager_port' must "
"be a numeric value; got '{}',"
" type {}"
"".format( host_manager_port,
str(type( host_manager_port )) ) )
# Port value must be within (0, 65536)
if ( (port <= 0) or (port >= 65536) ):
raise GPUdbException( "Argument 'host_manager_port' must "
"be within the range [1, 65535]; "
"got '{}'".format( port ) )
# end if
# Use this port to create a host manager URL (to be saved later)
head_url = self.head_rank_url
host_manager_url = ("{protocol}://{ip}:{port}{path}"
"".format( protocol = head_url.protocol.lower(),
ip = head_url.host,
port = host_manager_port,
path = head_url.path ) )
self.__log_debug( "Created host manager URL: {}".format( host_manager_url ) )
# end if
# Validate and save the rest.
# Note that we are using the property setters, rather than directly
# setting the values of the private members. This ensures that we
# validate the values appropriately.
if worker_rank_urls is not None:
self.worker_rank_urls = worker_rank_urls
if host_names is not None:
self.host_names = host_names
if host_manager_url is not None:
self.host_manager_url = host_manager_url
if is_primary_cluster is not None:
self.is_primary_cluster = is_primary_cluster
if server_version is not None:
self.server_version = server_version
# Save the protocol for use
self.__protocol = self.head_rank_url.protocol
# Update the hostnames with all the ranks' hostnames
self.__update_hostnames_based_on_rank_urls()
# end __construct
def __eq__( self, other ):
"""A custom equality method."""
if isinstance(other, self.__class__):
if ( self.head_rank_url != other.head_rank_url ):
return False
if ( self.worker_rank_urls != other.worker_rank_urls ):
return False
if ( self.host_names != other.host_names ):
return False
if ( self.host_manager_url != other.host_manager_url ):
return False
if ( self.is_primary_cluster != other.is_primary_cluster ):
return False
if ( self.server_version != other.server_version ):
return False
return True
else:
return False
# end __eq__
def __ne__(self, other):
return not self.__eq__(other)
# end __ne__
def __str__(self):
"""String representation of this cluster."""
return ("{{ head_rank_url: {rank0}"
", worker_rank_urls: {workers}"
", host_names: {hostnames}"
", host_manager_url: {hm}"
", is_primary_cluster: {primary}"
", server_version: {version}"
" }}"
"".format( rank0 = self.head_rank_url,
workers = [str(u) for u in self.worker_rank_urls],
hostnames = self.host_names,
hm = self.host_manager_url,
primary = self.is_primary_cluster,
version = str( self.server_version ) ) )
# end __str__
@property
def head_rank_url( self ):
"""Returns the current head node :class:`GPUdb.URL` for this cluster.
"""
return self.__head_rank_url
@head_rank_url.setter
def head_rank_url( self, value ):
"""Sets the URL for the active head node of this cluster. Must be
a fully qualified URL if a string is given, for example,
"http://1.3.4.5:9191". Or a valid :class:`GPUdb.URL` object must
be given.
"""
if not isinstance( value, (basestring, unicode, GPUdb.URL) ):
raise GPUdbException( "Property 'head_rank_url' must be "
"a string or a GPUdb.URL object; given "
"'{}' type {}"
"".format( value, str(type(value)) ) )
# Convert to a URL object (may throw an exception upon validation)
try:
self.__head_rank_url = GPUdb.URL( value )
except Exception as ex:
raise GPUdbException( "Unable to set property 'head_rank_url'"
"; error: {}"
"".format( GPUdbException.stringify_exception( ex ) ) )
# end setter
@property
def protocol( self ):
"""Returns the protocol used ('HTTP' or 'HTTPS'). This is
derived from the head rank URL. A read-only property.
"""
return self.__protocol
@property
def worker_rank_urls( self ):
"""Returns the list of the worker rank :class:`GPUdb.URL` objects
for this cluster. May be empty if worker http servers are disabled.
"""
return self.__worker_rank_urls
@worker_rank_urls.setter
def worker_rank_urls( self, value ):
"""Sets the list of the worker rank URLs for this cluster. Must be
a list of strings.
"""
# It needs to be a list
if not isinstance( value, list ):
raise GPUdbException( "Property 'worker_rank_urls' must be "
"a list of strings or GPUdb.URL objects; "
"given '{}' type {}"
"".format( value, str(type(value)) ) )
# Each element must be a string or a URL object
if not all( [ isinstance( x, (basestring, unicode, GPUdb.URL) )
for x in value ] ):
raise GPUdbException( "Property 'worker_rank_urls' must be "
"a list of strings or GPUdb.URL objects; "
"given some non-string elements: '{}'; "
"type {}"
"".format( value, str(type(value)) ) )
# end if
# Convert each URL string to a URL object
try:
self.__worker_rank_urls = [ GPUdb.URL(x) for x in value ]
except Exception as ex:
raise GPUdbException( "Unable to set property 'worker_rank_urls'"
"; error: {}"
"".format( GPUdbException.stringify_exception( ex ) ) )
# end setter
@property
def host_names( self ):
"""Returns the list of hostnames for this cluster.
"""
return self.__host_names
@host_names.setter
def host_names( self, value ):
"""Sets the list of the hostnames for this cluster. May contain
the protocol in each string (e.g. "http://host0"). Must be a list
of strings.
"""
# It needs to be a list
if not isinstance( value, list ):
raise GPUdbException( "Property 'host_names' must be "
"a list of strings; given '{}' type {}"
"".format( value, str(type(value)) ) )
# Each element must be a string
if not all( [ isinstance( x, (basestring, unicode) )
for x in value ] ):
raise GPUdbException( "Property 'host_names' must be "
"a list of strings; given some non-string"
" elements: '{}' type {}"
"".format( value, str(type(value)) ) )
# end if
self.__host_names = value
# end setter
@property
def host_manager_url( self ):
"""Returns the host manager :class:`GPUdb.URL` for this cluster.
"""
return self.__host_manager_url
@host_manager_url.setter
def host_manager_url( self, value ):
"""Sets the URL for the active host manager of this cluster. Must
be a fully qualified URL.
"""
if not isinstance( value, (basestring, unicode, GPUdb.URL) ):
raise GPUdbException( "Property 'host_manager_url' must be "
"a string or a GPUdb.URL object; given "
"'{}' type {}"
"".format( value, str(type(value)) ) )
# Convert to a URL object (may throw an exception upon validation)
try:
self.__host_manager_url = GPUdb.URL( value )
except Exception as ex:
raise GPUdbException( "Unable to set property 'host_manager_url'"
"; error: {}"
"".format( GPUdbException.stringify_exception( ex ) ) )
# end setter
@property
def is_primary_cluster( self ):
"""Returns whether this cluster is the primary cluster in the ring.
"""
return self.__is_primary_cluster
@is_primary_cluster.setter
def is_primary_cluster( self, value ):
"""Sets whether this cluster is the primary cluster in the ring.
Must be a boolean value. The default is False.
"""
if not isinstance( value, bool ):
raise GPUdbException( "Property 'is_primary_cluster' must be "
"boolean; given '{}' type {}"
"".format( value, str(type(value)) ) )
self.__is_primary_cluster = value
# end setter
@property
def is_intra_cluster_failover_enabled( self ):
"""Returns whether this cluster has intra-cluster failover
enabled.
This method is now deprecated.
"""
return False
@is_intra_cluster_failover_enabled.setter
def is_intra_cluster_failover_enabled( self, value ):
"""Sets whether this cluster has intra-cluster failover enabled.
Must be a boolean value. The default is False.
This method is now deprecated.
"""
if not isinstance( value, bool ):
raise GPUdbException( "Property must be boolean; given '{}' type {}"
"".format( value, str(type(value)) ) )
# end setter
@property
def server_version( self ):
"""Returns the version of this cluster, if known; None otherwise.
"""
return self.__server_version
@server_version.setter
def server_version( self, value ):
"""Sets the version of this cluster. Must be either a string or a
:class:`GPUdb.Version` object. The default is None (though it
cannot be explicitly set to None).
"""
try:
self.__server_version = GPUdb.Version( value )
except Exception as ex:
raise GPUdbException( "Could not save given value '{}' as the "
"cluster's version; error: {}"
"".format( value, str(ex) ) )
# end setter
# Internal Helper Methods
# -----------------------
def __log_debug( self, message ):
if not self.log.isEnabledFor( logging.DEBUG ):
# No-op if debug is not enabled. This is important
# because the inspect module is a time killer!
return
# end if
try:
# Get calling method's information from the stack
stack = inspect.stack()
# stack[1] gives the previous/calling function
filename = stack[1][1].split("/")[-1]
ln = stack[1][2]
func = stack[1][3]
self.log.debug( "[GPUdb.ClusterAddressInfo::{fn}::{line}::{func}]"
" {msg}"
"".format( fn = filename,
func = func, line = ln,
msg = message ) )
except:
# Some error occurred with inspect; just log the debug message
self.log.debug( "[GPUdb.ClusterAddressInfo]"
" {msg}"
"".format( msg = message ) )
# end __log_debug
def __log_warn( self, message ):
self.log.warning( "[GPUdb.ClusterAddressInfo] {}".format( message ) )
# end __log_warn
def __log_info( self, message ):
self.log.info( "[GPUdb.ClusterAddressInfo] {}".format( message ) )
# end __log_info
def __log_error( self, message ):
self.log.error( "[GPUdb.ClusterAddressInfo] {}".format( message ) )
# end __log_error
def __update_hostnames_based_on_rank_urls( self ):
"""Add the hostnames of the head and worker ranks URLs to the
list of hostnames if they are not already part of it.
"""
self.__log_debug( "Updating hostname list: {}".format([host_name for host_name in self.host_names]) )
# Put the head rank's hostname in the saved hostnames (only if
# it doesn't exist there already)
head_rank_hostname = ("{protocol}://{host}"
"".format( protocol = self.head_rank_url.protocol.lower(),
host = self.head_rank_url.host ) )
if head_rank_hostname not in self.host_names:
self.__log_debug( "Adding head rank's hostname to hostname list: {}".format( head_rank_hostname ) )
self.__host_names.append( head_rank_hostname )
# Put each worker rank's hostname in the saved hostnames (only if
# it doesn't exist there already)
for worker_url in self.worker_rank_urls:
worker_rank_hostname = ("{protocol}://{host}"
"".format( protocol = worker_url.protocol.lower(),
host = worker_url.host ) )
if worker_rank_hostname not in self.host_names:
self.__log_debug( "Adding worker rank's hostname to hostname list: {}".format( worker_rank_hostname ) )
self.__host_names.append( worker_rank_hostname )
# end __update_hostnames_based_on_rank_urls
# Convenience Methods
# --------------------
def does_cluster_contain_node( self, host_name ):
"""Checks if the given hostname (or IP address) is part of this
cluster.
Parameters:
host_name (str)
String containing a hostname or an IP address.
Returns:
True if this cluster contains a machine with the given
hostname or IP address, False otherwise.
"""
self.__log_debug( "Check for hostname {} in hostname list".format( host_name ) )
if not isinstance( host_name, (basestring, unicode)):
msg = ("Need a string for the host name, given '{}'"
"".format( str(type(host_name)) ) )
self.__log_debug( msg )
return False
# end if
for host_name_ in self.host_names:
# We need to check for a substring match since the saved
# hostnames contain the protocol as well as the actual hostname
# or IP address
if( host_name in host_name_ ):
self.__log_debug( "Found matching hostname in hostname list")
return True
# end for
self.__log_debug( "Hostname not found in hostname list")
return False # found no match
# end does_cluster_contain_node
# end class ClusterAddressInfo
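# A hedged sketch of ClusterAddressInfo (hypothetical addresses; only the
# head rank URL is required, the rest is defaulted or derived):
#
#   info = GPUdb.ClusterAddressInfo( "http://1.2.3.4:9191",
#                                    host_manager_port = 9300 )
#   str( info.head_rank_url )                    # 'http://1.2.3.4:9191'
#   str( info.host_manager_url )                 # 'http://1.2.3.4:9300'
#   info.does_cluster_contain_node( "1.2.3.4" )  # True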
# ------------------------- GPUdb Members --------------------------------
__http_response_triggering_failover = [
httplib.SERVICE_UNAVAILABLE, # most likely
httplib.INTERNAL_SERVER_ERROR,
httplib.GATEWAY_TIMEOUT,
httplib.BAD_GATEWAY # rank-0 killed with HTTPD gives this
]
__endpoint_server_error_magic_strings = [
C._DB_EXITING_ERROR_MESSAGE,
C._DB_OFFLINE_ERROR_MESSAGE,
C._DB_SYSTEM_LIMITED_ERROR_MESSAGE,
C._DB_CONNECTION_REFUSED,
C._DB_CONNECTION_RESET
]
# Default host manager port for http and httpd
_DEFAULT_HOST_MANAGER_PORT = 9300
_DEFAULT_HTTPD_HOST_MANAGER_PORT = 8082
# The timeout (in seconds) used for checking the status of a node; we used
# to use a small timeout so that it does not take a long time to figure out
# that a rank is down, but connections over high-traffic networks or the
# cloud may encounter significant connection wait times. Using 20 seconds.
# __DEFAULT_INTERNAL_ENDPOINT_CALL_TIMEOUT = 20
_DEFAULT_SERVER_CONNECTION_TIMEOUT = 5 # in seconds
# The number of times that the API will attempt to submit a host
# manager endpoint request. We need this in case the user chose
# a bad host manager port. We don't want to go into an infinite
# loop
__HOST_MANAGER_SUBMIT_REQUEST_RETRY_COUNT = 3
__SSL_ERROR_MESSAGE_TEMPLATE = (
"<{}>. "
"To fix, either: "
"1) Add the server's certificate or a CA cert to the system CA certificates file, or "
"2) Skip the certificate check using the skip_ssl_cert_verification option. "
"Examples: https://docs.kinetica.com/7.1/api/concepts/#https-without-certificate-validation"
)
END_OF_SET = -9999
"""(int) Used for indicating that all of the records (till the end of the
set are desired)--generally used for /get/records/\* functions.
"""
# The version of this API
api_version = "7.1.10.1"
# ------------------------- GPUdb Methods --------------------------------
def __init__( self, host = None, options = None, *args, **kwargs ):
"""
Construct a new GPUdb client instance. This object communicates to
the database server at the given address. This class implements
HA failover, which means that upon certain error conditions, this class
will try to establish connection with one of the other clusters
(specified by the user or known to the ring) to continue service.
There are several options related to how to control that in the
:class:`GPUdb.Options` class that can be controlled via `options`.
.. note::
Please read the docstring of `options` about backward-
compatibility related notes.
Parameters:
host (str or list of str)
The URL(s) of the GPUdb server. May be provided as a comma
separated string or a list of strings containing head or worker
rank URLs of the server clusters. Must be full and valid URLs.
Example: "https://domain.com:port/path/".
If only a single URL or host is given, and no *primary_host* is
explicitly specified via the options, then the given URL will be
used as the primary URL. Default is 'http://127.0.0.1:9191'
(implemented internally).
Note that in versions 7.0 and prior, the URL also allowed
username:password@ in front of the hostname. That is now
deprecated. For now, anything in the hostname separated
by the @ symbol will be discarded. (But the constructor
will still function). Please use the appropriate properties
of the `options` argument to set the username and
password.
options (GPUdb.Options or dict)
Optional arguments for creating this GPUdb object. To be
backward compatible to 7.0 versions, keyword arguments will
be honored (only if no options is given). I.e., if options
is given, no positional or keyword argument can be given. See
:class:`Options` for all available properties.
In order to be backward-compatible, this argument will be
checked to see if it is the *port* argument from the 7.0.x.y
version of the API. Users are encouraged to update their code
to use the current interface. Please see the documentation for
the latest 7.0 version to understand the comment about this
`port` argument. If you are getting started with this API with
version 7.1.0.0 or later, then ignore this whole paragraph.
.. seealso:: :class:`GPUdb.Options`
"""
# Call the internal function to initialize the object
self.__construct( host, options, *args, **kwargs )
# end __init__
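# A hedged connection sketch for the constructor above (hypothetical URL and
# credentials; assumes a reachable Kinetica server, since construction
# contacts the cluster to discover ring information):
#
#   db = GPUdb( host = "https://kinetica.example.com:8082/gpudb-0",
#               options = { "username": "user", "password": "secret" } )
#
#   # 7.0-style keyword arguments are still honored when no options are given:
#   # db = GPUdb( host = "127.0.0.1", port = "9191" )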
def __construct( self, host = None, options = None, *args, **kwargs ):
"""
Construct a new GPUdb client instance.
Parameters:
host (str or list of str)
The IP address of the GPUdb server. May be provided as a comma-
separated string or a list of strings to support HA. Must be
full and valid URLs:
"https://domain.com:port/path/".
Default is 'http://127.0.0.1:9191' (implemented internally).
options (GPUdb.Options)
Optional arguments for creating this GPUdb object. To be
backward compatible to 7.0 versions, other keyword arguments
will be honored (only if this parameter is not given). I.e.,
users can't provide both this argument and keyword arguments.
See :class:`GPUdb.Options` for all available properties.
.. seealso:: :class:`GPUdb.Options`
"""
# Identification for this instance (useful in logging)
self._id = str( uuid.uuid4() )
# Class level logger so that setting it for one GPUdb instance doesn't
# set it for ALL GPUdb instances after that change (even if it is
# outside of the scope of the first instance whose log level was
# changed)
self.log = logging.getLogger( "gpudb.GPUdb_instance_" + self._id )
# Handlers need to be instantiated only ONCE for a given module
# (i.e. not per class instance)
handler = logging.StreamHandler()
formatter = logging.Formatter( fmt = GPUdb._LOG_MESSAGE_FORMAT,
datefmt = GPUdb._LOG_DATETIME_FORMAT )
handler.setFormatter( formatter )
self.log.addHandler( handler )
# Prevent logging statements from being duplicated
self.log.propagate = False
# Keep track of this API's client version
self.__client_version = GPUdb.Version( self.api_version )
# Handle constructor arguments in a backward-compatible manner
port, options = self.__parse_options( options, *args, **kwargs )
# Save the options and its individual properties
self.__options = options
self.__encoding = self.options.encoding
self.__username = self.options.username
self.__password = self.options.password
self.__logging_level = self.options.logging_level
self.__primary_host = self.options.primary_host
self.__protocol = self.options.protocol
self.__timeout = self.options.timeout
self.__custom_http_headers = self.options.http_headers
self.__skip_ssl_cert_check = self.options.skip_ssl_cert_verification
self.__disable_auto_discovery = self.options.disable_auto_discovery
self.__disable_failover = self.options.disable_failover
self.__ha_failover_order = self.options.ha_failover_order
self.__initial_connection_attempt_timeout = self.options.initial_connection_attempt_timeout
self.__server_connection_timeout = self.options.server_connection_timeout
# Set the logging level (only if the user set something)
if self.__logging_level is not None:
self.set_client_logger_level( self.__logging_level )
# end if
self.__log_debug( "Host: {}".format( str(host) ) )
self.__log_debug( "Port: {}".format( port ) )
self.__log_debug( "Options: {}".format( str(options) ) )
if self.__skip_ssl_cert_check:
self.__log_debug( "Bypassing SSL certificate check for HTTPS connections" )
else:
self.__log_debug( "Using system trust store for HTTPS connections" )
# Validate the encoding
if (self.encoding.upper() == C._ENCODING_SNAPPY and not HAVE_SNAPPY):
self.__log_warn('SNAPPY encoding specified but python-snappy is not installed; reverting to BINARY')
self.__encoding = C._ENCODING_BINARY
# Set default values for some internal information
self.__use_httpd = False
self.client_to_object_encoding_map = { \
C._ENCODING_BINARY: "binary",
C._ENCODING_SNAPPY: "binary",
C._ENCODING_JSON: "json",
}
# Set the synchronicity override mode to be default
self._ha_sync_mode = GPUdb.HASynchronicityMode.DEFAULT
# Load all gpudb schemas
self.load_gpudb_schemas()
self.__load_logger_schemas()
# Load the mapping of function names to endpoints
self.load_gpudb_func_to_endpoint_map()
# Initiate the type store
self._known_types = {}
# Keep a count of how many times a request has been tried
self._num_retries = 0
# Note: The encoding, HTTP header etc. information must be set
# before setting hosts since we'll be calling /show/system/properties
# Handle the host, port, protocol, and the host manager port the same
# way as 7.0 and prior versions for backward compatibility
protocol = self.options.protocol
host_manager_port = self.options.host_manager_port
if not isinstance( host, list ):
# Host is not a list; if a comma separated list is given, then
# split it
comma = ','
if ( isinstance( host, (basestring, unicode) )
and (comma in host) ):
# Multiple hosts given
host = host.strip( comma ).split( comma )
else:
# Single host given as a string; make it a list
host = [host]
# end if
# end if
# If the ports and protocol are not lists, make them so
if not isinstance(port, list):
port = [port]*len(host)
if not isinstance(host_manager_port, list):
host_manager_port = [host_manager_port]*len(host)
if not isinstance(protocol, list):
protocol = [protocol]*len(host)
# Ensure that all the args have the same length
if ( ( len(host) != len(port) )
or ( len(host) != len(host_manager_port) )
or ( len(host) != len(protocol) ) ):
raise GPUdbException( "Host, port, host_manager_port and protocol"
" list must have the same number of elements;"
" gave host {}, port {}, host_manager_port {},"
" protocol {}"
"".format( host, port, host_manager_port,
protocol ) )
# end if
# Check that no duplicate host name was given
if ( ( len(host) > 1) and (len(host) != len( set(host) )) ):
self.__log_warn( "Given list of hosts has a duplicate; might cause unpredictable behavior ({})"
"".format( host ) )
# If the user explicitly gave a separate port & protocol via
# the options, reconcile that with the user given URLs.
hosts = []
for (host_, port_, protocol_) in zip(host, port, protocol):
self.__log_debug( "Normalizing host '{}' port '{}' protocol '{}'"
"".format( host_, port_, protocol_ ) )
url = GPUdb.URL( host_, port_, protocol_ )
if host_:
self.__log_debug( "Converted to user @ host: <%s> @ <%s>" % (url.username, url.url) )
# If no user/pass set, attempt to pull from any URLs
if self.__username is None:
self.__username = url.username
if self.__password is None:
self.__password = url.password
# end if
# Add the possibly modified host to the final hosts list
hosts.append( url.url )
# end for
self.__log_debug( "Using (possibly modified) URLs: {}".format( hosts ) )
# Set up the credentials to be used per POST
self.auth = None
if self.username is not None:
self.__log_debug('Setting up credentials with username <%s>' % self.username)
if IS_PYTHON_3:
# base64 encode the username and password
self.auth = ('%s:%s' % (self.username, self.password) )
self.auth = _Util.str_to_bytes( self.auth )
self.auth = base64.b64encode( self.auth ).decode( "ascii" ).replace('\n', '')
self.auth = ("Basic %s" % self.auth)
else: # Python 2.x
self.auth = base64.encodestring('%s:%s' % (self.username, self.password)).replace('\n', '')
self.auth = ("Basic %s" % self.auth)
# end if
# We need to keep a running count of how many times we've failed and had
# to switch to a different HA ring head node (useful for multi-head I/O)
self.__num_cluster_switches = 0 # For the new submit_request route
# Some defaults
self.__cluster_indices = []
self.__curr_cluster_index_pointer = 0
# Parse the user given URLs (will throw an error if no connection
# can be established)
self.__parse_urls( hosts )
# Check version compatibility with the server
# -------------------------------------------
self.__update_server_version()
if( not self.__perform_version_check() ):
self.__log_warn("API and server versions don't match")
# end __construct
def __eq__( self, other ):
"""Override the equality operator. Note that
we ignore the timeout setting. The only things checked
are the DB server URL, connection protocol (http vs. https),
encoding (binary, json, or snappy), the username, the
password, and the various options.
"""
# Check the type of the other object
if not isinstance( other, GPUdb ):
return False
# Check the ring information (all clusters' all URLs)
if (self.__cluster_info != other.__cluster_info):
return False
# Check the options used
if (self.options != other.options):
return False
return True
# end __eq__
def __ne__(self, other):
return not self.__eq__(other)
# end __ne__
def __getstate__( self ):
"""Defines how to pickle the GPUdb object.
"""
pickle_this = { "gpudb_url": self.get_url(),
"options": self.options.as_json()
}
return pickle_this
# end __getstate__
def __setstate__( self, state ):
"""Re-creates a GPUdb object from the pickled state. For a
description of the pickled state, see :meth:`.__getstate__`.
"""
# Call the internal function to initialize the object
options = GPUdb.Options( state["options"] )
self.__construct( host = state["gpudb_url"],
options = options )
# end __setstate__
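# A brief sketch of the pickle round trip enabled by __getstate__ and
# __setstate__ above (the unpickling step re-runs the constructor, so it
# needs a reachable server; 'db' is a hypothetical connected GPUdb object):
#
#   import pickle
#   blob    = pickle.dumps( db )     # stores the current URL and the options
#   db_copy = pickle.loads( blob )   # reconnects using that saved state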
def __get_ha_ring_size( self ):
"""Returns the size of the ring that this client is talking to."""
return len( self.__cluster_info )
# end __get_ha_ring_size
def __parse_options( self, options, *args, **kwargs ):
"""Parse the user given options to the constructor.
Parameters:
options (GPUdb.Options)
Optional arguments for creating this GPUdb object. To be
backward compatible to 7.0 versions, other keyword arguments
will be honored (only if this parameter is not given). I.e.,
users can't provide both this argument and keyword arguments.
See :class:`GPUdb.Options` for all available properties.
.. seealso:: :class:`GPUdb.Options`
Returns:
The parsed port and the options.
"""
# For backward compatibility, check for 'port' in the keyword argument
# Note: If it is not in keyword arguments, then the only other spot
# it could be would be 'options' itself.
port = kwargs.pop( "port", None )
# Check if the given options, if any, is actually the old 'port'
parse_args_and_kwargs = False
if options is not None:
# The options are given, but is it actually options or port
# from the 7.0 interface?
# Validate type and parse the argument
if not isinstance( options, (GPUdb.Options, dict) ):
# Not real options; could this be a positional 'port' argument?
if isinstance( options, (basestring, unicode, list, int) ):
# This could be the port; treat it as such, but first
# check if it was given as a keyword argument
if port:
# The user gave the port via the keyword args; clash!
error_msg = ("Argument 'options' must be a GPUdb.Options"
" object or a dict, given {}"
"".format( str(type(options)) ) )
self.__log_debug( error_msg )
raise GPUdbException( error_msg )
# end inner if
# The user didn't also give port as a keyword argument; so
# we're treating this as the port
self.__log_debug( "Treating argument `options` as the `port` "
"argument of the 7.0 version of the API "
" because of its type {}"
"".format( str(type(options)) ) )
# Save the optios as the port and ensure we parse the
# rest of ths args and kwargs later
port = options
parse_args_and_kwargs = True
else:
# It is not port, nor is it GPUdb.Options or a dict; then it
# is wholly illegal
error_msg = ("Argument 'options' must be a GPUdb.Options "
"object or a dict; given {}"
"".format( str(type(options)) ) )
self.__log_debug( error_msg )
raise GPUdbException( error_msg )
# end 2nd inner if
else:
# Well, it is options after all. Now, ensure that args and
# kwargs aren't also given along with options--we either do
# 7.0 style or 7.1 style; not mixed!
if bool( args ) or bool( kwargs ):
# Can't use positional or keyword args on top of an options
# object!
error_msg = ("Please provide one of the two: 1) options 2) args "
"and/or kwargs. Given options: {}, args {}, and "
"kwargs: {}"
"".format( options, args, kwargs ))
self.__log_debug( error_msg )
raise GPUdbException( error_msg )
# end 2nd inner if
# Parse the options into the appropriate object
options = GPUdb.Options( options )
# end inner if
else:
# Ensure we parse the rest of the args and kwargs later since no
# options were given
parse_args_and_kwargs = True
# end if
# Create the options:
# 1) From args and kwargs, if needed (including when options was
# actually the port)
# 2) Default options if nothing given
if parse_args_and_kwargs:
# Parse the positional arguments, if needed
if bool( args ):
# The positional arguments will start from host_manager_port
positional_arg_names_from_v70 = [ "host_manager_port",
"encoding",
"connection",
"username",
"password",
"timeout",
"no_init_db_contact",
"primary_host",
"skip_ssl_cert_verification"
]
# Put each positional argument into kwargs
for i, arg_value in enumerate(args):
arg_name = positional_arg_names_from_v70[ i ]
kwargs[ arg_name ] = arg_value
# end for
# end inner if
# Create the options from the resultant keyword args
if bool( kwargs ):
options = GPUdb.Options( options = kwargs )
else:
# No keyword arguments are given; use the default options
options = GPUdb.Options.default()
# end if
return port, options
# end __parse_options
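# A hedged sketch of the backward-compatible argument handling implemented in
# __parse_options (hypothetical values): a non-Options second positional
# argument is treated as the 7.0-style 'port', while a dict or GPUdb.Options
# value is treated as 7.1-style options.
#
#   GPUdb( "127.0.0.1", "9191" )                       # 'options' re-read as port
#   GPUdb( "127.0.0.1", options = { "timeout": 60 } )  # 7.1-style options dict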
def __parse_urls( self, urls ):
"""Parse the given list of URLs which may have, in any order, URLs
for head node (rank-0) or worker ranks of any number of clusters.
Sort it all out and save information in a list of objects each
of which stores all pertinent information on a given cluster.
If the first attempt fails, keep retrying for the duration of the
initial connection attempt timeout, as many times as possible.
***This method should be called from the constructor initialization
method only!***
"""
# The very first time we sleep, if at all, will be for one minute
parse_urls_reattempt_wait_interval_min = 1 # in minutes
parse_urls_reattempt_wait_interval_sec = 60 # in seconds
# Stringify the URLs for logging purposes
urls_str = [ str(u) for u in urls ]
# We need to keep an eye on the clock to bound the total time spent
# retrying the initial connection
start_time = time.time()
# Try to parse the URLs for a preset amount of time (in case the
# first attempt fails)
keep_trying = True
while ( keep_trying ):
try:
# Parse the URLs (a single attempt)
self.__log_debug( "Attempting to parse the user-given URLs: {}"
"".format( [str(u) for u in urls]) )
self.__parse_urls_once( urls )
self.__log_debug( "Parsed the user-given URLs successfully: {}"
"".format( [ str(c)
for c in self.all_cluster_info ]) )
return # one successful attempt is all we need
except GPUdbUnauthorizedAccessException as ex:
# Any permission related problem should get propagated
raise
except GPUdbHostnameRegexFailureException as ex:
# There's no point in keep trying since the URLs aren't
# going to magically change
raise GPUdbException( "Could not connect to any working "
"Kinetica server due to hostname "
"regex mismatch (given URLs: {}); {}"
"".format( urls_str, str(ex) ) )
except GPUdbException as ex:
self.__log_debug( "Attempt at parsing URLs failed: {}"
"".format( str(ex) ) )
# raise # temporary fix
# Commenting out for now to revert to 7.0-style error upon
# problem. TODO: Fix properly taking into consideration
# how to handle new options effectively.
# If the user does not want us to retry, parse the URLs as is
if ( self.__initial_connection_attempt_timeout == 0 ):
self.__log_debug( "Initial connection attempt timeout set to 0; "
"parse the given URLs without auto discovery." )
self.__disable_auto_discovery = True
else:
# Do we keep trying another time? Has enough time passed?
curr_time = time.time()
keep_trying = ( (curr_time - start_time)
<= self.__initial_connection_attempt_timeout )
self.__log_debug( "Keep trying to parse URLs?: {}"
"".format( keep_trying ) )
if ( keep_trying ):
self.__log.warn( "Attempt at parsing user given URLs {}"
" failed; waiting for {}"
" minute(s) before retrying"
"".format( urls_str, parse_urls_reattempt_wait_interval_min) )
try:
# We will sleep before trying again
self.__log_debug( "Sleeping for {} minutes before "
"trying again"
"".format( parse_urls_reattempt_wait_interval_min ) )
time.sleep( parse_urls_reattempt_wait_interval_sec )
# The next time, we will sleep for twice as long
parse_urls_reattempt_wait_interval_min = (2 * parse_urls_reattempt_wait_interval_min)
parse_urls_reattempt_wait_interval_sec = (60 * parse_urls_reattempt_wait_interval_min)
except ( KeyboardInterrupt, SystemExit ) as ex2:
self.__log_debug( "Sleep interrupted ({}); throwing exception"
"".format( str(ex2) ) )
raise GPUdbException( "Initial parsing of user "
"given URLs interrupted: {}"
"".format( str(ex2) ), ex2 )
# end try
# end if
# end if
# end try
# end while
# We should never get here, but just in case, check that we have got
# at least one working URL
if ( self.__get_ha_ring_size() == 0 ):
self.__log_debug( "No cluster found!" )
raise GPUdbException( "Could not connect to any working Kinetica "
"server! Given URLs: {}"
"".format( urls_str ) )
# end if
# end __parse_urls
def __parse_urls_once( self, urls ):
"""
Parse the given list of URLs which may have, in any order, URLs
for head node (rank-0) or worker ranks of any number of clusters.
Sort it all out and save information in a list of objects each
of which stores all pertinent information on a given cluster.
***This method should be called from the constructor initialization
method only (via :meth:`__parse_urls`, not directly)!***
Parameters:
urls (URL or str or list of str or URLs)
Can be a single :class:`GPUdb.URL` or a single valid URL in a
string form. Can also be a list of :class:`GPUdb.URL` objects
or strings containing full and valid URL objects.
"""
# Validate the input
if not isinstance( urls, list ):
# Not a list; must be a single URL or a fully valid URL in a string
if not isinstance( urls, (basestring, unicode, GPUdb.URL) ):
raise GPUdbException( "Argument 'urls' must be a list of "
"strings or GPUdb.URL objects, or a "
"single string or a GPUdb.URL object; "
"given '{}' type {}"
"".format( urls, str(type(urls)) ) )
# Validate the single string/URL and put in a list
try:
urls = [ GPUdb.URL( urls ) ]
except Exception as ex:
raise GPUdbException( "Unable to parse argument 'urls'"
"; error: {}"
"".format( GPUdbException.stringify_exception( ex ) ) )
else:
# Got a list, verify that all elements are either URL objects
# or a valid URL in a string
# Each element must be a string
if not all( [ isinstance( x, (basestring, unicode, GPUdb.URL) )
for x in urls ] ):
raise GPUdbException( "Argument 'urls' must be "
"a list of strings or GPUdb.URL objects; "
"given some non-string/non-URL elements: "
"'{}'; type {}"
"".format( urls, str(type(urls)) ) )
# end inner if
# end if
# Convert each URL string to a URL object
try:
urls = [ GPUdb.URL(x) for x in urls ]
except Exception as ex:
raise GPUdbException( "Unable to parse argument 'urls'; "
"error: {}"
"".format( GPUdbException.stringify_exception( ex ) ) )
# end try
# Keep a stringified version to be used in logs
urls_str = [str(u) for u in urls]
# Convert the list of URLs to a set (to remove duplicates) and then
# into a queue (so that we can add HA ring addresses as we get them
# from servers and add them to the end of the queue while iterating
# over it--other forms of collections don't allow for it)
# Note: Doing this extra step to maintain order from the original list
duplicates_removed_in_order = sorted( set( urls ), key = urls.index )
url_deque = collections.deque( duplicates_removed_in_order )
# Save the hostname of the primary URL (which could be an empty string)
if ( self.primary_host ):
try:
# If it's a full URL, add it to the queue for processing
primary_url = GPUdb.URL( self.primary_host,
accept_full_urls_only = True )
# Add this URL to the list of URLs to process if it's not
# already in it
if primary_url not in duplicates_removed_in_order:
self.__log_debug( "Primary URL not in user-given URLs; adding it" )
url_deque.append( primary_url )
# end if
# Update the hostname of the primary cluster's URL for
# future use (instead of having the full URL)
self.__primary_host = primary_url.host
except GPUdbException as ex:
self.__log_debug( "Problem parsing primary host '{}': {}"
"".format( str(self.primary_host),
str(ex) ) )
# No-op if it's not a fully qualified URL (e.g. the user
# may have only given a hostname)
# end try
# end if
self.__log_debug( "Consolidated list of {} URLs to process: {}"
"".format( len(url_deque),
[str(u) for u in list( url_deque )] ) )
# Note that we're updating the member here
self.__cluster_info = []
# We will store API-discovered URLs even if we cannot communicate with
# any server at that address (it might be temporarily down)
num_user_given_urls = len( url_deque )
num_processed_urls = 0
is_discovered_url = False
# We need to keep track of whether all the user given URLs belong to
# the same cluster (for the purpose of primary choosing)
cluster_index_for_user_given_urls = []
# Parse each user given URL (until the queue is empty)
while ( url_deque ):
url = url_deque.popleft()
url_str = str( url )
self.__log_debug( "Processing URL: {}".format( url_str ) )
self.__log_debug( "Remaining {} URL(s): {}"
"".format( len(url_deque),
[ str(u) for u in list( url_deque ) ] ) )
# Figure out if this URL is user given or discovered by the API
if (num_processed_urls >= num_user_given_urls):
self.__log_debug( "This URL is API-discovered" )
is_discovered_url = True
# end if
num_processed_urls += 1
# Skip processing this URL if the hostname/IP address is used in
# any of the known (already registered) clusters
index_of_hostname_in_ring = self.__get_index_of_cluster_containing_node( url.host )
if ( index_of_hostname_in_ring != -1 ):
# Save the fact that this user given URL belongs to an existing
# cluster
if not is_discovered_url:
self.__log_debug("Skipping user-given URL {} (already found); adding index {} to user-given processed cluster list"
"".format(url_str, index_of_hostname_in_ring) )
cluster_index_for_user_given_urls.append( index_of_hostname_in_ring )
else:
self.__log_debug("Skipping discovered URL {} (already found)".format( url_str ) )
# end if
continue
# end if
# Skip auto-discovery of cluster information if the user says so
if ( self.__disable_auto_discovery ):
if not is_discovered_url:
self.__log_debug("Skipping connect verification of user-given URL {} (auto-discovery disabled); "
"adding index {} to user-given processed cluster list"
"".format(url_str, len(self.__cluster_info)) )
# Mark this user-given URL as a valid cluster
cluster_index_for_user_given_urls.append( len(self.__cluster_info) )
else:
self.__log_debug("Skipping connect verification of API-discovered URL {} (auto-discovery disabled)"
"".format( url_str ) )
# end if
# Create a cluster info object with just the given URL and the
# host manager port in the option
cluster_info = GPUdb.ClusterAddressInfo( url,
host_manager_port = self.options.host_manager_port,
logging_level = self.log.getEffectiveLevel() )
self.__cluster_info.append( cluster_info )
self.__log_debug( "Added cluster: {}".format( str(cluster_info) ) )
continue # skip to the next URL
# end if
# Skip processing this URL if Kinetica is not running at this address
if not self.__is_system_running( url = url ):
# If this URL has been discovered by the API, then add it to
# the cluster list anyway
if ( is_discovered_url ):
# Create a cluster info object with just the given URL and the
# host manager port in the option
cluster_info = self.__create_cluster_address_info_with_hm_port( url,
self.options.host_manager_port )
self.__cluster_info.append( cluster_info )
self.__log_debug( "Added non-running cluster with API-discovered URL: {}".format( str(cluster_info) ) )
else:
self.__log_debug( "Skipping non-running user-given URL: {}".format( url_str ) )
# end if
continue
# end if
# Get system properties of the cluster, if can't get it, skip
# to the next one
try:
sys_props = self.__get_system_properties( url )
except GPUdbException as ex:
# If this URL has been discovered by the API, then add it to
# the cluster list anyway
if ( is_discovered_url ):
# Create a cluster info object with just the given URL and the
# host manager port in the option
cluster_info = self.__create_cluster_address_info_with_hm_port( url,
self.options.host_manager_port )
self.__cluster_info.append( cluster_info )
self.__log_debug( "Added failed system properties lookup cluster with API-discovered URL: {}".format( str(cluster_info) ) )
else:
self.__log_debug( "Skipping failed system properties lookup user-given URL: {}".format( url_str ) )
# end if
continue
# end try
# Create an object to store all the information about this cluster
# (this could fail due to a host name regex mismatch)
cluster_info = self.__create_cluster_address_info( url, sys_props )
# We need to evaluate if we should save the user-given addresses
if not is_discovered_url:
# Check if the user-given URL is in the server's list of rank URLs;
# if not, the connection may need to be handled differently
if not cluster_info.does_cluster_contain_node( url.host ):
self.__log_debug("Obtained cluster addresses do not contain user given URL: {}".format(url_str) )
# Check if the server given head node address is reachable.
# If so, use that URL instead of the user-given one.
# If not, the user will not be able to use the server-known
# address for connecting normally. The API will need to
# reprocess the user-given URLs with auto-discovery
# disabled, so that the user can issue database commands,
# but where multi-head operations will not be available.
if not self.__is_system_running( cluster_info.head_rank_url ):
self.__log_warn("Disabling auto-discovery & multi-head operations--"
"cluster reachable with user-given URL <{}> but not with server-known URL <{}>"
"".format(url_str, cluster_info.head_rank_url))
# Disable auto-discovery and throw exception to reprocess user-given URLs
self.__disable_auto_discovery = True
raise GPUdbException( "Could not connect to server-known head node address: {} (user given URL: {})"
.format(cluster_info, url_str) )
# end if
# end if
self.__log_debug( "Verified connectivity with user-given URL {}; adding index {} to user-given processed cluster list"
"".format( url_str, len( self.__cluster_info) ) )
cluster_index_for_user_given_urls.append( len(self.__cluster_info) )
# end if
self.__cluster_info.append( cluster_info )
self.__log_debug( "Added URL {} -> cluster {}".format( url_str, str(cluster_info) ) )
self.__log_debug( "URLs queue after processing this URL (size {}): {}"
"".format( len(url_deque),
[ str(u) for u in list(url_deque) ] ) )
# Parse the HA ring head nodes in the properties and add them
# to this queue (only if we haven't processed them already).
# This could fail due to a hostname regex mismatch.
ha_ring_head_node_urls = self.__get_ha_ring_head_node_urls( sys_props )
self.__log_debug( "Got HA ring head node URLs: {}"
"".format( [ str(u)
for u in ha_ring_head_node_urls] ) )
for ha_url in ha_ring_head_node_urls:
if ( self.__get_index_of_cluster_containing_node( ha_url.host ) == -1 ):
# We have not encountered this cluster yet; add it to the
# deque of URLs to process
self.__log_debug( "HA ring head node URL {} not found in known clusters; "
"adding to queue to process".format(str(ha_url)))
url_deque.append( ha_url )
else:
self.__log_debug( "HA ring head node URL {} found in known clusters; "
"skipping".format(str(ha_url)))
# end if
# end for
self.__log_debug( "URLs queue after processing this HA ring's head node URLs (size {}: {})"
"".format( len(url_deque),
[ str(u) for u in list(url_deque) ] ) )
# end while
# Check that we have got at least one working URL
if ( self.__get_ha_ring_size() == 0 ):
self.__log_error( "No clusters found at user given URLs {}!"
"".format( urls_str ) )
raise GPUdbException( "Could not connect to any working Kinetica server, given URLs: {}"
"".format( urls_str ) )
# end if
# Set the primary cluster & head node
if ( self.__get_ha_ring_size() == 1 ):
# Mark the single cluster as the primary cluster
self.__cluster_info[ 0 ].is_primary_cluster = True
# Update the primary cluster head node hostname, as the original
# one may have been a worker node
original_primary_host = self.primary_host
self.__primary_host = self.__cluster_info[ 0 ].head_rank_url.host
# Also save it in the options for the future
self.options.primary_host = self.primary_host
self.__log_debug( "Updated primary host name {} -> {} for single-cluster connection"
"".format( original_primary_host, self.primary_host ) )
else:
# If the user has not given any primary host AND all the user
# given URLs belong to a single cluster, set that as the primary
if (not self.primary_host):
all_urls_in_same_cluster = ( cluster_index_for_user_given_urls.count(
cluster_index_for_user_given_urls[0] )
== len( cluster_index_for_user_given_urls ) )
if all_urls_in_same_cluster:
primary_index = cluster_index_for_user_given_urls[0]
# Save the hostname of the newly identified primary cluster
original_primary_host = self.primary_host
self.__primary_host = self.__cluster_info[ primary_index ].head_rank_url.host
# Also save it in the options
self.options.primary_host = self.primary_host
self.__log_debug( "Updated primary host name {} -> {} for multi-cluster connection"
"".format( original_primary_host, self.primary_host ) )
else:
self.__log_debug( "Could not update primary host name for multi-cluster connection, as user-given URLs belong to different clusters" )
# end innermost if
# end if
# end if
# Flag the primary cluster as such and ensure it's the first element in
# host_addresses
# ----------------------------------------------------------------------
if self.primary_host:
# Check if the primary host exists in the list of user given hosts
primary_index = self.__get_index_of_cluster_containing_node( self.primary_host )
self.__log_debug( "Checking if the primary cluster is in the ring; index: {}"
"".format( primary_index ) )
if ( primary_index != -1 ):
self.__log_debug( "Setting that cluster as primary" )
# There is a match; mark the respective cluster as the primary cluster
primary_cluster = self.__cluster_info[ primary_index ]
primary_cluster.is_primary_cluster = True
if ( primary_index > 0 ):
self.__log_debug( "Moving primary cluster to the front of the list" )
# Note: Do not combine the nested if with the top level if; will change
# logic and may end up getting duplicates of the primary URL
# Move the primary URL to the front of the list
self.__cluster_info.remove( primary_cluster )
self.__cluster_info.insert( 0, primary_cluster )
# end inner if
else:
# Note that if no primary URL is specified by the user, then primary_index
# above would be -1; but we need not handle that case since it would be
# a no-op
self.__log_debug( "Designated primary cluster with host {} not found in cluster list"
"".format( self.primary_host ) )
# end if
# end if
# Randomize the URL indices taking care that the primary cluster is
# always at the front
self.__randomize_clusters()
# end __parse_urls_once
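# Illustrative walk-through (comments only; hypothetical cluster names) of the
# ordering produced above. Suppose the ring has clusters A, B, C, the user's
# URLs all point into B, and the primary host resolves to B:
#   discovery order          : [A, B, C]
#   primary found in B       : B.is_primary_cluster = True
#   primary moved to front   : [B, A, C]
#   __randomize_clusters()   : failover indices shuffled, keeping B's index first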
def __randomize_clusters( self ):
"""Randomly shuffles the list of high availability URL indices so that HA
failover happens at a random fashion. One caveat is when a primary host
is given by the user; in that case, we need to keep the primary host's
index as the first one in the list so that upon failover, when we cricle
back, we always pick the first/primary host up again.
Note that shuffling only happens when the HA failover order is RANDOM;
otherwise, the clusters are kept in the order they were found.
"""
# Re-create the list of HA URL indices (automatically in a
# monotonically increasing order)
self.__cluster_indices = []
for i in range(0, self.__get_ha_ring_size()):
self.__cluster_indices.append( i )
# end
# If the user chose to failover in a random fashion, we need to
# shuffle the list (while ensuring the primary always gets chosen
# first)
if ( self.__ha_failover_order == GPUdb.HAFailoverOrder.RANDOM ):
if not self.primary_host:
self.__log_debug( "Randomizing all clusters for HA failover--no primary host given" )
# We don't have any primary URL; so treat all URLs similarly
# Randomly order the HA clusters and pick one to start working with
random.shuffle( self.__cluster_indices )
else:
self.__log_debug( "Randomizing all cluster for HA failover except for primary host {}"
"".format( str( self.primary_host ) ) )
# Shuffle from the 2nd element onward, only if there are more than
# two elements, of course
if ( len( self.__cluster_indices ) > 2 ):
# Shuffle from the 2nd element onward
non_primary_host_indices = list( range(1, len(self.__cluster_indices)) )
random.shuffle( non_primary_host_indices )
# Put them back together
self.__cluster_indices = ([ 0 ] + non_primary_host_indices)
# end inner if
# end inner if
# end if
# This will keep track of which cluster to pick next (an index of
# randomly shuffled indices)
self.__set_curr_cluster_index_pointer( 0 )
# end __randomize_clusters
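# Minimal standalone sketch (standard library only, not part of this class) of
# the "shuffle everything except the first element" approach used above:
#   import random
#   indices = list( range( 5 ) )       # e.g. [0, 1, 2, 3, 4]
#   rest = indices[1:]
#   random.shuffle( rest )             # leave index 0 (the primary) in place
#   indices = [ indices[0] ] + rest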
def __get_index_of_cluster_containing_node( self, hostname ):
"""Given a hostname or IP address, check if the known clusters
have/contain it.
Parameters:
hostname (str)
The hostname or IP address to search for.
Returns:
The index of the cluster that contains this node; -1
if not found in the system.
"""
if ( not self.__cluster_info ):
return -1
# end if
# Check each cluster for the hostname/IP
i = 0
for cluster_address in self.__cluster_info:
if ( cluster_address.does_cluster_contain_node( hostname ) ):
self.__log_debug( "Host match found in cluster #{}".format( i ) )
return i
# end if
i += 1 # need to increase the index!
# end for
# Did not find any cluster that uses/has the given hostname/IP address
self.__log_debug( "Did not find any cluster with hostname <{}>".format( hostname ) )
return -1
# end __get_index_of_cluster_containing_node
def __get_system_status_information( self, url ):
"""Given a URL, return the system status information.
Parameters:
url (:class:`GPUdb.URL`)
The URL of the server to get information from.
Returns:
A dict containing the system status
"""
# Call /show/system/status at the given URL
try:
self.__log_debug( "Getting system status for URL: {}".format( str(url) ) )
sys_status = self.__submit_request( C._ENDPOINT_SHOW_SYSTEM_STATUS,
{"options": {}},
url = url,
timeout = self.__server_connection_timeout,
convert_to_attr_dict = True )
if not sys_status.is_ok():
raise GPUdbException( "Could not obtain system status: {}"
"".format( sys_status.get_error_msg() ) )
except GPUdbUnauthorizedAccessException as ex:
# Any permission related problem should get propagated
raise
except (GPUdbConnectionException, GPUdbExitException) as ex:
# Also propagate special connection or exit errors
self.__log_debug("Caught conn/exit exception: {}".format( str(ex) ))
raise
except Exception as ex:
raise GPUdbException( "Error calling {} at URL {}: {}"
"".format( C._ENDPOINT_SHOW_SYSTEM_STATUS,
str(url),
GPUdbException.stringify_exception( ex ) ) )
# end try
# Get the 'system' entry in the status response and parse it
if C._SHOW_SYSTEM_STATUS_RESPONSE_SYSTEM not in sys_status.status_map:
raise GPUdbException( "No entry for <{}> in {} status map!"
"".format( C._SHOW_SYSTEM_STATUS_RESPONSE_SYSTEM, C._ENDPOINT_SHOW_SYSTEM_STATUS ) )
# end if
system_status_str = sys_status.status_map[ C._SHOW_SYSTEM_STATUS_RESPONSE_SYSTEM ]
self.__log_debug( "Got system status {} for URL: {}".format( system_status_str, str(url)) )
try:
system_status = json.loads( system_status_str )
except Exception as ex:
raise GPUdbException( "Could not parse system status {} for URL: {}"
"".format( system_status_str, str(url) ) )
return system_status
# end __get_system_status_information
def __is_system_running( self, url = None, sys_status_info = None ):
"""Given a URL, return whether the server is running at that address.
Parameters:
url (:class:`GPUdb.URL`)
The URL of the server to get information from.
Returns:
True if the server is running, False otherwise.
"""
# Validate argument type
if (url is not None):
# If the URL is given, it must be a URL object
if not isinstance( url, GPUdb.URL ):
raise GPUdbException( "Parameter 'url' must be a GPUdb.URL object; "
"given '{}'".format( str(type( url )) ) )
# end inner if
else:
# If the URL is not given, then system status information must be
# given
if sys_status_info is None:
raise GPUdbException( "Parameter 'url' is not given; therefore, "
"parameter 'sys_status_info' must be "
"given; but it is not!")
# end
is_running = False
try:
if sys_status_info is None:
sys_status_info = self.__get_system_status_information( url )
# end if
# Then look for 'status' and see if it is 'running'
# Get the 'system' entry in the status response and parse it
if C._SHOW_SYSTEM_STATUS_RESPONSE_STATUS not in sys_status_info:
raise GPUdbException( "Could not find key '{}' is system status information!"
"".format( C._SHOW_SYSTEM_STATUS_RESPONSE_STATUS ) )
# end if
system_status_str = sys_status_info[ C._SHOW_SYSTEM_STATUS_RESPONSE_STATUS ]
if ( system_status_str == C._SHOW_SYSTEM_STATUS_RESPONSE_RUNNING ):
self.__log_debug( "System running at URL {}"
"".format( str(url) ) )
is_running = True
else:
self.__log_debug( "System not confirmed running at URL {}"
"".format( str(url) ) )
except GPUdbUnauthorizedAccessException as ex:
# Any permission related problem should get propagated
raise
except Exception as ex:
# Any error means we don't know whether the system is running
self.__log_warn( "Exception checking running status of URL {} -- {}"
"".format( str(url), str(ex) ) )
# end try
return is_running
# end __is_system_running
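# Hedged note on the payload shape this check relies on: the /show/system/status
# response exposes a status map whose 'system' entry is itself a JSON string;
# once parsed (see __get_system_status_information), a healthy server looks
# roughly like {"status": "running", ...}. The literal key names come from the
# constants in class C and may vary across server versions.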
def __get_system_properties( self, url ):
"""Given a URL, return the system properties information.
Parameters:
url (:class:`GPUdb.URL`)
The URL of the server to get information from.
Returns:
The properties map, a dict object.
"""
# Call /show/system/properties at the given URL
try:
self.__log_debug( "Getting system properties for URL: {}".format( str(url) ) )
sys_prop_resp = self.__submit_request( C._ENDPOINT_SHOW_SYSTEM_PROPERTIES,
{"options": {}},
url = url,
convert_to_attr_dict = True )
except GPUdbUnauthorizedAccessException as ex:
# Any permission related problem should get propagated
raise
except (GPUdbConnectionException, GPUdbExitException) as ex:
# Also propagate special connection or exit errors
self.__log_debug("Caught conn/exit exception: {}".format( str(ex) ))
raise
except Exception as ex:
raise GPUdbException( "Error calling {} at URL {}: {}"
"".format( C._ENDPOINT_SHOW_SYSTEM_PROPERTIES,
str(url),
GPUdbException.stringify_exception( ex ) ) )
# end try
if not sys_prop_resp.is_ok():
raise GPUdbException( "Could not get system properties for URL: {} -- {}"
"".format( str(url), sys_prop_resp.get_error_msg() ) )
# Get the property map from the response and return it
property_map = sys_prop_resp.property_map
self.__log_debug( "Got system properties for URL: {}".format( str(url) ) )
# Is HTTPD being used (helps in figuring out the host manager URL)
if C._SYSTEM_PROPERTIES_RESPONSE_ENABLE_HTTPD in property_map:
if (property_map[ C._SYSTEM_PROPERTIES_RESPONSE_ENABLE_HTTPD ].lower() == C._SYSTEM_PROPERTIES_RESPONSE_TRUE):
self.__log_debug( "Setting use httpd to true for URL: {}".format( str(url) ) )
self.__use_httpd = True
# end inner if
# end if
return property_map
# end __get_system_properties
def __update_server_version(self):
"""
Retrieves the server version by calling `__getsystem_properties` and updates
"""
if self.server_version is not None:
return
try:
sys_props = self.__get_system_properties(GPUdb.URL(self.gpudb_full_url))
if C._SYSTEM_PROPERTIES_RESPONSE_SERVER_VERSION in sys_props:
self.server_version = sys_props[C._SYSTEM_PROPERTIES_RESPONSE_SERVER_VERSION]
except GPUdbException as ex:
msg = "Failed to get database version from the server; " + str(ex)
raise GPUdbException(msg)
def __get_server_version( self, sys_properties ):
"""Given system properties, extract the version of Kinetica being run.
If not available, return None.
Parameters:
sys_properties (dict)
A dict containing system properties.
Returns:
The string containing the server version.
"""
# Get the conf param for the server version
if C._SYSTEM_PROPERTIES_RESPONSE_SERVER_VERSION not in sys_properties:
return None
return sys_properties[ C._SYSTEM_PROPERTIES_RESPONSE_SERVER_VERSION ]
# end __get_server_version
def __get_rank_urls( self, sys_props, hostname_regex = None ):
"""Given system properties, extract the head- and worker rank URLs.
Parameters:
sys_props (dict)
A dictionary containing all relevant system properties.
hostname_regex (str)
The regex to match the URLs against; if None, then when the
system properties have multiple URLs for a given rank, choose
the first one.
Returns:
A list of :class:`GPUdb.URL` objects, where the first entry is the
rank-0 URL.
"""
# Get the protocol being used (default is http)
protocol = "http"
if C._SYSTEM_PROPERTIES_RESPONSE_USE_HTTPS in sys_props:
if (sys_props[ C._SYSTEM_PROPERTIES_RESPONSE_USE_HTTPS ]
== C._SYSTEM_PROPERTIES_RESPONSE_TRUE):
protocol = "https"
# end if
rank_urls = []
# Get the rank URLs and process them
if ( (C._SYSTEM_PROPERTIES_RESPONSE_SERVER_URLS in sys_props)
and sys_props[ C._SYSTEM_PROPERTIES_RESPONSE_SERVER_URLS ] ):
server_urls = sys_props[ C._SYSTEM_PROPERTIES_RESPONSE_SERVER_URLS ]
self.__log_debug(
"Known rank URLs <{}> from server: {}{}".format(
C._SYSTEM_PROPERTIES_RESPONSE_SERVER_URLS,
server_urls,
"" if not hostname_regex else " vs. user-given regex: " + hostname_regex
)
)
# Get the URL for each of the ranks
# ---------------------------------
url_lists = server_urls.split(";")
# Parse each entry (corresponds to a rank, could be an
# empty slot for a removed rank)
for i in range(0, len(url_lists)):
rank_url_str = url_lists[i]
# Handle removed ranks (corresponds to empty strings)
if ( not rank_url_str ):
continue
# end if
# Each rank can have multiple URLs associated with it
urls = rank_url_str.split(",")
found = False
for url_str in urls:
# If a regex is given, get a matching URL--if there isn't
# a match, throw an error. If no regex is given, take
# the first URL.
do_add = False
# Ensure it's a valid URL
try:
url = GPUdb.URL( url_str )
except Exception as ex:
raise GPUdbException( "Unable to parse rank URL '{}' "
"".format( url_str) )
# end try
if hostname_regex is None:
# No regex is given, so we'll take the first one
self.__log_debug( "Keeping rank URL: {}".format( url_str ) )
do_add = True
else:
# Check if this URL matches the given regex
do_add = re.match( hostname_regex, url.host )
do_add = (do_add is not None)
if do_add:
self.__log_debug( "Keeping matching rank URL: {}".format( url_str ) )
else:
self.__log_debug( "Skipping non-matching rank URL: {}".format( url_str ) )
# end if
if do_add:
# Found a match (whether a regex is given or not)
rank_urls.append( url )
found = True
break
# end if
# end for
if not found:
# If there's no valid URL matching the regex throw a match error
if (hostname_regex is not None):
raise GPUdbHostnameRegexFailureException(
"No valid matching IP/hostname found for worker: {}".format( i )
)
# end if
# If there's no valid URL throw an error
raise GPUdbException("No valid IP/hostname found for worker: {}".format( i ) )
# end if
# end for
else:
self.__log_debug( "No entry for <{}> in {} response".format(
C._SYSTEM_PROPERTIES_RESPONSE_SERVER_URLS,
C._ENDPOINT_SHOW_SYSTEM_PROPERTIES ))
# end if
return rank_urls
# end __get_rank_urls
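# Worked example (hypothetical values) of the parsing above: a property value
# such as
#   "http://10.0.0.1:9191;;http://10.0.0.2:9192,http://node2.internal:9192"
# splits on ';' into per-rank entries (the empty slot is a removed rank); each
# entry splits on ',' into candidate URLs, and the first candidate -- or the
# first one whose host matches hostname_regex -- is kept for that rank.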
def __get_host_names_from_system_properties( self, sys_props, hostname_regex ):
"""Given system properties, extract the hostnames or IP addresses of all the
physical nodes (machines) used in the cluster, whether or not there are
active ranks running on any of them. Each string will contain the protocol
and the hostname or the IP address, e.g. "http://abcd.com",
"https://123.4.5.6".
Parameters:
sys_props (dict)
A dictionary containing all relevant system properties.
hostname_regex (str)
The regex to match the hostnames against; if None, then when the
system properties have multiple hostnames for a given host, choose
the first one.
Returns:
A list of strings containing hostnames or IP addresses along with
the protocol. These are not full URLs.
"""
self.__log_debug( "Extracting server-known host names from system properties{}"
"".format( "" if not hostname_regex else " using user-given regex: " + hostname_regex ) )
# Get the total number of hosts/machines in the cluster
if C._SYSTEM_PROPERTIES_RESPONSE_NUM_HOSTS not in sys_props:
raise GPUdbException( "Missing value for {}"
"".format( C._SYSTEM_PROPERTIES_RESPONSE_NUM_HOSTS ) )
num_hosts = sys_props[ C._SYSTEM_PROPERTIES_RESPONSE_NUM_HOSTS ]
try:
num_hosts = int( num_hosts )
except:
raise GPUdbException( "Unparsable entry for '{}' ({}); need an integer"
"".format( C._SYSTEM_PROPERTIES_RESPONSE_NUM_HOSTS, num_hosts ) )
# end try
# Extract the hostnames from the system properties
cluster_host_names = []
for i in range(0, num_hosts):
# Each hostname is listed individually in the system properties
# as 'conf.host<i>_public_urls'
hostname_key = "conf.host{}_public_urls".format( i )
if hostname_key not in sys_props:
raise GPUdbException( "Missing value for {}th hostname '{}'"
"".format( i, hostname_key ) )
hostname_str = sys_props[ hostname_key ]
# Each host can have multiple hostnames associated with it
hostnames = hostname_str.split(",")
found = False
# Try to find a usable hostname for this host
for j in range(0, len(hostnames)):
hostname = hostnames[ j ]
# If a regex is given, get a matching hostname--if there isn't
# a match, throw an error. If no regex is given, take
# the first hostname.
do_add = False
if hostname_regex is None:
# No regex is given, so we'll take the first one
self.__log_debug( "Keeping hostname: {}".format( hostname ) )
do_add = True
else:
# The hostname might have the protocol; strip that out
split_hostname = hostname.split( "://" )
if ( len(split_hostname) > 1 ):
host = split_hostname[ 1 ]
else:
host = split_hostname[ 0 ]
# end if
# Check if this hostname matches the regex
do_add = re.match( hostname_regex, host )
do_add = (do_add is not None)
if do_add:
self.__log_debug( "Keeping matching hostname: {}".format( hostname ) )
else:
self.__log_debug( "Skipping non-matching hostname: {}".format( hostname ) )
# end if
if ( do_add ):
# We've decided to use this hostname
cluster_host_names.append( hostname )
found = True
break
# end if
# end for
if not found:
# No eligible hostname found!
if (hostname_regex is not None):
# The reason we don't have a URL is because it didn't
# match the given regex
msg = ("No matching hostname found for host #{} (given hostname regex {})"
"".format( i, hostname_regex.pattern ) )
raise GPUdbHostnameRegexFailureException( msg )
# end inner if
raise GPUdbException("No matching hostname found for host #{}."
"".format( i ) )
# end if
# end for
return cluster_host_names
# end __get_host_names_from_system_properties
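# Worked example (hypothetical values): with a host count of 2 and
#   conf.host0_public_urls = "https://10.0.0.1,https://host-0.internal"
#   conf.host1_public_urls = "https://10.0.0.2"
# this returns ["https://10.0.0.1", "https://10.0.0.2"] when no hostname_regex
# is given, or the regex-matching candidate for each host otherwise.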
def __create_host_manager_url( self, url, host_manager_port ):
"""Given a :class:`GPUdb.URL` and a host manager port, create
another :class:`GPUdb.URL` object that represents the host manager
URL. Return that.
"""
# Create the host manager URL
try:
# Create the host manager URL using the user given (or default) port
if ( ( self.__use_httpd == True )
and ( len(url.path) > 0) ):
# We're using HTTPD, so use the appropriate URL
# (likely, http[s]://hostname_or_IP:port/gpudb-host-manager)
# We won't always have a port (e.g. for cloud instances). So,
# just replace the last bit of the path, i.e. gpudb-X with
# gpudb-host-manager
url_split_on_slash = url.url.rsplit( '/' )
url_split_on_slash[ -1 ] = "gpudb-host-manager"
host_manager_url = '/'.join( url_split_on_slash )
host_manager_url = GPUdb.URL( host_manager_url )
else:
# The host manager URL shouldn't use any path and
# use the host manager port
host_manager_url = GPUdb.URL( "{protocol}://{host}:{port}"
"".format( protocol = url.protocol,
host = url.host,
port = host_manager_port ) )
# end if
except Exception as ex:
raise GPUdbException( "Error creating the host manager URL: {}".format(GPUdbException.stringify_exception( ex )) )
self.__log_debug( "Created host manager URL: {}".format( host_manager_url ) )
return host_manager_url
# end __create_host_manager_url
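# Hedged examples (hypothetical addresses) of the two branches above:
#   with HTTPD      : http://host:8082/gpudb-0  ->  http://host:8082/gpudb-host-manager
#   without HTTPD   : http://host:9191  +  host manager port 9300  ->  http://host:9300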
def __create_cluster_address_info_with_hm_port( self, url,
host_manager_port ):
"""Given the host manager port and a URL, create a
:class:`GPUdb.ClusterAddressInfo` object and return it.
-- active head rank URL
-- all worker rank URLs
-- host manager URL
-- hostnames for all the nodes in the cluster
Parameters:
url (GPUdb.URL)
The URL of the cluster.
host_manager_port (string)
The host manager port.
Returns:
A :class:`GPUdb.ClusterAddressInfo` object.
"""
# Create the host manager URL
try:
host_manager_url = self.__create_host_manager_url( url,
host_manager_port )
except Exception as ex:
raise GPUdbException( GPUdbException.stringify_exception( ex ) )
# Create an object to store all the information about this cluster
cluster_info = GPUdb.ClusterAddressInfo( url,
host_manager_url = host_manager_url,
logging_level = self.log.getEffectiveLevel() )
# Check if this cluster is the primary cluster
self.__log_debug( "Checking if this is the primary cluster; "
"self.primary_host: {}"
"".format( self.primary_host ) )
if ( self.primary_host
and cluster_info.does_cluster_contain_node( self.primary_host ) ):
# Yes, it is; mark this cluster as the primary cluster
cluster_info.is_primary_cluster = True
# end if
self.__log_debug( "Is primary cluster?: {}"
"".format( cluster_info.is_primary_cluster ) )
return cluster_info
# end __create_cluster_address_info_with_hm_port
def __create_cluster_address_info( self, url, sys_props ):
"""Given system properties, extract all the relevant address information
about the cluster and create an object containing the following:
-- active head rank URL
-- all worker rank URLs
-- host manager URL
-- hostnames for all the nodes in the cluster
Parameters:
url (GPUdb.URL)
The URL of the cluster the information about which we need to
create.
sys_props (dict)
A dictionary containing system properties for the cluster.
Returns:
A :class:`GPUdb.ClusterAddressInfo` object.
"""
self.__log_debug( "Establishing a cluster record associated with URL: {}"
"".format( str(url) ) )
# Get the rank URLs (head and worker ones)
rank_urls = self.__get_rank_urls( sys_props, self.options.hostname_regex )
# Get the head node URL and keep it separately
if ( len(rank_urls) > 0 ):
self.__log_debug( "Assigning head rank URL {} from server-known rank URLs: {}"
"".format( rank_urls[0], [str(u) for u in rank_urls] ) )
head_rank_url = rank_urls.pop( 0 )
else:
# No ranks were found from system properties; so just use the given
# URL as the head rank URL
self.__log_debug( "Assigning head rank URL to the user-given one {}, as no server-known worker rank URLs found"
"".format( str(url) ) )
head_rank_url = url
# end if
# Get hostnames for all the nodes/machines in the cluster
cluster_hostnames = self.__get_host_names_from_system_properties( sys_props,
self.options.hostname_regex )
# Create the host manager URL
try:
host_manager_url = self.__create_host_manager_url( head_rank_url,
self.options.host_manager_port )
except Exception as ex:
raise GPUdbException( GPUdbException.stringify_exception( ex ) )
# Get the version of Kinetica run at this cluster (None if not found)
server_version = self.__get_server_version( sys_props )
# Create an object to store all the information about this cluster
cluster_info = GPUdb.ClusterAddressInfo( head_rank_url,
rank_urls,
cluster_hostnames,
host_manager_url,
is_primary_cluster = False,
server_version = server_version,
logging_level = self.log.getEffectiveLevel() )
# Check if this cluster is the primary cluster
if ( self.primary_host
and cluster_info.does_cluster_contain_node( self.primary_host ) ):
# Yes, it is; mark this cluster as the primary cluster
cluster_info.is_primary_cluster = True
self.__log_debug( "Marked this cluster as primary" )
# end if
return cluster_info
# end __create_cluster_address_info
def __get_ha_ring_head_node_urls( self, sys_props ):
"""Given system properties, extract the head node URLs for the
high-availability cluster.
Parameters:
sys_props (dict)
A dictionary containing system properties for some cluster.
Returns:
A list of full URLs for each of the head nodes in the
high availability cluster, if any is set up.
"""
hostname_regex = self.options.hostname_regex
self.__log_debug( "Extracting server-known HA head node URLs from system properties{}"
"".format( "" if not hostname_regex else " using user-given regex: " + hostname_regex ) )
# If HA is not set up, just return an empty list
if C._enable_ha not in sys_props:
return []
# If HA is explicitly disabled, also return an empty list
if sys_props[ C._enable_ha ].lower() != "true":
return []
# Return an empty list if no HA ring head node URLs are found
if C._SYSTEM_PROPERTIES_RESPONSE_HEAD_NODE_URLS not in sys_props:
return []
ha_ring_head_nodes_str = sys_props[ C._SYSTEM_PROPERTIES_RESPONSE_HEAD_NODE_URLS ]
if ( len( ha_ring_head_nodes_str ) == 0 ):
return []
# Parse the HA ring head node addresses
# -------------------------------------
ha_ring_head_node_urls = []
ha_ring_head_nodes_url_lists = ha_ring_head_nodes_str.split(";")
# Parse each entry (corresponds to a cluster)
for i in range(0, len(ha_ring_head_nodes_url_lists)):
# Each cluster's head node can have multiple URLs associated with it
urls = ha_ring_head_nodes_url_lists[ i ].split(",")
found = False
# Pick one URL out of the many
for j in range(0, len(urls)):
url_str = urls[ j ]
# If a regex is given, get a matching URL--if there isn't
# a match, throw an error. If no regex is given, take
# the first URL.
do_add = False
# Ensure it's a valid URL
try:
url = GPUdb.URL( url_str )
except Exception as ex:
raise GPUdbException( "Unable to parse HA head node URL '{}' "
"".format( url_str) )
# end try
if hostname_regex is None:
# No regex is given, so we'll take the first one
self.__log_debug( "Keeping head node URL: {}".format( url_str ) )
do_add = True
else:
# Check if this URL matches the given regex
do_add = re.match( hostname_regex, url.host )
do_add = (do_add is not None)
if do_add:
self.__log_debug( "Keeping matching head node URL: {}".format( url_str ) )
else:
self.__log_debug( "Skipping non-matching head node URL: {}".format( url_str ) )
# end if
if ( do_add ):
# Found a match (whether a regex is given or not)
ha_ring_head_node_urls.append( url )
found = True
break
# end if
# end for
if not found:
# No eligible hostname found!
if (hostname_regex is not None):
# The reason we don't have a URL is because it didn't
# match the given regex
msg = ("No matching IP/hostname found for cluster with head "
"node URLs {} (given hostname regex {})"
"".format( ha_ring_head_nodes_url_lists[ i ],
hostname_regex.pattern ) )
raise GPUdbHostnameRegexFailureException( msg )
# end if
# We couldn't find it for some other reason
raise GPUdbException("No matching IP/hostname found "
"for cluster with head node URLs {}"
"".format( ha_ring_head_nodes_url_lists[ i ] ) )
# end if
# end for
return ha_ring_head_node_urls
# end __get_ha_ring_head_node_urls
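# Worked example (hypothetical values): a head-node property such as
#   "http://ring-a:9191;http://ring-b:9191,http://ring-b.alt:9191"
# yields one URL per cluster in the ring -- the first candidate, or the first
# candidate whose host matches hostname_regex when a regex is supplied.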
def __increment_num_cluster_switches( self ):
"""Gets the number of times the client has switched to a different
cluster amongst the high availability ring.
This method is not thread safe.
"""
self.__num_cluster_switches = (self.__num_cluster_switches + 1)
# end __increment_num_cluster_switches
def __get_curr_cluster_index_pointer( self ):
"""Internal helper method.
"""
# Return the pointer to the current cluster. This is the index
# for `self.__cluster_indices`. This member is a list of integers
# which are the *actual* indices for `self.__cluster_info`. So, this
# is how it works:
#
# `self.__cluster_info` is a list of :class:`GPUdb.ClusterAddressInfo`
# objects. Each member of this list has all the relevant information
# for a single Kinetica cluster. For example, it may be like this:
# [cluster1, cluster2, cluster3].
#
# `self.__cluster_indices` is a list of integers. This list contains
# values from range(0, len(self.__cluster_info)). But they could
# be in any order. For example, we could have [2, 0, 1].
#
# Now, `self.__curr_cluster_index_pointer` is a single integer with
# a value from range(0, len(self.__cluster_indices)). We always
# increment this monotonically until it needs to go back to 0.
#
# We pick the "current" cluster as such:
# self.__cluster_info[ self.__cluster_indices[ self._curr_cluster_index_pointer ] ]
# or, in other words:
# self.__cluster_info[ self.__cluster_indices[ value_returned_by_this_method ] ]
return self.__curr_cluster_index_pointer
# end __get_curr_cluster_index_pointer
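# Worked example (hypothetical state) of the indirection described above:
#   __cluster_info               = [cluster0, cluster1, cluster2]
#   __cluster_indices            = [2, 0, 1]     # shuffled failover order
#   __curr_cluster_index_pointer = 1
# => current cluster is __cluster_info[ __cluster_indices[1] ], i.e. cluster0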
def __set_curr_cluster_index_pointer( self, value ):
"""Set the current cluster index. Only positive integers allowed.
"""
try:
value = int( value )
except:
msg = ("Argument 'value' must be a positive integer;"
" given '{}'".format( str(type(value)) ))
self.__log_debug( msg )
raise GPUdbException( msg )
# end try
if value < 0:
msg = ("Argument 'value' must be a positive integer value; "
"given '{}'".format( value ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
self.__curr_cluster_index_pointer = value
# end __set_curr_cluster_index_pointer
def __get_curr_cluster_index( self ):
"""Internal helper method. Returns the current cluster index
so that the correct cluster can be picked.
Note that there is no matching setter method for this. We *only*
use :meth:`__set_curr_cluster_index_pointer`.
"""
# Return the index for picking the current cluster. This is the *value*
# from `self.__cluster_indices`.
#
# We pick the "current" cluster as such:
# self.__cluster_info[ value_returned_by_this_method ]
return self.__cluster_indices[ self.__get_curr_cluster_index_pointer() ]
# end __get_curr_cluster_index
def __perform_version_check( self, do_print_warning = True ):
"""Perform a version check with the database server.
Parameters:
do_print_warning (bool)
If True, print a warning on version mismatch.
Returns:
True if versions match, False if they don't, and None if
the server version is unknown.
"""
# Get the active server's version
server_version = self.server_version
# When the server's version is unknown, return None and print a
# warning, if applicable
if server_version is None:
if (do_print_warning == True):
msg = ("Server version is unknown; cannot perform a "
"compatibility check with this running client" )
self.__log_warn( msg )
# end if
return None
# end if
if not self.__client_version.is_version_compatible( server_version ):
# Version available but is not compatible
if (do_print_warning == True):
msg = ("Client version ({}) does not match that of the "
"server ({})".format( str( self.__client_version ),
str( server_version ) ) )
self.__log_warn( msg )
# end if
return False
# end if
return True # all is well
# end __perform_version_check
def get_version_info( self ):
"""Return the version information for this API."""
return self.api_version
# end get_version_info
def get_host( self ):
"""Return the host this client is talking to."""
return self.get_url( stringified = False ).host
# end get_host
def get_primary_host( self ):
"""Return the primary host for this client."""
return self.__primary_host
# end get_primary_host
def set_primary_host( self, new_primary_host,
start_using_new_primary_host = False,
delete_old_primary_host = False ):
"""Set the primary host for this client. Start using this host
per the user's directions. Also, either delete any existing primary
host information, or relegate it to the ranks of a backup host.
Parameters:
new_primary_host (str)
A string containing the full URL of the new primary host (of
the format 'http[s]://X.X.X.X:PORT[/httpd-name]'). Must have
valid URL format. May be part of the given back-up hosts, or
be a completely new one.
start_using_new_primary_host (bool)
Boolean flag indicating if the new primary host should be used
starting immediately. Please be cautious about setting the value
of this flag to True; there may be unintended consequences regarding
query chaining. Caveat: if the value given is False, but
*delete_old_primary_host* is True and the old primary host, if any,
was being used at the time of this function call, then the client
still DOES switch over to the new primary host. Default value is False.
delete_old_primary_host (bool)
Boolean flag indicating that if a primary host was already set, delete
that information. If False, then any existing primary host URL would be
treated as a regular back-up cluster's host. Default value is False.
.. deprecated:: 7.1.0.0
As of version 7.1.0.0, this method will no longer be
functional. This method will be a no-op, not changing the primary
host. The method will be removed in version 7.2.0.0. The only
way to set the primary host is via `GPUdb.Options` at `GPUdb`
initialization. It cannot be changed after that.
"""
pass
# end set_primary_host
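# Since the setter above is now a no-op, a hedged sketch of the supported way
# to pick a primary host (the exact constructor keywords are assumptions here;
# only the use of GPUdb.Options.primary_host is taken from this module):
#   options = GPUdb.Options()
#   options.primary_host = "primary-host.example.com"
#   db = GPUdb( host = "http://primary-host.example.com:9191", options = options )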
def get_port( self ):
"""Return the port the host is listening to."""
return self.get_url( stringified = False ).port
# end get_port
def get_host_manager_port( self ):
"""Return the port the host manager is listening to."""
return self.get_hm_url( stringified = False ).port
# end get_host_manager_port
def get_url( self, stringified = True ):
"""Return the GPUdb.URL or its string representation that points to the
current head node of the current cluster in use.
Parameters:
stringified (bool)
Optional argument. If True, return the string representation,
otherwise return the :class:`GPUdb.URL` object. Default is
True.
Returns:
The :class:`GPUdb.URL` object or its string representation.
"""
# Ensure we have some cluster information first!
if not self.__cluster_info:
raise GPUdbException( "No cluster registered with the API yet!" )
# Get the current URL
url = self.current_cluster_info.head_rank_url
if stringified:
return str(url)
else:
return url
# end get_url
def get_hm_url( self, stringified = True ):
"""Return the GPUdb.URL or its string representation that points to the
current host manager of the current cluster in use.
Parameters:
stringified (bool)
Optional argument. If True, return the string representation,
otherwise return the :class:`GPUdb.URL` object. Default is
True.
Returns:
The :class:`GPUdb.URL` object or its string representation.
"""
# Get the current host manager URL
url = self.current_cluster_info.host_manager_url
if stringified:
return str(url)
else:
return url
# end get_hm_url
def get_failover_urls( self ):
"""Return a list of the head node URLs for each of the clusters in the
HA ring in failover order.
Returns:
A list of :class:`GPUdb.URL` objects.
"""
# Return the head node URLs in failover order
return [ self.__cluster_info[cluster_index].head_rank_url for cluster_index in self.__cluster_indices ]
# end get_failover_urls
def get_head_node_urls( self ):
"""Return a list of the head node URLs for each of the clusters in the
HA ring for the database server.
Returns:
A list of :class:`GPUdb.URL` objects.
"""
# Return each cluster's head node URL
return [ cluster.head_rank_url for cluster in self.__cluster_info ]
# end get_head_node_urls
def get_num_cluster_switches( self ):
"""Gets the number of times the client has switched to a different
cluster amongst the high availability ring.
"""
return self.__num_cluster_switches
# end get_num_cluster_switches
@property
def current_cluster_info( self ):
"""Return the :class:`GPUdb.ClusterAddressInfo` object
containing information on the current/active cluster."""
return self.__cluster_info[ self.__get_curr_cluster_index() ]
# end current_cluster_info
@property
def all_cluster_info( self ):
"""Return the list of :class:`GPUdb.ClusterAddressInfo` objects
that contain address of each of the clusters in the ring."""
return self.__cluster_info
# end all_cluster_info
@property
def ha_ring_size( self ):
"""Return the list of :class:`GPUdb.ClusterAddressInfo` objects
that contain address of each of the clusters in the ring."""
return len( self.__cluster_info )
# end all_cluster_info
@property
def options( self ):
"""Return the :class:`GPUdb.Options` object that contains all
the knobs the user can turn for controlling this class's behavior.
"""
return self.__options
# end options
@property
def host(self):
return self.get_host()
@host.setter
def host(self, value):
"""
.. deprecated:: 7.1.0.0
As of version 7.1.0.0, this method will no longer be
functional. This method will be a no-op, not changing host.
The method will be removed in version 7.2.0.0. The only
way to set the host is at `GPUdb` initialization. It cannot be
changed after that.
"""
pass
# end host setter
@property
def port(self):
return self.get_port()
@port.setter
def port(self, value):
"""
.. deprecated:: 7.1.0.0
As of version 7.1.0.0, this method will no longer be
functional. This method will be a no-op, not changing port.
The method will be removed in version 7.2.0.0. The only
way to set the port is at `GPUdb` initialization. It cannot be
changed after that.
"""
pass
# end port setter
@property
def host_manager_port(self):
return self.get_host_manager_port()
@host_manager_port.setter
def host_manager_port(self, value):
"""
.. deprecated:: 7.1.0.0
As of version 7.1.0.0, this method will no longer be
functional. This method will be a no-op, not changing host manager
port. The method will be removed in version 7.2.0.0. The only
way to set the host manager port is via `GPUdb.Options` at `GPUdb`
initialization. It cannot be changed after that.
"""
pass
# end host_manager_port setter
@property
def gpudb_url_path(self):
return self.get_url( stringified = False ).path
@gpudb_url_path.setter
def gpudb_url_path(self, value):
"""
.. deprecated:: 7.1.0.0
As of version 7.1.0.0, this method will no longer be
functional. This method will be a no-op, not changing the URL path.
The method will be removed in version 7.2.0.0. The only
way to set the URL path is via `GPUdb.Options` at `GPUdb`
initialization. It cannot be changed after that.
"""
pass
# end gpudb_url_path setter
@property
def gpudb_full_url(self):
"""Returns the full URL of the current head rank of the currently
active cluster."""
return self.get_url( stringified = False ).url
@property
def server_version(self):
"""Returns the :class:`GPUdb.Version` object representing the version of
the *currently active* cluster of the Kinetica server."""
return self.current_cluster_info.server_version
@server_version.setter
def server_version(self, value):
self.current_cluster_info.server_version = value
@property
def connection(self):
return self.__protocol
@connection.setter
def connection(self, value):
"""
.. deprecated:: 7.1.0.0
As of version 7.1.0.0, this method will no longer be
functional. This method will be a no-op, not changing the protocol.
The method will be removed in version 7.2.0.0. The only
way to set the protocol is via `GPUdb.Options` at `GPUdb`
initialization. It cannot be changed after that.
"""
pass
# end connection setter
@property
def protocol(self):
"""Returns the HTTP protocol being used by the :class:`GPUdb`
object to communicate to the database server.
"""
return self.__protocol
@property
def primary_host(self):
"""Returns the primary hostname."""
return self.__primary_host
@property
def username(self):
"""Gets the username to be used for authentication to GPUdb."""
return self.__username
@property
def password(self):
"""Gets the password to be used for authentication to GPUdb."""
return self.__password
@property
def encoding(self):
return self.__encoding
@property
def timeout(self):
"""Gets the timeout used for http connections to GPUdb."""
return self.__timeout
@property
def disable_auto_discovery(self):
"""Returns whether auto-discovery has been disabled."""
return self.__disable_auto_discovery
@property
def ha_sync_mode(self):
return self._ha_sync_mode
@ha_sync_mode.setter
def ha_sync_mode(self, value ):
if not isinstance( value, GPUdb.HASynchronicityMode ):
raise GPUdbException( "HA sync mode must be of type '{}', given {}!"
"".format( str( GPUdb.HASynchronicityMode ),
str( type( value ) ) ) )
# end error checking
self._ha_sync_mode = value
# end setter
@property
def logging_level(self):
"""Returns the integer value of the logging level that is being used by
the API. By default, logging is set to NOTSET, and the logger will
honor the root logger's level.
"""
return self.log.level
@property
def skip_ssl_cert_verification(self):
return self.__skip_ssl_cert_check
def save_known_type(self, type_id, _type ):
self._known_types[ type_id ] = _type
@property
def get_known_types(self):
"""Return all known types; if
none, return None.
"""
return self._known_types
# end get_known_types
def get_known_type(self, type_id, lookup_type = True ):
"""Given an type ID, return any associated known type; if
none is found, then optionally try to look it up and save it.
Otherwise, return None.
Parameters:
type_id (str)
The ID for the type.
lookup_type (bool)
If True and the type is not already known, look it up by
invoking :meth:`.show_types`, save it for the future, and
return it.
Returns:
The associated RecordType, if found (or looked up). None
otherwise.
"""
if type_id in self._known_types:
return self._known_types[ type_id ]
if lookup_type:
# Get the type info from the database
type_info = self.show_types( type_id = type_id, label = "" )
if not _Util.is_ok( type_info ):
raise GPUdbException( "Error in finding type {}: {}"
"".format( type_id,
_Util.get_error_msg( type_info ) ) )
# Create the RecordType
record_type = RecordType.from_type_schema( label = "",
type_schema = type_info["type_schemas"][ 0 ],
properties = type_info["properties"][ 0 ] )
# Save the RecordType
self._known_types[ type_id ] = record_type
return record_type
# end if
return None # none found
# end get_known_type
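# Hedged usage sketch (assumes a connected GPUdb instance `db` and an existing
# type ID): the first call may query the server via show_types and cache the
# result; later calls return the cached RecordType.
#   record_type = db.get_known_type( type_id )          # lookup + cache
#   same_type   = db.get_known_type( type_id, False )   # cache-only lookup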
def get_all_available_full_urls( self, stringified = True ):
"""Return the list of :class:`GPUdb.URL` objects or its string
representation that points to the current head node of each of the
clusters in the ring.
Parameters:
stringified (bool)
Optional argument. If True, return the string representation,
otherwise return the :class:`GPUdb.URL` object. Default is
True.
Returns:
The :class:`GPUdb.URL` object or its string representation.
"""
if stringified:
# TODO: Uncomment the commented out part and delete the old version
return [ str(cluster.head_rank_url) for cluster in self.all_cluster_info ]
# return "{host}:{port}".format( host = self.get_host(), port = self.get_port() ) # old version
else:
return [ cluster.head_rank_url for cluster in self.all_cluster_info ]
# end get_all_available_full_urls
# end add_http_header
# end remove_http_header
# end get_http_header
# ==========================================================================
def __load_logger_schemas( self ):
# Some other schemas for internal work
self.logger_request_schema_str = """
{
"type" : "record",
"name" : "logger_request",
"fields" : [
{"name" : "ranks", "type" : {"type" : "array", "items" : "int"}},
{"name" : "log_levels", "type" : {"type" : "map", "values" : "string"}},
{"name" : "options", "type" : {"type" : "map", "values" : "string"}}
]
}
""".replace("\n", "").replace(" ", "")
self.logger_response_schema_str = """
{
"type" : "record",
"name" : "logger_response",
"fields" : [
{"name" : "status" , "type" : "string"},
{"name" : "log_levels", "type" : {"type" : "map", "values" : "string"}}
]
}
""".replace("\n", "").replace(" ", "")
self.logger_request_schema = Schema( "record",
[
("ranks", "array", [("int")]),
("log_levels", "map", [("string")] ),
("options", "map", [("string")])
] )
self.logger_response_schema = Schema( "record",
[
("status" , "string"),
("log_levels", "map", [("string")] )
] )
# Save the logger endpoint schemas for future use
endpoint = "/logger"
self.gpudb_schemas[ endpoint ] = {
"REQ_SCHEMA_STR" : self.logger_request_schema_str,
"RSP_SCHEMA_STR" : self.logger_response_schema_str,
"REQ_SCHEMA" : self.logger_request_schema,
"RSP_SCHEMA" : self.logger_response_schema,
"ENDPOINT" : endpoint
}
# end __load_logger_schemas
# -----------------------------------------------------------------------
# Helper functions
# -----------------------------------------------------------------------
def __log_debug_with_id( self, message ):
if not self.log.isEnabledFor( logging.DEBUG ):
# No-op if debug is not enabled. This is important
# because the inspect module is a time killer!
return
# end if
try:
# Get calling method's information from the stack
stack = inspect.stack()
# stack[1] gives the previous/calling function
filename = stack[1][1].split("/")[-1]
ln = stack[1][2]
func = stack[1][3]
self.log.debug( "[GPUdb::{fn}::{line}::{func}] {id} {msg}"
"".format( fn = filename,
func = func, line = ln,
id = self._id,
msg = message ) )
except:
# Some error occurred with inspect; just log the debug message
self.log.debug( "[GPUdb] {id} {msg}"
"".format( id = self._id,
msg = message ) )
# end __log_debug_with_id
def __log_warn_with_id( self, message ):
self.log.warn( "[GPUdb] {} {}".format( self._id, message ) )
# end __warn_with_id
def __log_info_with_id( self, message ):
self.log.info( "[GPUdb] {} {}".format( self._id, message ) )
# end __log_info_with_id
def __log_error_with_id( self, message ):
self.log.error( "[GPUdb] {} {}".format( self._id, message ) )
# end __log_error_with_id
def __is_log_level_trace_enabled( self ):
"""Returns whether the trace log level is enabled. This is
often required when we need to log messages very judiciously.
Since string concatenation takes a long time, we don't want to
create the log message if trace level is not enabled.
"""
return self.log.isEnabledFor( logging.TRACE )
# end __is_log_level_trace_enabled
def __log_trace( self, message ):
if not self.log.isEnabledFor( logging.TRACE ):
# No-op if trace is not enabled. This is important
# because the inspect module is a time killer!
return
# end if
try:
# Get calling method's information from the stack
stack = inspect.stack()
# stack[1] gives the previous/calling function
filename = stack[1][1].split("/")[-1]
ln = stack[1][2]
func = stack[1][3]
self.log.trace( "[GPUdb::{fn}::{line}::{func}] {msg}"
"".format( fn = filename,
func = func, line = ln,
msg = message ) )
except:
# Some error occurred with inspect; just log the debug message
self.log.trace( "[GPUdb] {msg}"
"".format( msg = message ) )
# end __log_trace
def __log_debug( self, message ):
if not self.log.isEnabledFor( logging.DEBUG ):
# No-op if debug is not enabled. This is important
# because the inspect module is a time killer!
return
# end if
try:
# Get calling method's information from the stack
stack = inspect.stack()
# stack[1] gives the previous/calling function
filename = stack[1][1].split("/")[-1]
ln = stack[1][2]
func = stack[1][3]
self.log.debug( "[GPUdb::{fn}::{line}::{func}] {msg}"
"".format( fn = filename,
func = func, line = ln,
msg = message ) )
except:
# Some error occurred with inspect; just log the debug message
self.log.debug( "[GPUdb] {msg}"
"".format( msg = message ) )
# end __log_debug
def __log_warn( self, message ):
self.log.warning( "[GPUdb] {}".format( message ) )
# end __log_warn
def __log_info( self, message ):
self.log.info( "[GPUdb] {}".format( message ) )
# end __log_info
def __log_error( self, message ):
self.log.error( "[GPUdb] {}".format( message ) )
# end __log_error
def log_debug( self, message ):
"""Logging method for debug.
.. deprecated:: 7.1.0.0
As of version 7.1.0.0, this method is deprecated,
and may be removed in a future version. Previously,
this was a static method; now it is an instance method.
This method will log messages as intended.
"""
self.__log_debug( message )
# end log_debug
def log_warn( self, message ):
"""Logging method for warnings.
.. deprecated:: 7.1.0.0
As of version 7.1.0.0, this method is deprecated,
and may be removed in a future version. Previously,
this was a static method; now it is an instance method.
This method will log messages as intended.
"""
self.__log_warn( message )
# end log_warn
def log_info( self, message ):
"""Logging method for information.
.. deprecated:: 7.1.0.0
As of version 7.1.0.0, this method is deprecated,
and may be removed in a future version. Previously,
this was a static method; now it is an instance method.
This method will log messages as intended.
"""
self.__log_info( message )
# end log_info
def log_error( self, message ):
"""Logging method for error.
.. deprecated:: 7.1.0.0
As of version 7.1.0.0, this method is deprecated,
and may be removed in a future version. Previously,
this was a static method; now it is an instance method.
This method will log messages as intended.
"""
self.__log_error( message )
# end log_error
# ------------------------------------------------------------------------
# Endpoint Submission Related Methods
# ------------------------------------------------------------------------
@classmethod
def _check_error(cls, response):
status = response['status_info']['status']
if (status != 'OK'):
message = response['status_info']['message']
raise GPUdbException('[%s]: %s' % (status, message))
def __submit_request_raw( self, url = None, endpoint = None,
request_body = None,
# enable_compression = False,
timeout = None,
get_req_cext = False,
get_rsp_cext = False,
request_schema = None,
response_schema = None,
convert_to_attr_dict = False,
return_raw_response_too = False ):
"""Submits an arbitrary request to GPUdb via the specified URL and
decodes and returns the response. No failover is handled here.
Parameters:
url (GPUdb.URL)
The URL to send the request to
endpoint (str)
The endpoint to use (needed for looking up the appropriate
request and response avro schema).
request_body (dict) # TODO: Is dict right here?
The request object that has already been encoded as appropriate.
timeout (int)
Optional argument. If given, then the non-negative integer would be
used for the timeout for the request connection (in seconds).
If not given, then the currently configured timeout for this
GPUdb object would be used instead.
get_req_cext (bool)
If True, then use the c-extension version of the request schema.
Default is False.
get_rsp_cext (bool)
If True, then use the c-extension version of the response schema.
Default is False.
request_schema (Schema)
Optional argument. The :class:`Schema` object to use to encode
the request object instead of looking one up using the endpoint.
Can be used to test internal endpoints that are not exposed
through this API. If given, `response_schema` must also be
given. Default is None.
response_schema (Schema)
Optional argument. The :class:`Schema` object to use to decode
the response from the server (instead of looking one up using
the endpoint). Can be used to test internal endpoints that are
not exposed through this API. If given, `request_schema` must
also be given. Default is None.
convert_to_attr_dict (bool)
If True, convert the response to an :class:`AttrDict` object.
If False, return the raw response. Default is False.
return_raw_response_too (bool)
Optional argument. If True, then return the raw response
obtained from the server along with the decoded response.
Default is False.
Returns:
The decoded response object by itself if `return_raw_response_too`
is False. If that argument is True, then return a tuple where the
first element is the decoded response and the second element is the
raw response returned by the server.
"""
# Validate the input arguments
if not isinstance( url, GPUdb.URL ):
msg = ("Argument 'url' must be a GPUdb.URL object; given '{}'"
"".format( str(type(url)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if not isinstance( endpoint, (basestring, unicode) ):
msg = ("Argument 'endpoint' must be a string; given '{}'"
"".format( str(type(endpoint)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if request_body is None:
msg = ("Argument 'request_body' must be provided; given None" )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# If no user-given timeout is provided, just use the cached one
if timeout is None:
# Use the cached one
timeout = self.timeout
else:
# The given timeout must be a non-negative integer
try:
timeout = int( timeout )
except:
msg = ("Argument 'timeout' must be an integer value; "
"given '{}'".format( str(type(timeout)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end inner if
if timeout < 0:
msg = ("Argument 'timeout' must be a non-negative integer value; "
"given '{}'".format( timeout ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# Get the request and response schemas, if not given already
if ( (not request_schema) or (not response_schema) ):
try:
( request_schema,
response_schema
) = self.__get_schemas( endpoint,
get_req_cext = get_req_cext,
get_rsp_cext = get_rsp_cext )
# (request_schema, response_schema) = self.__get_schemas( endpoint )
except Exception as ex:
msg = ("Unable to retrieve avro schemas for endpoint '{}': {}"
"".format( endpoint,
GPUdbException.stringify_exception( ex ) ) )
self.__log_debug( msg )
raise GPUdbException( msg)
# end try
# end if
# Log the request and the endpoint at the trace level. Note that since
# string interpolation takes a demonstrably large time (proved via
# benchmarking), we need to first check if the log level is on. That
# way, we only create the interpolated string when it will be used.
if self.__is_log_level_trace_enabled():
self.__log_trace( "Sending {} request {} to {}"
"".format( endpoint, request_body, str(url) ) )
# Encode the request
encoded_request = self.encode_datum_cext( request_schema, request_body )
# Get the header and process the body data
( headers, body_data ) = self.__create_header_and_process_body_data( encoded_request )
http_conn = self.__initialize_http_connection( url, timeout )
try:
# Post the request
path = "{url_path}{endpoint}".format( url_path = url.path,
endpoint = endpoint )
http_conn.request( C._REQUEST_POST, path, body_data, headers )
except ssl.SSLError as ex:
msg = ("Unable to execute SSL handshake with '{}' due to: {}"
"".format( url.url,
GPUdbException.stringify_exception( ex ) ))
final_msg = self.__SSL_ERROR_MESSAGE_TEMPLATE.format(msg)
self.__log_debug( final_msg )
raise GPUdbUnauthorizedAccessException( final_msg )
except Exception as ex:
msg = ("Error posting to '{}' due to: {}"
"".format( url.url,
GPUdbException.stringify_exception( ex ) ) )
self.__log_debug( msg )
# TODO: In the Java API, this is an GPUdbExitException; decide what this should be here
raise GPUdbConnectionException( msg )
# end try
# Get the response
try:
response = http_conn.getresponse()
except Exception as ex: # some error occurred; return a message
msg = ( "No response received from {} due to {}"
"".format( url.url,
GPUdbException.stringify_exception( ex ) ) )
self.__log_debug( msg )
raise GPUdbConnectionException( msg )
# end try
# Read and decode the response, handling any error
try:
response_data = response.read()
response_time = response.getheader('x-request-time-secs', None)
# Check the HTTP status code and throw an exit exception as appropriate
status_code = response.status
response_msg = response.reason
if ( status_code == httplib.UNAUTHORIZED ):
# Unauthorized access gets a different exception
msg = ( "Unauthorized access: '{}'".format( response_msg ) )
self.__log_debug( msg )
raise GPUdbUnauthorizedAccessException( msg )
elif ( status_code in self.__http_response_triggering_failover ):
msg = ( "Could not connect to database at '{}' due to status "
"code {}: {}".format( url.url, status_code, response_msg ) )
self.__log_debug( "Throwing EXIT exception; {}".format( msg ) )
raise GPUdbExitException( msg )
elif ( status_code == httplib.NOT_FOUND ):
msg = ( "Endpoint not found ({}) due to status "
"code {}: {}".format( url.url, status_code,
response_msg ) )
self.__log_debug( "Throwing EXIT exception; {}".format( msg ) )
raise GPUdbExitException( msg )
# end if
# Decode the http raw response and extract the endpoint response
# from within it
decoded_response = self.__read_datum_cext( response_schema,
response_data,
None, response_time )
# TODO: Do we need special handling for html/text responses here like the Java API?
# Convert the response to a class that has attributes per
# key-value pair of the response dictionary
if convert_to_attr_dict:
decoded_response = AttrDict( decoded_response )
# end if
if return_raw_response_too:
# Return the decoded and the raw response per the user's wishes
return (decoded_response, response_data)
else:
# Simply return the decoded response
return decoded_response
except GPUdbUnauthorizedAccessException as ex:
# Any permission related problem should get propagated
raise
except (GPUdbConnectionException, GPUdbExitException) as ex:
# For special connection or exit errors, just pass them on
self.__log_debug("Caught conn/exit exception: {}".format( str(ex) ))
raise
except GPUdbException as ex:
# An end-of-file problem from the server is also a failover trigger
if C._DB_EOF_FROM_SERVER_ERROR_MESSAGE in str(ex):
msg = ( "Received failover triggering error when trying to "
"connect to {}: {}"
"".format( url.url, str(ex) ) )
self.__log_debug( "Throwing EXIT exception; {}".format( msg ) )
raise GPUdbExitException( msg )
else:
# All other errors are legitimate, and to be passed on to the
# user
self.__log_debug( "Throwing GPUdb exception; {}".format( str(ex) ) )
raise
# end if
except Exception as ex: # some error occurred; return a message
msg = ("Error reading response from {} for endpoint {}: {}"
"".format( url.url, endpoint,
GPUdbException.stringify_exception( ex ) ) )
# TODO: Or should this be an exit exception also??
self.__log_debug( "Throwing GPUdb exception; {}".format( msg ) )
raise GPUdbException( msg )
# end try
# end __submit_request_raw
def __submit_request_raw_json_without_body( self, url = None, endpoint = None,
timeout = None,
):
"""Submits an arbitrary request to GPUdb via the specified URL and
decodes and returns response. This method is called from the `submit_request_json`
generally which handles the *HA Failover* and hence failover is not handled here.
The main purpose of this method is to execute a request over HTTP/S to a specific
URL and send the response back.
Parameters:
url (GPUdb.URL)
The URL to send the request to
endpoint (str)
The endpoint to use (needed for looking up the appropriate
request and response avro schema).
timeout (int)
Optional argument. If given, this non-negative integer is used as
the timeout for the request connection (in seconds). If not given,
the currently configured timeout for this GPUdb object is used
instead.
Returns:
The full JSON (str) response returned by the server. The part carrying relevant information
about the output of the operation is the 'data' object.
"""
# Validate the input arguments
if not isinstance( url, GPUdb.URL ):
msg = ("Argument 'url' must be a GPUdb.URL object; given '{}'"
"".format( str(type(url)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if not isinstance( endpoint, (basestring, unicode) ):
msg = ("Argument 'endpoint' must be a string; given '{}'"
"".format( str(type(endpoint)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# If no user-given timeout was provided, just use the cached one
if timeout is None:
# Use the cached one
timeout = self.timeout
else:
# The given timeout must be a non-negative integer
try:
timeout = int( timeout )
except:
msg = ("Argument 'timeout' must be an integer value; "
"given '{}'".format( str(type(timeout)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end inner if
if timeout < 0:
msg = ("Argument 'timeout' must be a non-negative integer value; "
"given '{}'".format( timeout ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
http_conn = self.__initialize_http_connection( url, timeout )
headers = {}
headers[C._HEADER_CONTENT_TYPE] = "application/json"
headers[C._HEADER_ACCEPT] = "text/plain"
if self.auth:
headers[C._HEADER_AUTHORIZATION] = self.auth
try:
# Post the request
path = "{url_path}{endpoint}".format( url_path = url.path,
endpoint = endpoint )
http_conn.request( C._REQUEST_POST, path, None, headers )
except ssl.SSLError as ex:
msg = ("Unable to execute SSL handshake with '{}' due to: {}"
"".format( url.url,
GPUdbException.stringify_exception( ex ) ))
final_msg = self.__SSL_ERROR_MESSAGE_TEMPLATE.format(msg)
self.__log_debug( final_msg )
raise GPUdbUnauthorizedAccessException( final_msg )
except Exception as ex:
msg = ("Error posting to '{}' due to: {}"
"".format( url.url,
GPUdbException.stringify_exception( ex ) ) )
self.__log_debug( msg )
# TODO: In the Java API, this is a GPUdbExitException; decide what this should be here
raise GPUdbConnectionException( msg )
# end try
# Get the response
try:
response = http_conn.getresponse()
except Exception as ex: # some error occurred; return a message
msg = ( "No response received from {} due to {}"
"".format( url.url,
GPUdbException.stringify_exception( ex ) ) )
self.__log_debug( msg )
raise GPUdbConnectionException( msg )
# end try
# Read and decode the response, handling any error
try:
response_data = response.read()
response_time = response.getheader('x-request-time-secs', None)
# Check the HTTP status code and throw an exit exception as appropriate
status_code = response.status
response_msg = response.reason
if ( status_code == httplib.UNAUTHORIZED ):
# Unauthorized access gets a different exception
msg = ( "Unauthorized access: '{}'".format( response_msg ) )
self.__log_debug( msg )
raise GPUdbUnauthorizedAccessException( msg )
elif ( status_code in self.__http_response_triggering_failover ):
msg = ( "Could not connect to database at '{}' due to status "
"code {}: {}".format( url.url, status_code, response_msg ) )
self.__log_debug( "Throwing EXIT exception; {}".format( msg ) )
raise GPUdbExitException( msg )
elif ( status_code == httplib.NOT_FOUND ):
msg = ( "Endpoint not found ({}) due to status "
"code {}: {}".format( url.url, status_code,
response_msg ) )
self.__log_debug( "Throwing GPUdb exception; {}".format( msg ) )
raise GPUdbException( msg )
# end if
return str(response_data, "UTF-8")
except GPUdbUnauthorizedAccessException as ex:
# Any permission related problem should get propagated
raise
except (GPUdbConnectionException, GPUdbExitException) as ex:
# For special connection or exit errors, just pass them on
self.__log_debug("Caught conn/exit exception: {}".format( str(ex) ))
raise
except GPUdbException as ex:
# An end-of-file problem from the server is also a failover trigger
if C._DB_EOF_FROM_SERVER_ERROR_MESSAGE in str(ex):
msg = ( "Received failover triggering error when trying to "
"connect to {}: {}"
"".format( url.url, str(ex) ) )
self.__log_debug( "Throwing EXIT exception; {}".format( msg ) )
raise GPUdbExitException( msg )
else:
# All other errors are legitimate, and to be passed on to the
# user
self.__log_debug( "Throwing GPUdb exception; {}".format( str(ex) ) )
raise
# end if
except Exception as ex: # some error occurred; return a message
msg = ("Error reading response from {} for endpoint {}: {}"
"".format( url.url, endpoint,
GPUdbException.stringify_exception( ex ) ) )
# TODO: Or should this be an exit exception also??
self.__log_debug( "Throwing GPUdb exception; {}".format( msg ) )
raise GPUdbException( msg )
# end try
# end __submit_request_raw_json_without_body
def __submit_request_raw_json( self, url = None, endpoint = None,
request_body = None,
timeout = None,
):
"""Submits an arbitrary request to GPUdb via the specified URL and
decodes and returns response. This method is called from the `submit_request_json`
generally which handles the *HA Failover* and hence failover is not handled here.
The main purpose of this method is to execute a request over HTTP/S to a specific
URL and send the response back.
Parameters:
url (GPUdb.URL)
The URL to send the request to
endpoint (str)
The endpoint to use (needed for looking up the appropriate
request and response avro schema).
request_body (str)
The request body that is either a single JSON record or an array of JSON records
timeout (int)
Optional argument. If given, this non-negative integer is used as
the timeout for the request connection (in seconds). If not given,
the currently configured timeout for this GPUdb object is used
instead.
Returns:
The full JSON (str) response returned by the server. The part carrying relevant information
about the output of the operation is the 'data' object.
"""
# Validate the input arguments
if not isinstance( url, GPUdb.URL ):
msg = ("Argument 'url' must be a GPUdb.URL object; given '{}'"
"".format( str(type(url)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if not isinstance( endpoint, (basestring, unicode) ):
msg = ("Argument 'endpoint' must be a string; given '{}'"
"".format( str(type(endpoint)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if request_body is None:
msg = ("Argument 'request_body' must be provided; given None" )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if type(request_body) != str:
raise GPUdbException("'request_body' has to be either a single JSON record or an array of JSON records (as string)")
# If no user-given timeout was provided, just use the cached one
if timeout is None:
# Use the cached one
timeout = self.timeout
else:
# The given timeout must be a non-negative integer
try:
timeout = int( timeout )
except:
msg = ("Argument 'timeout' must be an integer value; "
"given '{}'".format( str(type(timeout)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end inner if
if timeout < 0:
msg = ("Argument 'timeout' must be a non-negative integer value; "
"given '{}'".format( timeout ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# Log the request and the endpoint at the trace level. Since string
# interpolation is measurably expensive (per benchmarking), first check
# whether trace logging is enabled so the interpolated string is only
# built when it will actually be used.
if self.__is_log_level_trace_enabled():
self.__log_trace( "Sending {} request {} to {}"
"".format( endpoint, request_body, str(url) ) )
http_conn = self.__initialize_http_connection( url, timeout )
headers = {}
headers[C._HEADER_CONTENT_TYPE] = "application/json"
headers[C._HEADER_ACCEPT] = "text/plain"
if self.auth:
headers[C._HEADER_AUTHORIZATION] = self.auth
try:
# Post the request
path = "{url_path}{endpoint}".format( url_path = url.path,
endpoint = endpoint )
http_conn.request( C._REQUEST_POST, path, request_body, headers )
except ssl.SSLError as ex:
msg = ("Unable to execute SSL handshake with '{}' due to: {}"
"".format( url.url,
GPUdbException.stringify_exception( ex ) ))
final_msg = self.__SSL_ERROR_MESSAGE_TEMPLATE.format(msg)
self.__log_debug( final_msg )
raise GPUdbUnauthorizedAccessException( final_msg )
except Exception as ex:
msg = ("Error posting to '{}' due to: {}"
"".format( url.url,
GPUdbException.stringify_exception( ex ) ) )
self.__log_debug( msg )
# TODO: In the Java API, this is a GPUdbExitException; decide what this should be here
raise GPUdbConnectionException( msg )
# end try
# Get the response
try:
response = http_conn.getresponse()
except Exception as ex: # some error occurred; return a message
msg = ( "No response received from {} due to {}"
"".format( url.url,
GPUdbException.stringify_exception( ex ) ) )
self.__log_debug( msg )
raise GPUdbConnectionException( msg )
# end try
# Read and decode the response, handling any error
try:
response_data = response.read()
response_time = response.getheader('x-request-time-secs', None)
# Check the HTTP status code and throw an exit exception as appropriate
status_code = response.status
response_msg = response.reason
if ( status_code == httplib.UNAUTHORIZED ):
# Unauthorized access gets a different exception
msg = ( "Unauthorized access: '{}'".format( response_msg ) )
self.__log_debug( msg )
raise GPUdbUnauthorizedAccessException( msg )
elif ( status_code in self.__http_response_triggering_failover ):
msg = ( "Could not connect to database at '{}' due to status "
"code {}: {}".format( url.url, status_code, response_msg ) )
self.__log_debug( "Throwing EXIT exception; {}".format( msg ) )
raise GPUdbExitException( msg )
elif ( status_code == httplib.NOT_FOUND ):
msg = ( "Endpoint not found ({}) due to status "
"code {}: {}".format( url.url, status_code,
response_msg ) )
self.__log_debug( "Throwing GPUdb exception; {}".format( msg ) )
raise GPUdbException( msg )
# end if
return str(response_data, "UTF-8")
except GPUdbUnauthorizedAccessException as ex:
# Any permission related problem should get propagated
raise
except (GPUdbConnectionException, GPUdbExitException) as ex:
# For special connection or exit errors, just pass them on
self.__log_debug("Caught conn/exit exception: {}".format( str(ex) ))
raise
except GPUdbException as ex:
# An end-of-file problem from the server is also a failover trigger
if C._DB_EOF_FROM_SERVER_ERROR_MESSAGE in str(ex):
msg = ( "Received failover triggering error when trying to "
"connect to {}: {}"
"".format( url.url, str(ex) ) )
self.__log_debug( "Throwing EXIT exception; {}".format( msg ) )
raise GPUdbExitException( msg )
else:
# All other errors are legitimate, and to be passed on to the
# user
self.__log_debug( "Throwing GPUdb exception; {}".format( str(ex) ) )
raise
# end if
except Exception as ex: # some error occurred; return a message
msg = ("Error reading response from {} for endpoint {}: {}"
"".format( url.url, endpoint,
GPUdbException.stringify_exception( ex ) ) )
# TODO: Or should this be an exit exception also??
self.__log_debug( "Throwing GPUdb exception; {}".format( msg ) )
raise GPUdbException( msg )
# end try
# end __submit_request_raw_json
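# --- Illustrative sketch (not generated API code) ---------------------------
# The JSON submission helpers above send a plain JSON string with
# "Content-Type: application/json" and return the raw response text.
# A standard-library-only rendering of that exchange (all names and header
# values below are placeholders for illustration):
#
#     import http.client, json
#
#     def post_json(host, port, endpoint, record, timeout=10):
#         body = json.dumps(record)
#         headers = {"Content-Type": "application/json", "Accept": "text/plain"}
#         conn = http.client.HTTPConnection(host, port, timeout=timeout)
#         conn.request("POST", endpoint, body, headers)
#         resp = conn.getresponse()
#         return resp.read().decode("utf-8")   # full JSON response as str
#
# The caller would typically json.loads() the returned string and inspect
# its 'data' object, as noted in the docstrings above.
# ----------------------------------------------------------------------------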
def __submit_request( self, endpoint, request_body,
url = None,
timeout = None,
get_req_cext = False,
get_rsp_cext = False,
request_schema = None,
response_schema = None,
convert_to_attr_dict = False,
return_raw_response_too = False ):
"""Submits an arbitrary request to the database server and returns
the response. If a failover trigger is encountered, then either an
HA failover occurs (if an HA ring has been set up), or in the case
of a stand-alone cluster, a failover recovery is attempted (which
may continue indefinitely, based on relevant options set by the
user). In the case of a successful failover, the internally cached
URL will be updated to point to the new URL being used.
Parameters:
endpoint (str)
The GPUdb endpoint to send the request to; must be a string.
Must be provided.
request_body (dict) # TODO: Is dict right here?
The request object. Must be provided.
url (GPUdb.URL)
Optional argument. If given, this URL would be used to connect
to the database. If none given, then the current URL cached
internally would be used instead. If given, then **no failover
will be attempted**.
timeout (int)
Optional argument. If given, this non-negative integer is used as
the timeout for the request connection (in seconds). If not given,
the currently configured timeout for this GPUdb object is used
instead.
get_req_cext (bool)
If True, then use the c-extension version of the request schema.
Default is False.
get_rsp_cext (bool)
If True, then use the c-extension version of the response schema.
Default is False.
request_schema (Schema)
Optional argument. The :class:`Schema` object to use to encode
the request object instead of looking one up using the endpoint.
Can be used to test internal endpoints that are not exposed
through this API. If given, `response_schema` must also be
given. Default is None.
response_schema (Schema)
Optional argument. The :class:`Schema` object to use to decode
the response from the server (instead of looking one up using
the endpoint). Can be used to test internal endpoints that are
not exposed through this API. If given, `request_schema` must
also be given. Default is None.
convert_to_attr_dict (bool)
If True, convert the response to an :class:`AttrDict` object.
If False, return the raw response. Default is False.
return_raw_response_too (bool)
Optional argument. If True, then return the raw response
obtained from the server along with the decoded response.
Default is False.
Returns:
The decoded response object by itself if `return_raw_response_too`
is False. If that argument is True, then return a tuple where the
first element is the decoded response and the second element is the
raw response returned by the server.
"""
# Validate input arguments
if not isinstance( endpoint, (basestring, unicode) ):
msg = ("Argument 'endpoint' must be a string; given '{}'"
"".format( str(type(endpoint)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if request_body is None:
msg = ("Argument 'request_body' must be provided; given None" )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if timeout is None:
# Use the cached one
timeout = self.timeout
else:
# The given timeout must be a non-negative integer
try:
timeout = int( timeout )
except:
msg = ("Argument 'timeout' must be an integer value; "
"given '{}'".format( str(type(timeout)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end inner if
if timeout < 0:
msg = ("Argument 'timeout' must be a non-negative integer value; "
"given '{}'".format( timeout ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# If any URL is given, then no failover would be attempted! The easiest
# way to do this is to just call submit request raw, and propagate any
# exceptions that method may throw
if url is not None:
# First validate it
if not isinstance( url, GPUdb.URL ):
msg = ("Argument 'url' must be a GPUdb.URL object; given '{}'"
"".format( str(type(url)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end inner if
response = self.__submit_request_raw( url = url,
endpoint = endpoint,
request_body = request_body,
timeout = timeout,
get_req_cext = get_req_cext,
get_rsp_cext = get_rsp_cext,
request_schema = request_schema,
response_schema = response_schema,
convert_to_attr_dict = convert_to_attr_dict,
return_raw_response_too = return_raw_response_too )
return response
# end if
# We need to send the request to the database server head node
url = self.get_url( stringified = False )
original_url = url
while True:
# We need a snapshot of the current state re: HA failover. When
# multiple threads work on this object, we'll need to know how
# many times we've switched clusters *before* attempting another
# request submission.
current_cluster_switch_count = self.get_num_cluster_switches()
try:
response = self.__submit_request( endpoint,
request_body,
url = url,
timeout = timeout,
get_req_cext = get_req_cext,
get_rsp_cext = get_rsp_cext,
request_schema = request_schema,
response_schema = response_schema,
convert_to_attr_dict = convert_to_attr_dict,
return_raw_response_too = return_raw_response_too )
return response
except GPUdbUnauthorizedAccessException as ex:
# Any permission related problem should get propagated
raise
except (GPUdbConnectionException, GPUdbExitException, GPUdbDecodingException) as ex:
self.__log_debug( "Got exit-level exception when trying endpoint {} at {}: {}; switch URL..."
"".format( endpoint, str(url), str(ex ) ) )
# Handle our special exit exception
try:
url = self.__switch_url( original_url, current_cluster_switch_count )
self.__log_debug( "Switched to {}".format( str(url ) ) )
except GPUdbHAUnavailableException as ha_ex:
# We've now tried all the HA clusters and circled back
# Get the original cause to propagate to the user
error_message = ("{orig}; {new}".format( orig = str(ex),
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
except GPUdbFailoverDisabledException as ha_ex:
# Failover is disabled; return the original cause
error_message = ("{orig}; {new}".format( orig = str(ex),
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
# end try
except GPUdbException as ex:
# Any other GPUdbException is a valid failure
self.__log_debug( "Got GPUdbException, so propagating: {}"
"".format( str(ex) ) )
raise
except Exception as ex:
orig_ex_str = GPUdbException.stringify_exception( ex )
self.__log_debug( "Got regular exception when trying endpoint {}"
" at {}: {}; switch URL..."
"".format( endpoint, str(url), orig_ex_str ) )
# And other random exceptions probably are also connection errors
try:
url = self.__switch_url( original_url, current_cluster_switch_count )
self.__log_debug( "Switched to {}".format( str(url) ) )
except GPUdbHAUnavailableException as ha_ex:
# We've now tried all the HA clusters and circled back
# Get the original cause to propagate to the user
error_message = ("{orig}; {new}".format( orig = orig_ex_str,
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
except GPUdbFailoverDisabledException as ha_ex:
# Failover is disabled; return the original cause
error_message = ("{orig}; {new}".format( orig = orig_ex_str,
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
# end try
# end try
# end while
# end __submit_request
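# --- Illustrative sketch (not generated API code) ---------------------------
# The failover loop above follows a simple shape: snapshot how many cluster
# switches have happened, try the request, and on connection-level failures
# ask for the next URL; any other error is propagated.  A schematic version
# (the callables and types here are assumptions, not the real internals):
#
#     def submit_with_failover(submit, switch_url, get_switch_count, url):
#         original_url = url
#         while True:
#             switches_before = get_switch_count()
#             try:
#                 return submit(url)
#             except ConnectionError:
#                 # switch_url() may itself raise once every cluster was tried
#                 url = switch_url(original_url, switches_before)
#
# The real loop distinguishes authorization errors (always propagated),
# exit/connection/decoding errors (trigger a switch), and other
# GPUdbExceptions (treated as legitimate failures).
# ----------------------------------------------------------------------------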
def __submit_request_json_without_body( self, endpoint,
url = None,
timeout = None,
):
"""Submits an arbitrary request to the database server and returns
the response. If a failover trigger is encountered, then either an
HA failover occurs (if an HA ring has been set up), or in the case
of a stand-alone cluster, a failover recovery is attempted (which
may continue indefinitely, based on relevant options set by the
user). In the case of a successful failover, the internally cached
URL will be updated to point to the new URL being used.
Parameters:
endpoint (str)
The GPUdb endpoint to send the request to; must be a string.
Must be provided.
url (GPUdb.URL)
Optional argument. If given, this URL would be used to connect
to the database. If none given, then the current URL cached
internally would be used instead. If given, then **no failover
will be attempted**.
timeout (int)
Optional argument. If given, this non-negative integer is used as
the timeout for the request connection (in seconds). If not given,
the currently configured timeout for this GPUdb object is used
instead.
Returns:
The full JSON (str) response returned by the server. The part carrying relevant information
about the output of the operation is the 'data' object.
"""
# Validate input arguments
if not isinstance( endpoint, (basestring, unicode) ):
msg = ("Argument 'endpoint' must be a string; given '{}'"
"".format( str(type(endpoint)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if timeout is None:
# Use the cached one
timeout = self.timeout
else:
# The given timeout must be a non-negative integer
try:
timeout = int( timeout )
except:
msg = ("Argument 'timeout' must be an integer value; "
"given '{}'".format( str(type(timeout)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end inner if
if timeout < 0:
msg = ("Argument 'timeout' must be a non-negative integer value; "
"given '{}'".format( timeout ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# If any URL is given, then no failover would be attempted! The easiest
# way to do this is to just call submit request raw, and propagate any
# exceptions that method may throw
if url is not None:
# First validate it
if not isinstance( url, GPUdb.URL ):
msg = ("Argument 'url' must be a GPUdb.URL object; given '{}'"
"".format( str(type(url)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end inner if
response = self.__submit_request_raw_json_without_body( url = url,
endpoint = endpoint,
timeout = timeout,
)
return response
# end if
# We need to send the request to the database server head node
url = self.get_url( stringified = False )
original_url = url
while True:
# We need a snapshot of the current state re: HA failover. When
# multiple threads work on this object, we'll need to know how
# many times we've switched clusters *before* attempting another
# request submission.
current_cluster_switch_count = self.get_num_cluster_switches()
try:
response = self.__submit_request_json_without_body( endpoint,
url = url,
timeout = timeout,
)
return response
except GPUdbUnauthorizedAccessException as ex:
# Any permission related problem should get propagated
raise
except (GPUdbConnectionException, GPUdbExitException) as ex:
self.__log_debug( "Got EXIT or Connection exception when trying"
" endpoint {} at {}: {}; switch URL..."
"".format( endpoint, str(url), str(ex ) ) )
# Handle our special exit exception
try:
url = self.__switch_url( original_url, current_cluster_switch_count )
self.__log_debug( "Switched to {}".format( str(url ) ) )
except GPUdbHAUnavailableException as ha_ex:
# We've now tried all the HA clusters and circled back
# Get the original cause to propagate to the user
error_message = ("{orig}; {new}".format( orig = str(ex),
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
except GPUdbFailoverDisabledException as ha_ex:
# Failover is disabled; return the original cause
error_message = ("{orig}; {new}".format( orig = str(ex),
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
# end try
except GPUdbException as ex:
# Any other GPUdbException is a valid failure
self.__log_debug( "Got GPUdbException, so propagating: {}"
"".format( str(ex) ) )
raise
except Exception as ex:
orig_ex_str = GPUdbException.stringify_exception( ex )
self.__log_debug( "Got regular exception when trying endpoint {}"
" at {}: {}; switch URL..."
"".format( endpoint, str(url), orig_ex_str ) )
# And other random exceptions probably are also connection errors
try:
url = self.__switch_url( original_url, current_cluster_switch_count )
self.__log_debug( "Switched to {}".format( str(url) ) )
except GPUdbHAUnavailableException as ha_ex:
# We've now tried all the HA clusters and circled back
# Get the original cause to propagate to the user
error_message = ("{orig}; {new}".format( orig = orig_ex_str,
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
except GPUdbFailoverDisabledException as ha_ex:
# Failover is disabled; return the original cause
error_message = ("{orig}; {new}".format( orig = orig_ex_str,
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
# end try
# end try
# end while
# end __submit_request_json_without_body
def __submit_request_json( self, endpoint, request_body,
url = None,
timeout = None,
):
"""Submits an arbitrary request to the database server and returns
the response. If a failover trigger is encountered, then either an
HA failover occurs (if an HA ring has been set up), or in the case
of a stand-alone cluster, a failover recovery is attempted (which
may continue indefinitely, based on relevant options set by the
user). In the case of a successful failover, the internally cached
URL will be updated to point to the new URL being used.
Parameters:
endpoint (str)
The GPUdb endpoint to send the request to; must be a string.
Must be provided.
request_body (str)
The request body. Either a single JSON record or an array of JSON records
url (GPUdb.URL)
Optional argument. If given, this URL would be used to connect
to the database. If none given, then the current URL cached
internally would be used instead. If given, then **no failover
will be attempted**.
timeout (int)
Optional argument. If given, this non-negative integer is used as
the timeout for the request connection (in seconds). If not given,
the currently configured timeout for this GPUdb object is used
instead.
Returns:
The full JSON (str) response returned by the server. The part carrying relevant information
about the output of the operation is the 'data' object.
"""
# Validate input arguments
if not isinstance( endpoint, (basestring, unicode) ):
msg = ("Argument 'endpoint' must be a string; given '{}'"
"".format( str(type(endpoint)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if request_body is None:
msg = ("Argument 'request_body' must be provided; given None" )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if type(request_body) != str:
raise GPUdbException("'request_body' has to be either a single JSON record or an array of JSON records (as string)")
if timeout is None:
# Use the cached one
timeout = self.timeout
else:
# The given timeout must be a non-negative integer
try:
timeout = int( timeout )
except:
msg = ("Argument 'timeout' must be an integer value; "
"given '{}'".format( str(type(timeout)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end inner if
if timeout < 0:
msg = ("Argument 'timeout' must be a non-negative integer value; "
"given '{}'".format( timeout ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# If any URL is given, then no failover would be attempted! The easiest
# way to do this is to just call submit request raw, and propagate any
# exceptions that method may throw
if url is not None:
# First validate it
if not isinstance( url, GPUdb.URL ):
msg = ("Argument 'url' must be a GPUdb.URL object; given '{}'"
"".format( str(type(url)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end inner if
response = self.__submit_request_raw_json( url = url,
endpoint = endpoint,
request_body = request_body,
timeout = timeout,
)
return response
# end if
# We need to send the request to the database server head node
url = self.get_url( stringified = False )
original_url = url
while True:
# We need a snapshot of the current state re: HA failover. When
# multiple threads work on this object, we'll need to know how
# many times we've switched clusters *before* attempting another
# request submission.
current_cluster_switch_count = self.get_num_cluster_switches()
try:
response = self.__submit_request_json( endpoint,
request_body,
url = url,
timeout = timeout,
)
return response
except GPUdbUnauthorizedAccessException as ex:
# Any permission related problem should get propagated
raise
except (GPUdbConnectionException, GPUdbExitException) as ex:
self.__log_debug( "Got EXIT or Connection exception when trying"
" endpoint {} at {}: {}; switch URL..."
"".format( endpoint, str(url), str(ex ) ) )
# Handle our special exit exception
try:
url = self.__switch_url( original_url, current_cluster_switch_count )
self.__log_debug( "Switched to {}".format( str(url ) ) )
except GPUdbHAUnavailableException as ha_ex:
# We've now tried all the HA clusters and circled back
# Get the original cause to propagate to the user
error_message = ("{orig}; {new}".format( orig = str(ex),
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
except GPUdbFailoverDisabledException as ha_ex:
# Failover is disabled; return the original cause
error_message = ("{orig}; {new}".format( orig = str(ex),
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
# end try
except GPUdbException as ex:
# Any other GPUdbException is a valid failure
self.__log_debug( "Got GPUdbException, so propagating: {}"
"".format( str(ex) ) )
raise
except Exception as ex:
orig_ex_str = GPUdbException.stringify_exception( ex )
self.__log_debug( "Got regular exception when trying endpoint {}"
" at {}: {}; switch URL..."
"".format( endpoint, str(url), orig_ex_str ) )
# And other random exceptions probably are also connection errors
try:
url = self.__switch_url( original_url, current_cluster_switch_count )
self.__log_debug( "Switched to {}".format( str(url) ) )
except GPUdbHAUnavailableException as ha_ex:
# We've now tried all the HA clusters and circled back
# Get the original cause to propagate to the user
error_message = ("{orig}; {new}".format( orig = orig_ex_str,
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
except GPUdbFailoverDisabledException as ha_ex:
# Failover is disabled; return the original cause
error_message = ("{orig}; {new}".format( orig = orig_ex_str,
new = str(ha_ex) ) )
raise GPUdbException( error_message, True )
# end try
# end try
# end while
# end __submit_request_json
def __submit_request_to_hm( self, endpoint, request_body,
url = None,
timeout = None,
get_req_cext = False,
get_rsp_cext = False,
convert_to_attr_dict = False,
return_raw_response_too = False ):
"""Submits an arbitrary request to the database server's host manager
and returns the response. If a failover trigger is encountered, then
either an HA failover occurs (if an HA ring has been set up), or in the
case of a stand-alone cluster, a failover recovery is attempted (which
may continue indefinitely, based on relevant options set by the
user). In the case of a successful failover, the internally cached
URL will be updated to point to the new URL being used.
Should only be used for endpoints that are actually accepted by the
host manager.
Parameters:
endpoint (str)
The GPUdb endpoint to send the request to; must be a string.
Must be provided.
request_body (dict) # TODO: Is dict right here?
The request object. Must be provided.
url (GPUdb.URL)
Optional argument. If given, this URL would be used to connect
to the database. If none given, then the current URL cached
internally would be used instead. If given, then **no failover
will be attempted**.
timeout (int)
Optional argument. If given, this non-negative integer is used as
the timeout for the request connection (in seconds). If not given,
the currently configured timeout for this GPUdb object is used
instead.
get_req_cext (bool)
If True, then use the c-extension version of the request schema.
Default is False.
get_rsp_cext (bool)
If True, then use the c-extension version of the response schema.
Default is False.
convert_to_attr_dict (bool)
If True, convert the response to an :class:`AttrDict` object.
If False, return the raw response. Default is False.
return_raw_response_too (bool)
Optional argument. If True, then return the raw response
obtained from the server along with the decoded response.
Default is False.
Returns:
The decoded response object by itself if `return_raw_response_too`
is False. If that argument is True, then return a tuple where the
first element is the decoded response and the second element is the
raw response returned by the server.
"""
# Validate input arguments
if not isinstance( endpoint, (basestring, unicode) ):
msg = ("Argument 'endpoint' must be a string; given '{}'"
"".format( str(type(endpoint)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if request_body is None:
msg = ("Argument 'request_body' must be provided; given None" )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if timeout is None:
# Use the cached one
timeout = self.timeout
else:
# The given timeout must be a non-negative integer
try:
timeout = int( timeout )
except:
msg = ("Argument 'timeout' must be an integer value; "
"given '{}'".format( str(type(timeout)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end inner if
if timeout < 0:
msg = ("Argument 'timeout' must be a non-negative integer value; "
"given '{}'".format( timeout ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# If any URL is given, then no failover would be attempted! The easiest
# way to do this is to just call submit request raw, and propagate any
# exceptions that method may throw
if url is not None:
# First validate it
if not isinstance( url, GPUdb.URL ):
msg = ("Argument 'url' must be a GPUdb.URL object; given '{}'"
"".format( str(type(url)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end inner if
response = self.__submit_request_raw( url = url, endpoint = endpoint,
request_body = request_body,
timeout = timeout,
get_req_cext = get_req_cext,
get_rsp_cext = get_rsp_cext,
convert_to_attr_dict = convert_to_attr_dict,
return_raw_response_too = return_raw_response_too )
return response
# end if
# We need to send the request to the host manager
hm_url = self.get_hm_url( stringified = False )
original_hm_url = hm_url
# We want to capture the original exception
original_exception = None
for i in range(0, self.__HOST_MANAGER_SUBMIT_REQUEST_RETRY_COUNT):
# We need a snapshot of the current state re: HA failover. When
# multiple threads work on this object, we'll need to know how
# many times we've switched clusters *before* attempting another
# request submission.
current_cluster_switch_count = self.get_num_cluster_switches()
try:
response = self.__submit_request( endpoint,
request_body,
url = hm_url,
timeout = timeout,
get_req_cext = get_req_cext,
get_rsp_cext = get_rsp_cext,
convert_to_attr_dict = convert_to_attr_dict,
return_raw_response_too = return_raw_response_too )
# Check that the host manager did not go down
if not response.is_ok():
# Check for a special error message saying that the DB
# is offline
if (C._DB_HM_OFFLINE_ERROR_MESSAGE
in response.get_error_msg() ):
raise GPUdbException( response.get_error_msg() )
# end inner if
# end if
return response
except GPUdbUnauthorizedAccessException as ex:
# Any permission related problem should get propagated
raise
except (GPUdbConnectionException, GPUdbExitException, GPUdbDecodingException) as ex:
# Save the original exception for later use
if original_exception is None:
original_exception = GPUdbException( str(ex) )
# end if
self.__log_debug( "Got exit-level exception when trying endpoint {} at {}: {}; switch URL..."
"".format( endpoint, str(hm_url), str(ex ) ) )
# Handle our special exit exception
try:
hm_url = self.__switch_hm_url( original_hm_url, current_cluster_switch_count )
self.__log_debug( "Switched to {}".format( str( hm_url ) ) )
except GPUdbHAUnavailableException as ha_ex:
# We've now tried all the HA clusters and circled back
# Get the original cause to propagate to the user
error_message = ("{orig}; {new}".format( orig = str(ex),
new = str(ha_ex) ) )
self.__log_debug( error_message )
raise GPUdbException( error_message, True )
except GPUdbFailoverDisabledException as ha_ex:
# Failover is disabled; return the original cause
error_message = ("{orig}; {new}".format( orig = str(ex),
new = str(ha_ex) ) )
self.__log_debug( error_message )
raise GPUdbException( error_message, True )
# end try
except GPUdbException as ex:
# Save the original exception for later use
if original_exception is None:
original_exception = ex
# end if
# The host manager can still be going even if the database is down
if ( C._DB_HM_OFFLINE_ERROR_MESSAGE in str(ex) ):
# Looks like the host manager is down
try:
hm_url = self.__switch_hm_url( original_hm_url, current_cluster_switch_count )
self.__log_debug( "Switched to {}".format( str( hm_url ) ) )
except GPUdbHAUnavailableException as ha_ex:
# We've now tried all the HA clusters and circled back
# Get the original cause to propagate to the user
error_message = ("{orig}; {new}".format( orig = str(ex),
new = str(ha_ex) ) )
self.__log_debug( error_message )
raise GPUdbException( error_message, True )
except GPUdbFailoverDisabledException as ha_ex:
# Failover is disabled; return the original cause
error_message = ("{orig}; {new}".format( orig = str(ex),
new = str(ha_ex) ) )
self.__log_debug( error_message )
raise GPUdbException( error_message, True )
# end try
else:
# Any other GPUdbException is a valid failure
self.__log_debug( "Got GPUdbException, so propagating: {}"
"".format( str(ex) ) )
raise
# end if
except Exception as ex:
# Save the original exception for later use
orig_ex_str = GPUdbException.stringify_exception( ex )
if original_exception is None:
original_exception = GPUdbException( orig_ex_str )
# end if
self.__log_debug( "Got regular exception when trying endpoint {}"
" at {}: {}; switch URL..."
"".format( endpoint, str(hm_url), orig_ex_str ) )
# And other random exceptions probably are also connection errors
try:
hm_url = self.__switch_hm_url( original_hm_url, current_cluster_switch_count )
self.__log_debug( "Switched to {}".format( str(hm_url) ) )
except GPUdbHAUnavailableException as ha_ex:
# We've now tried all the HA clusters and circled back
# Get the original cause to propagate to the user
error_message = ("{orig}; {new}".format( orig = orig_ex_str,
new = str(ha_ex) ) )
self.__log_debug( error_message )
raise GPUdbException( error_message, True )
except GPUdbFailoverDisabledException as ha_ex:
# Failover is disabled; return the original cause
error_message = ("{orig}; {new}".format( orig = orig_ex_str,
new = str(ha_ex) ) )
self.__log_debug( error_message )
raise GPUdbException( error_message, True )
# end try
# end try
# end for
# If we reach here, then something went wrong
self.__log_debug( "Failed to submit host manager endpoint {}; exceeded "
"retry count {}; original exception: '{}'; please "
"check if the host manager port is wrong: {}"
"".format( endpoint,
self.__HOST_MANAGER_SUBMIT_REQUEST_RETRY_COUNT,
str( original_exception ),
str( hm_url ) ) )
raise original_exception
# end __submit_request_to_hm
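# --- Illustrative sketch (not generated API code) ---------------------------
# Unlike the head-node loop, the host manager path retries only a fixed
# number of times and keeps the *first* exception so the user sees the
# original cause.  Schematically (names below are placeholders and the retry
# count is assumed to be at least 1):
#
#     def submit_to_hm(submit, switch_hm_url, hm_url, retries):
#         original_exception = None
#         for _ in range(retries):
#             try:
#                 return submit(hm_url)
#             except Exception as ex:
#                 if original_exception is None:
#                     original_exception = ex      # remember the first failure
#                 hm_url = switch_hm_url(hm_url)   # may itself raise
#         raise original_exception
# ----------------------------------------------------------------------------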
def __create_header_and_process_body_data( self, body_data ):
"""Create an HTTP or HTTPS header, and compress the body data
if needed.
Parameters:
body_data
The request body, already Avro- or JSON-encoded
Returns:
A tuple where the first element is the header and the second
element is the body data (either unprocessed or processed).
"""
headers = {}
# Set the user defined headers (do this first so that the required
# headers are not overridden accidentally)
if self.__custom_http_headers:
for header, value in self.__custom_http_headers.items():
headers[ header ] = value
# end for
# end if
if self.encoding == C._ENCODING_BINARY:
headers[ C._HEADER_CONTENT_TYPE ] = C._REQUEST_ENCODING_OCTET
headers[ C._HEADER_ACCEPT ] = C._REQUEST_ENCODING_OCTET
elif self.encoding == C._ENCODING_JSON:
headers[ C._HEADER_CONTENT_TYPE ] = C._REQUEST_ENCODING_JSON
headers[ C._HEADER_ACCEPT ] = C._REQUEST_ENCODING_JSON
elif self.encoding == C._ENCODING_SNAPPY:
headers[ C._HEADER_CONTENT_TYPE ] = C._REQUEST_ENCODING_SNAPPY
headers[ C._HEADER_ACCEPT ] = C._REQUEST_ENCODING_SNAPPY
body_data = snappy.compress(body_data)
# end if
# Set the authentication header, if needed
if self.auth:
headers[ C._HEADER_AUTHORIZATION ] = self.auth
# Add the synchronicity override mode header (ONLY if it's not
# default mode)
if (self.ha_sync_mode != GPUdb.HASynchronicityMode.DEFAULT):
headers[ C._HEADER_HA_SYNC_MODE ] = self.ha_sync_mode.value
return (headers, body_data)
# end __create_header_and_process_body_data
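# --- Illustrative sketch (not generated API code) ---------------------------
# The header helper above picks Content-Type/Accept from the configured
# encoding and, for snappy, compresses the already-encoded body.  A reduced
# version of that decision (the header values are assumptions; the snappy
# call requires the optional python-snappy package):
#
#     def build_headers(encoding, body, auth=None):
#         headers = {}
#         if encoding == "BINARY":
#             headers["Content-type"] = headers["Accept"] = "application/octet-stream"
#         elif encoding == "JSON":
#             headers["Content-type"] = headers["Accept"] = "application/json"
#         elif encoding == "SNAPPY":
#             import snappy                      # optional dependency
#             headers["Content-type"] = headers["Accept"] = "application/x-snappy"
#             body = snappy.compress(body)
#         if auth:
#             headers["Authorization"] = auth
#         return headers, body
#
# The exact header names and values used by the API come from the internal
# constants (C._HEADER_* and C._REQUEST_ENCODING_*), which may differ from
# the strings assumed here.
# ----------------------------------------------------------------------------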
def __initialize_http_connection( self, url, timeout ):
"""Create an HTTP or HTTPS connection object given the URL and timeout
(in seconds). Throws GPUdbException and GPUdbConnectionException as
needed.
"""
# Validate the input arguments
if not isinstance( url, GPUdb.URL ):
msg = ("Argument 'url' must be a GPUdb.URL object; given '{}'"
"".format( str(type(url)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if ( (timeout is not None)
and (not isinstance( timeout, int ))
):
msg = ("Argument 'timeout' must be an integer value, if given; "
"given '{}'".format( str(type(timeout)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# NOTE: Creating a new httplib.HTTPConnection is surprisingly just as
# fast as reusing a persistent one and has the advantage of
# fully retrying from scratch if the connection fails.
# Try to create the connection object
try:
if (url.protocol == 'HTTP'):
conn = httplib.HTTPConnection( host = url.host,
port = url.port,
timeout = timeout)
elif (url.protocol == 'HTTPS'):
if self.skip_ssl_cert_verification:
if IS_PYTHON_3:
ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
ssl_context.verify_mode = ssl.CERT_NONE
ssl_context.check_hostname = False
conn = httplib.HTTPSConnection(host=url.host,
port=url.port,
timeout=timeout,
context=ssl_context)
else:
conn = httplib.HTTPSConnection( host = url.host,
port = url.port,
timeout = timeout,
context = ssl._create_unverified_context() )
else:
conn = httplib.HTTPSConnection( host = url.host,
port = url.port,
timeout = timeout)
except Exception as ex:
msg = ( "Error connecting to '{}' on port "
"'{}' due to (full url '{}'): {}"
"".format( url.host, url.port,
url.url,
GPUdbException.stringify_exception( ex ) ) )
self.__log_debug( msg )
raise GPUdbConnectionException( msg )
return conn
# end __initialize_http_connection
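# --- Illustrative sketch (not generated API code) ---------------------------
# Connection setup above differs only in whether certificate verification is
# skipped.  A standalone equivalent using the standard library (host/port
# are placeholders):
#
#     import http.client, ssl
#
#     def open_connection(host, port, use_https, skip_cert_check, timeout=10):
#         if not use_https:
#             return http.client.HTTPConnection(host, port, timeout=timeout)
#         if skip_cert_check:
#             ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
#             ctx.check_hostname = False          # must be disabled first
#             ctx.verify_mode = ssl.CERT_NONE     # then verification turned off
#             return http.client.HTTPSConnection(host, port, timeout=timeout,
#                                                context=ctx)
#         return http.client.HTTPSConnection(host, port, timeout=timeout)
# ----------------------------------------------------------------------------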
def __switch_url( self, old_url, old_num_cluster_switches ):
"""Switches the URL of the HA ring cluster. Check if we've circled back to
the old URL. If we've circled back to it, then re-shuffle the list of
indices so that the next time, we pick up HA clusters in a different random
manner and throw an exception.
This is not a thread-safe method.
Parameters:
old_url (GPUdb.URL)
The head rank :class:`GPUdb.URL` in use at the time of the
failover that initiated this switch
old_num_cluster_switches (int)
The total number of cluster switches that have occurred up to
the moment before this thread's switch was initiated; this will
be used to determine whether another thread is already trying to
fail over to the next cluster and that this thread should stand
down
Returns:
The next cluster head rank :class:`GPUdb.URL` to try
"""
self.__log_debug( "Attempting to switch URLs, from: {}; original failing URL: {}"
"".format( self.get_url(), str(old_url) ) )
# The user may disable failover altogether
if self.__disable_failover:
self.__log_debug( "Failover is disabled; throwing exception" )
raise GPUdbFailoverDisabledException( "Failover is disabled!" )
# end if
# If there is only one URL, then we can't switch URLs
if ( self.__get_ha_ring_size() == 1 ):
self.__log_debug( "Only one cluster in ring--no fail-over cluster available")
raise GPUdbHAUnavailableException("Only one cluster in ring; HA failover unavailable")
# end if
# Get how many more times other threads have switched clusters
# since the caller called this function. If the situation is:
#
# count = 0 -> the calling thread is the first to get
# here; switch to the next cluster
# 0 < count < ring size -> another thread is either in the process
# of switching clusters or has switched to
# a working one; use the new current one
# count >= ring size -> another thread has already tried all
# failover clusters; throw exception
count_cluster_switches_since_invocation = (self.get_num_cluster_switches()
- old_num_cluster_switches)
# Check if another thread has tried all the clusters in the HA ring
ha_ring_size = self.__get_ha_ring_size()
have_switched_clusters_across_the_ring = ( count_cluster_switches_since_invocation
>= ha_ring_size )
self.__log_debug( "Cluster fail-over attempts across all threads vs. total clusters in ring: {} vs. {}"
"".format( count_cluster_switches_since_invocation, ha_ring_size ) )
if ( have_switched_clusters_across_the_ring ):
all_head_rank_urls = [ str(cluster.head_rank_url) for cluster in self.__cluster_info ]
raise GPUdbHAUnavailableException("Fail-over attempted as many times as clusters in the ring; URLs attempted: {}"
"".format( all_head_rank_urls ) )
# end if
# Check if another thread beat us to switching the URL
curr_url = self.get_url( stringified = False )
if ( (curr_url != old_url)
and (count_cluster_switches_since_invocation > 0) ):
self.__log_debug( "Already failed over to URL: {}".format( str(curr_url) ) )
# Another thread must have already switched the URL; use the new
# current URL
return curr_url
# end if
# This thread is the first one here--select the next cluster to use
# during this HA failover
self.__select_next_cluster()
# If we've circled back, shuffle the indices again so that future
# requests go to a different randomly selected cluster, but also
# let the caller know that we've circled back
curr_url = self.get_url( stringified = False )
if ( curr_url == old_url ):
self.__log_debug( "Current URL is the same as the original URL: {}; randomizing URLs and throwing exception"
"".format( str(old_url)) )
# Re-shuffle and set the index counter to zero
self.__randomize_clusters()
# Let the user know that we've circled back
all_head_rank_urls = [ str(cluster.head_rank_url) for cluster in self.__cluster_info ]
raise GPUdbHAUnavailableException("Circled back to original URL; no clusters available for fail-over among these: {}"
"".format( all_head_rank_urls ) )
# end if
# Haven't circled back to the old URL; so return the new one
self.__log_debug( "Switched to fail-over URL: {}".format( self.get_url() ) )
return self.get_url( stringified = False )
# end __switch_url
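# --- Illustrative sketch (not generated API code) ---------------------------
# The switch logic above uses the count of cluster switches since the caller
# took its snapshot to decide between three cases: this thread switches,
# another thread already switched (reuse its URL), or every cluster was
# already tried (give up).  In schematic form (all names are placeholders):
#
#     def resolve_switch(switches_since_snapshot, ring_size,
#                        current_url, old_url):
#         if switches_since_snapshot >= ring_size:
#             raise RuntimeError("all clusters in the ring already tried")
#         if current_url != old_url and switches_since_snapshot > 0:
#             return current_url          # another thread already failed over
#         return None                     # this thread should advance the ring
# ----------------------------------------------------------------------------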
def __switch_hm_url( self, old_url, old_num_cluster_switches ):
"""Switches the host manager URL of the HA ring cluster. Check if we've
circled back to the old URL. If we've circled back to it, then
re-shuffle the list of indices so that the next time, we pick up HA
clusters in a different random manner and throw an exception.
This is not a thread-safe method.
Parameters:
old_url (GPUdb.URL)
The host manager :class:`GPUdb.URL` in use at the time of the
failover that initiated this switch
old_num_cluster_switches (int)
The total number of cluster switches that have occurred up to
the moment before this thread's switch was initiated; this will
be used to determine whether another thread is already trying to
fail over to the next cluster and that this thread should stand
down
Returns:
The next host manager :class:`GPUdb.URL` to try
"""
self.__log_debug( "Attempting to switch Host Manager URLs, from: {}; original failing URL: {}"
"".format( self.get_hm_url(), str(old_url) ) )
# The user may disable failover altogether
if self.__disable_failover:
self.__log_debug( "Failover is disabled; throwing exception" )
raise GPUdbFailoverDisabledException( "Failover is disabled!" )
# end if
# If there is only one URL, then we can't switch URLs
if ( self.__get_ha_ring_size() == 1 ):
self.__log_debug( "Only one cluster in ring--no fail-over cluster available")
raise GPUdbHAUnavailableException("Only one cluster in ring; HA failover unavailable")
# end if
# Get how many more times other threads have switched clusters
# since the caller called this function. If the situation is:
#
# count = 0 -> the calling thread is the first to get
# here; switch to the next cluster
# 0 < count < ring size -> another thread is either in the process
# of switching clusters or has switched to
# a working one; use the new current one
# count >= ring size -> another thread has already tried all
# failover clusters; throw exception
count_cluster_switches_since_invocation = (self.get_num_cluster_switches()
- old_num_cluster_switches)
# Check if another thread has tried all the clusters in the HA ring
ha_ring_size = self.__get_ha_ring_size()
have_switched_clusters_across_the_ring = ( count_cluster_switches_since_invocation
>= ha_ring_size )
self.__log_debug( "Host Manager cluster fail-over attempts across all threads vs. total clusters in ring: {} vs. {}"
"".format( count_cluster_switches_since_invocation, ha_ring_size ) )
if ( have_switched_clusters_across_the_ring ):
all_hm_urls = [ str(cluster.host_manager_url) for cluster in self.__cluster_info ]
raise GPUdbHAUnavailableException("Host Manager fail-over attempted as many times as clusters in the ring; URLs attempted: {}"
"".format( all_hm_urls ) )
# end if
# Check if another thread beat us to switching the URL
curr_url = self.get_hm_url( stringified = False )
if ( (curr_url != old_url)
and (count_cluster_switches_since_invocation > 0) ):
self.__log_debug( "Already failed over to Host Manager URL: {}".format( str(curr_url) ) )
# Another thread must have already switched the URL; use the new
# current URL
return curr_url
# end if
# This thread is the first one here--select the next cluster to use
# during this HA failover
self.__select_next_cluster()
# If we've circled back, shuffle the indices again so that future
# requests go to a different randomly selected cluster, but also
# let the caller know that we've circled back
curr_url = self.get_hm_url( stringified = False )
if ( curr_url == old_url ):
self.__log_debug( "Current Host Manager URL is the same as the original URL: {}; randomizing URLs and throwing exception"
"".format( str(old_url)) )
# Re-shuffle and set the index counter to zero
self.__randomize_clusters()
# Let the user know that we've circled back
all_hm_urls = [ str(cluster.host_manager_url) for cluster in self.__cluster_info ]
raise GPUdbHAUnavailableException("Circled back to original URL; no clusters available for Host Manager fail-over among these: {}"
"".format( all_hm_urls ) )
# end if
# Haven't circled back to the old URL; so return the new one
self.__log_debug( "Switched to Host Manager fail-over URL: {}".format( self.get_hm_url() ) )
return self.get_hm_url( stringified = False )
# end __switch_hm_url
def __select_next_cluster( self ):
"""Select the next cluster based on the HA failover priority set by the
user. This is not a thread-safe method.
"""
curr_url_index_pointer = self.__get_curr_cluster_index_pointer()
self.__log_debug(
"Cluster switch #{} from cluster #{} ({}) to the next one in {}".format(
self.get_num_cluster_switches() + 1, curr_url_index_pointer + 1, self.get_url(), [str(url) for url in self.get_failover_urls()]
))
# Increment the index by one (mod url list length)
self.__set_curr_cluster_index_pointer( (curr_url_index_pointer + 1) % self.__get_ha_ring_size() )
# Keep a running count of how many times we had to switch clusters
self.__increment_num_cluster_switches()
self.__log_debug(
"Cluster switch #{} to cluster #{} ({})".format(
self.get_num_cluster_switches(), self.__get_curr_cluster_index_pointer() + 1, self.get_url()
))
# end __select_next_cluster
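# --- Illustrative sketch (not generated API code) ---------------------------
# Selecting the next cluster is a plain round-robin advance paired with a
# running switch counter, roughly (placeholder names, not the real private
# attributes):
#
#     def select_next(index, ring_size, num_switches):
#         return (index + 1) % ring_size, num_switches + 1
# ----------------------------------------------------------------------------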
def __post_and_get( self,
host, port, url_path, connection_type,
headers, body_data, endpoint ):
"""
Create an HTTP connection, POST the request, and return the server's response.
Parameters:
host (str)
The host to send the request to
port (str)
The port to send the request to
url_path (str)
The URL for the request (exclusive of the endpoint)
connection_type (str)
'HTTP' or 'HTTPS'
headers (dict)
The headers to use for the HTTP or HTTPS connection
body_data (bytes)
Data to POST to GPUdb server.
endpoint (str)
Server path to POST to, e.g. "/insert/records".
"""
# NOTE: Creating a new httplib.HTTPConnection is surprisingly just as
# fast as reusing a persistent one and has the advantage of
# fully retrying from scratch if the connection fails.
# Get the full URL path for the request
url_path = (url_path + endpoint)
# Try to establish a connection
try:
if (connection_type == 'HTTP'):
conn = httplib.HTTPConnection( host = host,
port = port,
timeout = self.timeout)
elif (connection_type == 'HTTPS'):
if self.skip_ssl_cert_verification:
conn = httplib.HTTPSConnection( host = host,
port = port,
timeout = self.timeout,
context = ssl._create_unverified_context() )
else:
conn = httplib.HTTPSConnection( host = host,
port = port,
timeout = self.timeout)
except Exception as ex:
ex_str = GPUdbException.stringify_exception( ex )
raise GPUdbConnectionException( "Error connecting to '{}' on port "
"{} due to: {}"
"" .format(host, port, ex_str) )
# Try to post the message
try:
conn.request(C._REQUEST_POST, url_path, body_data, headers)
except Exception as ex:
ex_str = GPUdbException.stringify_exception( ex )
raise GPUdbConnectionException( "Error posting to '{}:{}{}' due "
"to: {}"
"".format(host, port,
url_path, ex_str) )
# Get the response
try:
resp = conn.getresponse()
except: # some error occurred; return a message
raise GPUdbConnectionException( "Timeout Error: No response received from %s:%s"
"" % (host, port) )
# Read the response
try:
resp_data = resp.read()
resp_time = resp.getheader('x-request-time-secs',None)
# Check the HTTP status code and throw an exit exception as appropriate
status = resp.status
if ( status in self.__http_response_triggering_failover ):
raise GPUdbExitException( "Could not connect to database at '{}:{}{}' "
"due to status code '{}'"
"".format( host, port, url_path, status ) )
return resp_data, resp_time
except GPUdbExitException as ex: # some error occurred; return a message
raise
except: # some error occurred; return a message
raise GPUdbException( "Error reading response from {}:{} for {}"
"".format( host, port, endpoint ) )
# end __post_and_get
def __client_to_object_encoding( self ):
"""Returns object encoding for queries based on the GPUdb client's
encoding.
"""
return self.client_to_object_encoding_map[ self.encoding ]
# end __client_to_object_encoding
def __read_orig_datum(self, SCHEMA, encoded_datum, encoding=None):
"""
Decode the binary or JSON encoded datum using the avro schema and return a dict.
Parameters:
SCHEMA
A parsed schema from avro.schema.parse().
encoded_datum
Binary or JSON encoded data.
encoding
Type of avro encoding, either "BINARY" or "JSON";
None uses the encoding this class was initialized with.
"""
if encoding is None:
encoding = self.encoding
if (encoding == C._ENCODING_BINARY) or (encoding == C._ENCODING_SNAPPY):
return _Util.decode_binary_data( SCHEMA, encoded_datum )
elif encoding == C._ENCODING_JSON:
data_str = json.loads( _Util.ensure_str(encoded_datum) )
return data_str
# end __read_orig_datum
def __read_datum(self, SCHEMA, encoded_datum, encoding=None, response_time=None):
"""
Decode a gpudb_response and decode the contained message too.
Parameters:
SCHEMA
The parsed schema from avro.schema.parse() that the gpudb_response contains.
encoded_datum
A BINARY or JSON encoded gpudb_response message.
Returns:
An OrderedDict of the decoded gpudb_response message's data with the
gpudb_response put into the "status_info" field.
"""
# Parse the gpudb_response message
REP_SCHEMA = self.gpudb_schemas["gpudb_response"]["RSP_SCHEMA"]
resp = self.__read_orig_datum(REP_SCHEMA, encoded_datum, encoding)
# Now parse the actual response if there is no error
# NOTE: DATA_SCHEMA should be equivalent to SCHEMA but is NOT for get_set_sorted
stype = resp['data_type']
if stype == 'none':
out = collections.OrderedDict()
else:
if self.encoding == C._ENCODING_JSON:
out = self.__read_orig_datum(SCHEMA, resp['data_str'], C._ENCODING_JSON)
elif (self.encoding == C._ENCODING_BINARY) or (self.encoding == C._ENCODING_SNAPPY):
out = self.__read_orig_datum(SCHEMA, resp['data'], C._ENCODING_BINARY)
del resp['data']
del resp['data_str']
out['status_info'] = resp
if (response_time is not None):
out['status_info']['response_time'] = float(response_time)
return out
# end __read_datum
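# --- Illustrative sketch (not generated API code) ---------------------------
# Every endpoint reply is wrapped in a "gpudb_response" envelope; the payload
# is carried in 'data' (binary) or 'data_str' (JSON), and the envelope itself
# is attached to the result under 'status_info'.  A dict-only rendering of
# that unwrapping (decode_payload is a placeholder for the avro/JSON decode):
#
#     def unwrap_envelope(envelope, decode_payload, response_time=None):
#         if envelope["data_type"] == "none":
#             out = {}
#         else:
#             out = decode_payload(envelope)
#         envelope.pop("data", None)
#         envelope.pop("data_str", None)
#         out["status_info"] = envelope
#         if response_time is not None:
#             out["status_info"]["response_time"] = float(response_time)
#         return out
# ----------------------------------------------------------------------------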
def __read_orig_datum_cext(self, SCHEMA, encoded_datum, encoding=None):
"""
Decode the binary or JSON encoded datum using the avro schema and return a dict.
Parameters:
SCHEMA
A parsed schema from avro.schema.parse().
encoded_datum
Binary or JSON encoded data.
encoding
Type of avro encoding, either "BINARY" or "JSON";
None uses the encoding this class was initialized with.
"""
if encoding is None:
encoding = self.encoding
try:
if (encoding == C._ENCODING_BINARY) or (encoding == C._ENCODING_SNAPPY):
return SCHEMA.decode( encoded_datum )
elif encoding == C._ENCODING_JSON:
data_str = json.loads( _Util.ensure_str(encoded_datum) )
return data_str
except (Exception, RuntimeError) as ex:
self.__log_debug( "Encountered problem for encoded_datum: {}"
"".format( encoded_datum ) )
msg = ( "Unable to parse server response; "
"please check that the client and server "
"versions match. Got error {}"
"".format( GPUdbException.stringify_exception( ex ) ) )
self.__log_debug( msg )
raise GPUdbDecodingException ( msg )
# end __read_orig_datum_cext
def __read_datum_cext(self, SCHEMA, encoded_datum, encoding=None, response_time=None):
"""
Decode a gpudb_response and decode the contained message too.
Parameters:
SCHEMA
The parsed schema from .protocol.Schema() that the gpudb_response contains.
encoded_datum
A BINARY or JSON encoded gpudb_response message.
Returns:
An OrderedDict of the decoded gpudb_response message's data with the
gpudb_response put into the "status_info" field.
"""
# Parse the gpudb_response message
RSP_SCHEMA = self.gpudb_schemas["gpudb_response"]["RSP_SCHEMA"]
resp = self.__read_orig_datum_cext( RSP_SCHEMA, encoded_datum, encoding )
# Now parse the actual response if there is no error
# NOTE: DATA_SCHEMA should be equivalent to SCHEMA but is NOT for get_set_sorted
stype = resp['data_type']
if stype == 'none':
out = collections.OrderedDict()
else:
if self.encoding == C._ENCODING_JSON:
out = self.__read_orig_datum_cext(SCHEMA, resp['data_str'], C._ENCODING_JSON)
elif (self.encoding == C._ENCODING_BINARY) or (self.encoding == C._ENCODING_SNAPPY):
try:
out = SCHEMA.decode( encoded_datum, resp['data'] )
except (Exception, RuntimeError) as ex:
ex_str = GPUdbException.stringify_exception( ex )
raise GPUdbDecodingException ( "Unable to parse server response from {}; "
"please check that the client and "
"server versions match. Got error '{}'"
"".format( self.gpudb_full_url,
ex_str ) )
# end inner if
# end if
del resp['data']
del resp['data_str']
out['status_info'] = resp
if (response_time is not None):
out['status_info']['response_time'] = float(response_time)
# For error cases, certain scenarios need to trigger a failover
if (out['status_info']['status'] == 'ERROR'):
error_msg = out['status_info']['message']
if ( (C._DB_CONNECTION_REFUSED in error_msg)
or (C._DB_CONNECTION_RESET in error_msg)
or (C._DB_EXITING_ERROR_MESSAGE in error_msg)
or (C._DB_OFFLINE_ERROR_MESSAGE in error_msg)
or (C._DB_SYSTEM_LIMITED_ERROR_MESSAGE in error_msg) ):
msg = ( "Database returned failover triggering error: {}"
"".format( error_msg ) )
self.__log_debug( "Throwing EXIT exception; {}".format( msg ) )
raise GPUdbExitException( msg )
# end if
return out
# end __read_datum_cext
def __get_schemas(self, base_name,
get_req_cext = False,
get_rsp_cext = False ):
"""
Get a tuple of parsed and cached request and reply schemas.
Parameters:
base_name
Schema name, e.g. "base_name"+"_request.json" or "_response.json"
get_req_cext (bool)
If True, then try to return the c-extension version
of the request schema. If none found, raise exception.
Default is False.
get_rsp_cext (bool)
If True, then try to return the c-extension version
of the response schema. If none found, raise exception.
Default is False.
"""
if get_req_cext:
if "REQ_SCHEMA_CEXT" not in self.gpudb_schemas[base_name]:
raise GPUdbException( "No c-extension version of the request "
"schema was found for {}".format( base_name ) )
# end inner if
REQ_SCHEMA = self.gpudb_schemas[base_name]["REQ_SCHEMA_CEXT"]
else:
REQ_SCHEMA = self.gpudb_schemas[base_name]["REQ_SCHEMA"]
# end if
if get_rsp_cext:
if "RSP_SCHEMA_CEXT" not in self.gpudb_schemas[base_name]:
raise GPUdbException( "No c-extension version of the response "
"schema was found for {}".format( base_name ) )
# end inner if
RSP_SCHEMA = self.gpudb_schemas[base_name]["RSP_SCHEMA_CEXT"]
else:
RSP_SCHEMA = self.gpudb_schemas[base_name]["RSP_SCHEMA"]
# end if
return (REQ_SCHEMA, RSP_SCHEMA)
# end __get_schemas
def __get_endpoint(self, func_name):
"""
Get the endpoint for a given query.
Parameters:
func_name
The name of the query function whose endpoint to look up.
"""
return self.gpudb_func_to_endpoint_map[ func_name ]
# end __get_endpoint
def __sanitize_dicts( self, _dict ):
"""If the given options dictionary has boolean values, replace
them with the strings 'true' and 'false' for consumption by the
database. Return the "sanitized" dictionary.
"""
if not isinstance( _dict, (dict, collections.OrderedDict) ):
return
# Iterate over a copy of the keys so that we can modify the dict
for key in _dict.keys():
val = _dict[ key ]
if isinstance( val, bool ):
if val: # true
_dict[ key ] = 'true'
else:
_dict[ key ] = 'false'
elif isinstance( val, (dict, collections.OrderedDict) ):
_dict[ key ] = self.__sanitize_dicts( _dict[ key ] )
# end loop
return _dict
# end sanitize_dicts
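# A minimal sketch (not part of the API) of what __sanitize_dicts does to an
# options dict; the option names below are made up for illustration only:
#
#     opts = {"return_record_ids": True, "nested": {"flush": False}}
#     # after sanitization: {"return_record_ids": "true",
#     #                      "nested": {"flush": "false"}}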
def encode_datum(self, SCHEMA, datum, encoding = None):
"""
Returns an avro binary or JSON encoding of the given datum dict using its schema.
Parameters:
SCHEMA (str or avro.Schema)
A parsed schema object from avro.schema.parse() or a
string containing the schema.
datum (dict)
A dict of key-value pairs containing the data to encode (the
entries must match the schema).
"""
# Convert the string to a parsed schema object (if needed)
if isinstance( SCHEMA, basestring ):
SCHEMA = schema.parse( SCHEMA )
if encoding is None:
encoding = self.encoding
else:
encoding = encoding.upper()
# Build the encoder; this output is where the data will be written
if encoding == C._ENCODING_BINARY or encoding == C._ENCODING_SNAPPY:
return _Util.encode_binary_data( SCHEMA, datum, self.encoding )
elif encoding == C._ENCODING_JSON:
return json.dumps( _Util.convert_dict_bytes_to_str( datum ) )
# end encode_datum
def encode_datum_cext(self, SCHEMA, datum, encoding = None):
"""
Returns an avro binary or JSON encoding of the given datum dict using its schema.
Parameters:
SCHEMA (str or avro.Schema)
A parsed schema object from avro.schema.parse() or a
string containing the schema.
datum (dict)
A dict of key-value pairs containing the data to encode (the
entries must match the schema).
"""
if encoding is None:
encoding = self.encoding
else:
encoding = encoding.upper()
# Build the encoder; this output is where the data will be written
if encoding == C._ENCODING_BINARY or encoding == C._ENCODING_SNAPPY:
return _Util.encode_binary_data_cext( SCHEMA, datum, self.encoding )
elif encoding == C._ENCODING_JSON:
# Convert bytes to strings first
datum = _Util.convert_dict_bytes_to_str( datum )
# Create an OrderedDict for the JSON since the server expects
# fields in order
json_datum = collections.OrderedDict()
# Populate the JSON-encoded payload
for field in SCHEMA.fields:
name = field.name
json_datum[ name ] = datum[ name ]
# end loop
return json.dumps( json_datum )
# end encode_datum_cext
# ------------- Convenience Functions ------------------------------------
@staticmethod
def valid_json(json_string):
"""
Validates a JSON string by trying to parse it into a Python object
"""
try:
json.loads(json_string)
except ValueError as err:
return False
return True
@staticmethod
def merge_dicts(*dict_args):
"""
Given any number of dictionaries, shallow copy and merge them into a new
dict; precedence goes to key-value pairs in later dictionaries.
"""
result = {}
for dictionary in dict_args:
result.update(dictionary)
return result
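# A hedged usage sketch for merge_dicts; the dict contents are illustrative only:
#
#     base = {"table_name": "t1", "mode": "a"}
#     extra = {"mode": "b"}
#     GPUdb.merge_dicts(base, extra)   # -> {"table_name": "t1", "mode": "b"}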
@staticmethod
def is_json_array(json_string):
trimmed = json_string.strip()
return trimmed.startswith("[") and trimmed.endswith("]")
@staticmethod
def is_json(json_string):
try:
obj = json.loads(json_string)
return isinstance( obj, list), ''
except ValueError as err:
return False, str(err)
@staticmethod
def convert_json_list_to_json_array(json_list):
if not isinstance(json_list, list):
raise ValueError("Input must be an object of type 'list'")
return "[{}]".format(",".join(json_list))
def read_trigger_msg(self, encoded_datum):
RSP_SCHEMA = self.gpudb_schemas[ "trigger_notification" ]["RSP_SCHEMA"]
return self.__read_orig_datum_cext(RSP_SCHEMA, encoded_datum, C._ENCODING_BINARY)
def logger(self, ranks, log_levels, options = {}):
"""Convenience function to change log levels of some
or all GPUdb ranks.
Parameters:
ranks (list of ints)
A list containing the ranks to which to apply the new log levels.
log_levels (dict of str to str)
A map where the keys dictate which log's levels to change, and the
values dictate what the new log levels will be.
options (dict of str to str)
Optional parameters. Default value is an empty dict ( {} ).
Returns:
A dict with the following entries--
status (str)
The status of the endpoint ('OK' or 'ERROR').
log_levels (map of str to str)
A map of each log level to its respective value
"""
REQ_SCHEMA = self.logger_request_schema
RSP_SCHEMA = self.logger_response_schema
datum = {}
datum["ranks"] = ranks
datum["log_levels"] = log_levels
datum["options"] = options
response = self.__submit_request( "/logger", datum )
if not _Util.is_ok( response ): # problem setting the log levels
raise GPUdbException( "Problem setting the log levels: "
+ _Util.get_error_msg( response ) )
return AttrDict( response )
# end logger
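# A hedged usage sketch for logger(); 'db' is an assumed, already-connected
# GPUdb instance, and the rank list and log-level names are illustrative only:
#
#     resp = db.logger(ranks=[0, 1], log_levels={"gpudb": "DEBUG"})
#     print(resp["log_levels"])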
def set_server_logger_level(self, ranks, log_levels, options = {}):
"""Convenience function to change log levels of some
or all GPUdb ranks.
Parameters:
ranks (list of ints)
A list containing the ranks to which to apply the new log levels.
log_levels (dict of str to str)
A map where the keys dictate which log's levels to change, and the
values dictate what the new log levels will be.
options (dict of str to str)
Optional parameters. Default value is an empty dict ( {} ).
Returns:
A dict with the following entries--
status (str)
The status of the endpoint ('OK' or 'ERROR').
log_levels (map of str to str)
A map of each log level to its respective value
"""
return self.logger(ranks, log_levels, options)
# end set_server_logger_level
def set_client_logger_level( self, log_level ):
"""Set the log level for the client GPUdb class.
Parameters:
log_level (int, long, or str)
A valid log level for the logging module
"""
try:
self.log.setLevel( log_level )
except (ValueError, TypeError, Exception) as ex:
ex_str = GPUdbException.stringify_exception( ex )
raise GPUdbException("Invalid log level: '{}'".format( ex_str ))
# end set_client_logger_level
# Helper function to emulate old /add (single object insert) capability
def insert_object(self, set_id, object_data, params=None):
if (params):
return self.insert_records(set_id, [object_data], None, params)
else:
return self.insert_records(set_id, [object_data], None, {"return_record_ids":"true"})
def insert_records_from_json(self, json_records, table_name, json_options = None, create_table_options = None, options = None ):
"""Method to insert a single JSON record or an array of JSON records passed in as a string.
Parameters:
json_records (str) : Either a single JSON record or an array of JSON records (as string). Mandatory.
table_name (str) : The name of the table to insert into.
json_options (dict) : The only valid option is *validate*, which can be True or False
create_table_options (dict) : Same options as the *create_table_options* in :meth:`GPUdb.insert_records_from_payload` endpoint
options (dict) : Same options as *options* in :meth:`GPUdb.insert_records_from_payload` endpoint
Example
::
response = gpudb.insert_records_from_json(records, "test_insert_records_json", json_options={'validate': True}, create_table_options={'truncate_table': 'true'})
response_object = json.loads(response)
print(response_object['data']['count_inserted'])
.. seealso:: :meth:`GPUdb.insert_records_from_payload`
"""
if json_records is None or type(json_records) != str:
raise GPUdbException("'json_records' is mandatory and must be of type 'str'")
if len(json_records) == 0:
raise GPUdbException("'json_records' must be valid JSON and cannot be empty")
if table_name is None or type(table_name) != str or len(table_name) == 0:
raise GPUdbException("'table_name' must be a valid non-empty string")
if json_options and 'validate' in json_options and json_options['validate']:
if not GPUdb.valid_json( json_records):
raise GPUdbException("'json_records' passed in is not a valid JSON")
if create_table_options is None :
create_table_options = {}
if options is None or not options:
options = {'table_name': table_name}
# overwrite the value
options['table_name'] = table_name
combined_options = options if create_table_options is None or not create_table_options else GPUdb.merge_dicts( options, create_table_options )
query_string = urlencode(combined_options)
final_endpoint = "/insert/records/json?{}".format(query_string)
return self.__submit_request_json( final_endpoint, json_records )
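# A hedged sketch of inserting an array of JSON records; 'db' is an assumed
# GPUdb connection and the table/column names are made up for illustration:
#
#     recs = ['{"id": 1, "name": "a"}', '{"id": 2, "name": "b"}']
#     payload = GPUdb.convert_json_list_to_json_array(recs)
#     resp = db.insert_records_from_json(payload, "example_table",
#                                        json_options={"validate": True})
#     print(json.loads(resp)["data"]["count_inserted"])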
def get_records_json(self, table_name, column_names = None, offset = 0, limit = -9999, expression = None, orderby_columns = None, having_clause = None):
""" This method is used to retrieve records from a Kinetica table in the form of
a JSON array (stringified). The only mandatory parameter is 'table_name'.
The rest are all optional with suitable defaults wherever applicable.
Args:
table_name (str): Name of the table
column_names (list): the column names to retrieve
offset (int): the offset to start from - default 0
limit (int): the maximum number of records - default GPUdb.END_OF_SET
expression (str): the filter expression
orderby_columns (list): the list of columns to order by
having_clause (str): the having clause
Returns:
The response string (JSON)
Raises:
GPUdbException: On detecting invalid parameters or some other internal errors
Example
::
resp = gpudb.get_records_json("table_name")
json_object = json.loads(resp)
print(json_object["data"]["records"])
"""
if table_name is None or type(table_name) != str or len(table_name) == 0:
raise GPUdbException("'table_name' must be a valid non-empty string")
if column_names is not None and type(column_names) != list:
raise GPUdbException("'column_names' must be of type 'list'")
if orderby_columns is not None and type(orderby_columns) != list:
raise GPUdbException("'orderby_columns' must be of type 'list'")
if offset is not None and type(offset) != int:
raise GPUdbException("'offset' must be of type 'int'")
if limit is not None and type(limit) != int:
raise GPUdbException("'limit' must be of type 'int'")
if expression is not None and type(expression) != str:
raise GPUdbException("'expression' must be of type 'str'")
if having_clause is not None and type(having_clause) != str:
raise GPUdbException("'having_clause' must be of type 'str'")
get_records_json_options = {'table_name': table_name}
if column_names is not None and len(column_names) != 0:
get_records_json_options['column_names'] = ','.join(column_names)
offset = 0 if (offset is None or offset < 0) else offset
limit = GPUdb.END_OF_SET if (limit is None or limit < 0) else limit
get_records_json_options['offset'] = offset
get_records_json_options['limit'] = limit
if expression is not None and expression != "":
get_records_json_options['expression'] = expression
if orderby_columns is not None and len(orderby_columns) != 0:
get_records_json_options['order_by'] = ','.join(orderby_columns)
if having_clause is not None and having_clause != "":
get_records_json_options['having'] = having_clause
query_string = urlencode(get_records_json_options)
final_endpoint = "/get/records/json?{}".format(query_string)
return self.__submit_request_json_without_body( final_endpoint )
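# A hedged sketch of a filtered, ordered fetch with get_records_json; 'db',
# the table name, and the column/expression values are assumptions:
#
#     resp = db.get_records_json("example_table",
#                                column_names=["id", "name"],
#                                expression="id > 10",
#                                orderby_columns=["id"],
#                                limit=100)
#     print(json.loads(resp)["data"]["records"])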
# Helper for dynamic schema responses
def parse_dynamic_response(self, retobj, do_print=False, convert_nulls = True):
if (retobj['status_info']['status'] == 'ERROR'):
return retobj
my_schema = schema.parse(retobj['response_schema_str'])
fields = eval(retobj['response_schema_str'])['fields']
nullable = [type(x['type']['items']) != str for x in fields]
if len(retobj['binary_encoded_response']) > 0:
data = retobj['binary_encoded_response']
# Use the python avro package to decode the data
decoded = _Util.decode_binary_data( my_schema, data )
# Translate the column names
column_lookup = decoded['column_headers']
translated = collections.OrderedDict()
for i,(n,column_name) in enumerate(zip(nullable,column_lookup)):
if (n and convert_nulls): # nullable - replace None with '<NULL>'
col = [x if x is not None else '<NULL>' for x in decoded['column_%d'%(i+1)]]
else:
col = decoded['column_%d'%(i+1)]
# end if
translated[column_name] = col
# end loop
# # TODO: For 7.0, use the following block of code instead of
# # the above block (which will now go inside the if block.
# if "record_type" not in retobj:
# # Use the python avro package to decode the data
# decoded = _Util.decode_binary_data( my_schema, data )
# # Translate the column names
# column_lookup = decoded['column_headers']
# translated = collections.OrderedDict()
# for i,(n,column_name) in enumerate(zip(nullable,column_lookup)):
# if (n and convert_nulls): # nullable - replace None with '<NULL>'
# col = [x if x is not None else '<NULL>' for x in decoded['column_%d'%(i+1)]]
# else:
# col = decoded['column_%d'%(i+1)]
# # end if
# translated[column_name] = col
# # end loop
# else: # use the c-extension for avro decoding
# record_type = retobj["record_type"]
# if not isinstance( record_type, RecordType ):
# raise GPUdbException( "'record_type' must be a RecordType object; given {}"
# "".format( str(type( record_type )) ) )
# records = record_type.decode_dynamic_records( data )
# # For 6.2, return column-major data
# # TODO: For 7.0, just return records, maybe
# translated = GPUdbRecord.transpose_data_to_col_major( records )
# # end if
retobj['response'] = translated
else: # JSON encoding
retobj['response'] = collections.OrderedDict()
#note running eval here returns a standard (unordered) dict
#d_resp = eval(retobj['json_encoded_response'])
d_resp = json.loads(retobj['json_encoded_response'])
column_lookup = d_resp['column_headers']
for i,(n,column_name) in enumerate(zip(nullable,column_lookup)):
column_index_name = 'column_%d'%(i+1)
#double/float conversion here
#get the datatype of the underlying data
data_type = my_schema.fields_dict[column_index_name].type.items.type
if (data_type == 'double' or data_type == 'float'):
retobj['response'][column_name] = [float(x) for x in d_resp[column_index_name]]
else:
retobj['response'][column_name] = d_resp[column_index_name]
if (n and convert_nulls): # nullable
retobj['response'][column_name] = [x if x is not None else '<NULL>' for x in retobj['response'][column_name]]
if (do_print):
print(tabulate(retobj['response'],headers='keys',tablefmt='psql'))
return AttrDict( retobj )
# end parse_dynamic_response
def wms( self, wms_params, url = None ):
"""Submits a WMS call to the server.
Parameters:
wms_params (str)
A string containing the WMS endpoint parameters, not containing
the '/wms' endpoint itself.
url (str or GPUdb.URL)
An optional URL to which we submit the /wms endpoint. If None
given, use the current URL for this :class:`GPUdb` object.
Returns:
A dict with the following entries--
data
The /wms content.
status_info (dict)
A dict containing more information regarding the request. Keys:
* **status**
* **message**
* **response_time**
"""
# Validate the input arguments
if not url:
url = self.get_url( stringified = False )
elif isinstance( url, (basestring, unicode) ):
try:
url = GPUdb.URL( url )
except Exception as ex:
ex_str = GPUdbException.stringify_exception( ex )
raise GPUdbException( "Error parsing given URL '{}': {}"
"".format( url, ex_str) )
elif not isinstance( url, GPUdb.URL ):
msg = ("Argument 'url' must be a GPUdb.URL object, a string, or None;"
" given '{}'".format( str(type(url)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
if not wms_params:
msg = ("Argument 'wms_params' must be a string; "
"given '{}'".format( str(wms_params) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# Make sure that it starts with ?
if not wms_params.startswith( "?" ):
wms_params = "?" + wms_params
# end if
http_conn = self.__initialize_http_connection( url, self.timeout )
# WMS is a GET, unlike the other endpoints, which are POSTs
headers = {
C._HEADER_ACCEPT: C._REQUEST_ENCODING_JSON
}
wms_path = "{url_path}/wms{params}".format( url_path = url.path,
params = wms_params )
# Start shaping up the response
result = {}
status_info = {}
status_info['message'] = ''
# Actually submit the /wms request
try:
# Send the get request
http_conn.request( C._REQUEST_GET, wms_path, "", headers )
# Process the response
raw_response = http_conn.getresponse()
# Save the response
result["data"] = raw_response.read()
# Save ancillary information
status_info["status"] = "OK"
status_info["response_time"] = raw_response.getheader( "x-request-time-secs" )
except Exception as ex:
# Save the error status and message; the response object may not exist
# if the request itself failed, so do not read headers from it here
status_info["status"] = "ERROR"
status_info["message"] = GPUdbException.stringify_exception( ex )
status_info["response_time"] = None
# end try
result[ "status_info" ] = status_info
return AttrDict( result )
# end wms( url )
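# A hedged usage sketch for wms(); 'db' is an assumed connection and the WMS
# parameter string (layer, bbox, size) is illustrative only, not a verified
# set of Kinetica WMS parameters:
#
#     params = "REQUEST=GetMap&LAYERS=example_table&BBOX=-180,-90,180,90" \
#              "&WIDTH=512&HEIGHT=256&FORMAT=image/png"
#     resp = db.wms(params)
#     if resp["status_info"]["status"] == "OK":
#         open("map.png", "wb").write(resp["data"])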
def ping( self, url ):
"""Pings the given URL and returns the response. If no response,
returns an empty string.
Parameters:
url (GPUdb.URL)
The URL which we are supposed to ping.
Returns:
The ping response, or an empty string if it fails.
"""
# Validate the input arguments
if isinstance( url, (basestring, unicode) ):
try:
url = GPUdb.URL( url )
except Exception as ex:
ex_str = GPUdbException.stringify_exception( ex )
raise GPUdbException( "Error parsing given URL '{}': {}"
"".format( url, ex_str ) )
elif not isinstance( url, GPUdb.URL ):
msg = ("Argument 'url' must be a GPUdb.URL object or a string; "
"given '{}'".format( str(type(url)) ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
try:
http_conn = self.__initialize_http_connection( url, self.__server_connection_timeout )
# Ping is a GET, unlike the other endpoints, which are POSTs
headers = {
C._HEADER_ACCEPT: C._REQUEST_ENCODING_JSON
}
http_conn.request( C._REQUEST_GET, url.path, "", headers )
# Get the ping response
response = http_conn.getresponse()
raw_data = response.read()
# Decode the response, possibly bytes, into string
if isinstance( raw_data, (basestring, unicode) ):
# Got a string; no need to decode
return raw_data
elif isinstance( raw_data, bytes ):
return raw_data.decode("utf-8")
else:
raise GPUdbException( "Unhandled response {} with type {}"
"".format( raw_data,
str(type(raw_data)) ) )
# end if
except Exception as ex:
ex_str = GPUdbException.stringify_exception( ex )
self.__log_debug( "Got error while pinging: {}".format( ex_str ) )
return ""
# end try
# end ping( url )
if IS_PYTHON_3:
@deprecated
def is_kinetica_running( self, url ):
"""Verifies that GPUdb is running at the given URL (does not do any HA
failover).
Parameters:
url (GPUdb.URL)
The URL which we are supposed to ping.
Returns:
True if Kinetica is running at that URL, False otherwise.
"""
ping_response = self.ping( url )
self.__log_debug( "HTTP server @ {} responded with '{}'"
"".format( str(url), ping_response ) )
if ( ping_response == C._KINETICA_IS_RUNNING ):
# Kinetica IS running!
return True
# end if
# Did not get the expected response
return False
# end is_kinetica_running
else:
def is_kinetica_running( self, url ):
"""Verifies that GPUdb is running at the given URL (does not do any HA
failover).
Parameters:
url (GPUdb.URL)
The URL which we are supposed to ping.
Returns:
True if Kinetica is running at that URL, False otherwise.
"""
warnings.warn("deprecated", DeprecationWarning)
ping_response = self.ping( url )
self.__log_debug( "HTTP server @ {} responded with '{}'"
"".format( str(url), ping_response ) )
if ( ping_response == C._KINETICA_IS_RUNNING ):
# Kinetica IS running!
return True
# end if
# Did not get the expected response
return False
# end is_kinetica_running
# end get_server_debug_information
def to_df(self,
sql,
sql_params = [],
batch_size = 5000,
sql_opts = {},
show_progress = False):
"""Runs the given query and converts the result to a Pandas Data Frame.
Parameters:
sql (str)
The SQL query to run
sql_params (list)
The SQL parameters that will be substituted for tokens (e.g. $1 $2)
batch_size (int)
The number of records to retrieve at a time from the database
sql_opts (dict)
The options for SQL execution, matching the options passed to
:meth:`GPUdb.execute_sql`. Defaults to an empty dict ( {} ).
show_progress (bool)
Whether to display progress on the console or not. Defaults to False.
Raises:
GPUdbException: If the query cannot be executed successfully.
Returns:
pd.DataFrame: A Pandas Data Frame containing the result set of the SQL query or None if
there are no results
"""
from . import gpudb_dataframe
return gpudb_dataframe.DataFrameUtils.sql_to_df(self, sql, sql_params, batch_size, sql_opts, show_progress)
# end to_df
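# A hedged sketch for to_df(); 'db', the table name, and the parameter value
# are assumptions, and pandas must be installed for this to work:
#
#     df = db.to_df("SELECT * FROM example_table WHERE id > $1",
#                   sql_params=[10], batch_size=10000)
#     if df is not None:
#         print(df.head())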
def query(self, sql, batch_size = 5000, sql_params = [], sql_opts = {}):
"""Execute a SQL query and return a GPUdbSqlIterator
Parameters:
sql (str)
The SQL query to run
batch_size(int)
The number of records to retrieve at a time from the database
sql_params(list of native types)
The SQL parameters that will be substituted for tokens (e.g. $1 $2)
sql_opts(dict)
The options for SQL execution, matching the options passed to
:meth:`GPUdb.execute_sql`. Defaults to an empty dict ( {} ).
Returns:
An instance of GPUdbSqlIterator.
"""
from . import gpudb_sql_iterator
sql_iterator = gpudb_sql_iterator.GPUdbSqlIterator(db=self,
sql=sql,
batch_size=batch_size,
sql_params=sql_params,
sql_opts=sql_opts)
return sql_iterator
# end query
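# A hedged sketch for query(); 'db' and the SQL are assumptions. The iterator
# yields one row at a time, fetching batch_size records per round trip, and
# can be used as a context manager (as query_one below does):
#
#     with db.query("SELECT id, name FROM example_table", batch_size=1000) as it:
#         for row in it:
#             print(row)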
def query_one(self, sql, sql_params = [], sql_opts = {}):
"""Execute a SQL query that returns only one row.
Parameters:
sql (str)
The SQL query to run
sql_params(list of native types)
The SQL parameters that will be substituted for tokens (e.g. $1 $2)
sql_opts(dict)
The options for SQL execution, matching the options passed to
:meth:`GPUdb.execute_sql`. Defaults to an empty dict ( {} ).
Returns:
The returned row or None.
"""
from . import gpudb_sql_iterator
with gpudb_sql_iterator.GPUdbSqlIterator(db=self,
sql=sql,
sql_params=sql_params,
batch_size=2,
sql_opts=sql_opts) as sql_iterator:
if(sql_iterator.total_count == 0):
return None
elif(sql_iterator.total_count > 1):
raise GPUdbException("More than one result was returned")
row = sql_iterator.__next__()
return row
# end query_one
def execute(self, sql, sql_params = [], sql_opts = {}):
"""Execute a SQL query and return the row count.
Parameters:
sql (str)
The SQL to execute
sql_params(list of native types)
The SQL parameters that will be substituted for tokens (e.g. $1 $2)
sql_opts(dict)
The options for SQL execution, matching the options passed to
:meth:`GPUdb.execute_sql`. Defaults to an empty dict ( {} ).
Returns:
Number of records affected
"""
from . import gpudb_sql_iterator
GPUdb._set_sql_params(sql_opts, sql_params)
response = self.execute_sql(statement=sql, options=sql_opts)
GPUdb._check_error(response)
count_affected = response['count_affected']
return count_affected
# end execute
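# A hedged sketch for execute(); 'db', the table, and the parameter values
# are assumptions:
#
#     count = db.execute("UPDATE example_table SET name = $1 WHERE id = $2",
#                        sql_params=["renamed", 42])
#     print("rows affected:", count)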
@classmethod
def _set_sql_params(cls, sql_opts, sql_params):
"""Convert SQL parameters to JSON and set as an option for execute_sql_and_decode()
Parameters:
sql_opts (dict)
The parameter list that will be appended to.
sql_params (list of native types)
The SQL parameters that will be substituted for tokens (e.g. $1 $2)
"""
if (len(sql_params) == 0):
return
for idx, item in enumerate(sql_params):
if (isinstance(item, list)):
# assume that list type is vector
sql_params[idx] = str(item)
json_params = json.dumps(sql_params)
sql_opts['query_parameters'] = json_params
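# A minimal sketch of what _set_sql_params adds to the options dict; the
# parameter values are illustrative only (note that list parameters are
# stringified before being JSON-encoded):
#
#     opts = {}
#     GPUdb._set_sql_params(opts, ["abc", 7, [1.0, 2.0]])
#     # opts is now {'query_parameters': '["abc", 7, "[1.0, 2.0]"]'}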
@staticmethod
def get_connection(
enable_ssl_cert_verification = False,
enable_auto_discovery = False,
enable_failover = False,
logging_level = 'INFO'):
""" Get a connection to Kinetica getting connection and authentication
information from environment variables.
This method is particularly useful for Jupyter notebooks, which then do
not need authentication credentials embedded within them. This, in turn,
helps prevent committing credentials to version control.
In addition, some features including auto-discovery and SSL certificate
verification are disabled by default to simplify connections for simple
use cases.
The following environment variables are required:
- `KINETICA_URL`: the url of the Kinetica server
- `KINETICA_USER`: the username to connect with
- `KINETICA_PASSWD`: the password to connect with
Parameters:
enable_ssl_cert_verification (bool):
Enable SSL certificate verification.
enable_auto_discovery (bool):
Enable auto-discovery of the initial cluster nodes, as well as
any attached failover clusters. This allows for both multi-head
ingestion & key lookup, as well as cluster failover.
enable_failover (bool):
Enable failover to another cluster.
logging_level (str):
Logging level for the connection. (INFO by default)
Returns (GPUdb):
An active connection to Kinetica.
"""
ENV_URL = 'KINETICA_URL'
ENV_USER = 'KINETICA_USER'
ENV_PASS = 'KINETICA_PASSWD'
ENV_NOT_FOUND_ERROR = 'Environment variable <{}> needs to be set when connecting with get_connection()'
if ENV_URL in os.environ:
url = os.environ[ENV_URL]
else:
raise GPUdbException(ENV_NOT_FOUND_ERROR.format( ENV_URL ))
if ENV_USER in os.environ:
user = os.environ[ENV_USER]
else:
# The connection does not exist yet, so warn via the warnings module
user = None
warnings.warn("Attempting to log in with no username set in environment variable <{}>!".format(ENV_USER))
if ENV_PASS in os.environ:
passwd = os.environ[ENV_PASS]
else:
passwd = None
warnings.warn("Attempting to log in with no password set in environment variable <{}>!".format(ENV_PASS))
options = GPUdb.Options()
options.username = user
options.password = passwd
options.skip_ssl_cert_verification = not enable_ssl_cert_verification
options.disable_auto_discovery = not enable_auto_discovery
options.disable_failover = not enable_failover
options.logging_level = logging_level
kdbc = GPUdb(host = url, options = options)
kdbc.__log_info("Connected to Kinetica! (host={} api={} server={})".format(kdbc.get_url(), kdbc.api_version, str(kdbc.server_version)))
return kdbc
# end get_connection
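# A hedged sketch of connecting via environment variables; the URL and
# credentials below are placeholders, not real values:
#
#     os.environ["KINETICA_URL"] = "http://localhost:9191"
#     os.environ["KINETICA_USER"] = "admin"
#     os.environ["KINETICA_PASSWD"] = "password"
#     db = GPUdb.get_connection(logging_level="INFO")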
# ------------- END convenience functions ------------------------------------
# -----------------------------------------------------------------------
# Begin autogenerated functions
# -----------------------------------------------------------------------
def load_gpudb_schemas( self ):
"""Saves all request and response schemas for GPUdb queries
in a lookup table (lookup by query name).
"""
self.gpudb_schemas = {}
name = "gpudb_response"
RSP_SCHEMA_STR = """{"type":"record","name":"gpudb_response","fields":[{"name":"status","type":"string"},{"name":"message","type":"string"},{"name":"data_type","type":"string"},{"name":"data","type":"bytes"},{"name":"data_str","type":"string"}]}"""
RSP_SCHEMA = Schema( "record", [("status", "string"), ("message", "string"), ("data_type", "string"), ("data", "object"), ("data_str", "string")] )
self.gpudb_schemas[ name ] = { "RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"RSP_SCHEMA" : RSP_SCHEMA }
name = "trigger_notification"
RSP_SCHEMA_STR = """{"type":"record","name":"trigger_notification","fields":[{"name":"trigger_id","type":"string"},{"name":"set_id","type":"string"},{"name":"object_id","type":"string"},{"name":"object_data","type":"bytes"}]}"""
RSP_SCHEMA = Schema( "record", [("trigger_id", "string"), ("set_id", "string"), ("object_id", "string"), ("object_data", "bytes")] )
self.gpudb_schemas[ name ] = { "RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"RSP_SCHEMA" : RSP_SCHEMA }
name = "/admin/add/host"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_add_host_request","fields":[{"name":"host_address","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_add_host_response","fields":[{"name":"added_host","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("host_address", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("added_host", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/admin/add/host"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/add/ranks"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_add_ranks_request","fields":[{"name":"hosts","type":{"type":"array","items":"string"}},{"name":"config_params","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_add_ranks_response","fields":[{"name":"added_ranks","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("hosts", "array", [("string")]), ("config_params", "array", [("map", [("string")])]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("added_ranks", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/admin/add/ranks"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/alter/configuration"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_alter_configuration_request","fields":[{"name":"config_string","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_alter_configuration_response","fields":[{"name":"status","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("config_string", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("status", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/admin/alter/configuration"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/alter/host"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_alter_host_request","fields":[{"name":"host","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_alter_host_response","fields":[{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("host", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("info", "map", [("string")])] )
ENDPOINT = "/admin/alter/host"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/alter/jobs"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_alter_jobs_request","fields":[{"name":"job_ids","type":{"type":"array","items":"long"}},{"name":"action","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_alter_jobs_response","fields":[{"name":"job_ids","type":{"type":"array","items":"long"}},{"name":"action","type":"string"},{"name":"status","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("job_ids", "array", [("long")]), ("action", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("job_ids", "array", [("long")]), ("action", "string"), ("status", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/admin/alter/jobs"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/alter/shards"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_alter_shards_request","fields":[{"name":"version","type":"long"},{"name":"use_index","type":"boolean"},{"name":"rank","type":{"type":"array","items":"int"}},{"name":"tom","type":{"type":"array","items":"int"}},{"name":"index","type":{"type":"array","items":"int"}},{"name":"backup_map_list","type":{"type":"array","items":"int"}},{"name":"backup_map_values","type":{"type":"array","items":{"type":"array","items":"int"}}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_alter_shards_response","fields":[{"name":"version","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("version", "long"), ("use_index", "boolean"), ("rank", "array", [("int")]), ("tom", "array", [("int")]), ("index", "array", [("int")]), ("backup_map_list", "array", [("int")]), ("backup_map_values", "array", [("array", [("int")])]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("version", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/admin/alter/shards"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/backup/begin"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_backup_begin_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_backup_begin_response","fields":[{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("info", "map", [("string")])] )
ENDPOINT = "/admin/backup/begin"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/backup/end"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_backup_end_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_backup_end_response","fields":[{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("info", "map", [("string")])] )
ENDPOINT = "/admin/backup/end"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/ha/refresh"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_ha_refresh_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_ha_refresh_response","fields":[{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("info", "map", [("string")])] )
ENDPOINT = "/admin/ha/refresh"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/offline"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_offline_request","fields":[{"name":"offline","type":"boolean"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_offline_response","fields":[{"name":"is_offline","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("offline", "boolean"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("is_offline", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/admin/offline"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/rebalance"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_rebalance_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_rebalance_response","fields":[{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("info", "map", [("string")])] )
ENDPOINT = "/admin/rebalance"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/remove/host"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_remove_host_request","fields":[{"name":"host","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_remove_host_response","fields":[{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("host", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("info", "map", [("string")])] )
ENDPOINT = "/admin/remove/host"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/remove/ranks"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_remove_ranks_request","fields":[{"name":"ranks","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_remove_ranks_response","fields":[{"name":"removed_ranks","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("ranks", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("removed_ranks", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/admin/remove/ranks"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/show/alerts"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_show_alerts_request","fields":[{"name":"num_alerts","type":"int"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_show_alerts_response","fields":[{"name":"timestamps","type":{"type":"array","items":"string"}},{"name":"types","type":{"type":"array","items":"string"}},{"name":"params","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("num_alerts", "int"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("timestamps", "array", [("string")]), ("types", "array", [("string")]), ("params", "array", [("map", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/admin/show/alerts"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/show/cluster/operations"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_show_cluster_operations_request","fields":[{"name":"history_index","type":"int"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_show_cluster_operations_response","fields":[{"name":"history_index","type":"int"},{"name":"history_size","type":"int"},{"name":"in_progress","type":"boolean"},{"name":"start_time","type":"string"},{"name":"end_time","type":"string"},{"name":"endpoint","type":"string"},{"name":"endpoint_schema","type":"string"},{"name":"overall_status","type":"string"},{"name":"user_stopped","type":"boolean"},{"name":"percent_complete","type":"int"},{"name":"dry_run","type":"boolean"},{"name":"messages","type":{"type":"array","items":"string"}},{"name":"add_ranks","type":"boolean"},{"name":"add_ranks_status","type":"string"},{"name":"ranks_being_added","type":{"type":"array","items":"int"}},{"name":"rank_hosts","type":{"type":"array","items":"string"}},{"name":"add_ranks_percent_complete","type":"int"},{"name":"remove_ranks","type":"boolean"},{"name":"remove_ranks_status","type":"string"},{"name":"ranks_being_removed","type":{"type":"array","items":"int"}},{"name":"remove_ranks_percent_complete","type":"int"},{"name":"rebalance","type":"boolean"},{"name":"rebalance_unsharded_data","type":"boolean"},{"name":"rebalance_unsharded_data_status","type":"string"},{"name":"unsharded_rebalance_percent_complete","type":"int"},{"name":"rebalance_sharded_data","type":"boolean"},{"name":"shard_array_version","type":"long"},{"name":"rebalance_sharded_data_status","type":"string"},{"name":"num_shards_changing","type":"int"},{"name":"sharded_rebalance_percent_complete","type":"int"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("history_index", "int"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("history_index", "int"), ("history_size", "int"), ("in_progress", "boolean"), ("start_time", "string"), ("end_time", "string"), ("endpoint", "string"), ("endpoint_schema", "string"), ("overall_status", "string"), ("user_stopped", "boolean"), ("percent_complete", "int"), ("dry_run", "boolean"), ("messages", "array", [("string")]), ("add_ranks", "boolean"), ("add_ranks_status", "string"), ("ranks_being_added", "array", [("int")]), ("rank_hosts", "array", [("string")]), ("add_ranks_percent_complete", "int"), ("remove_ranks", "boolean"), ("remove_ranks_status", "string"), ("ranks_being_removed", "array", [("int")]), ("remove_ranks_percent_complete", "int"), ("rebalance", "boolean"), ("rebalance_unsharded_data", "boolean"), ("rebalance_unsharded_data_status", "string"), ("unsharded_rebalance_percent_complete", "int"), ("rebalance_sharded_data", "boolean"), ("shard_array_version", "long"), ("rebalance_sharded_data_status", "string"), ("num_shards_changing", "int"), ("sharded_rebalance_percent_complete", "int"), ("info", "map", [("string")])] )
ENDPOINT = "/admin/show/cluster/operations"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/show/configuration"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_show_configuration_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_show_configuration_response","fields":[{"name":"config_string","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("config_string", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/admin/show/configuration"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/show/jobs"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_show_jobs_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_show_jobs_response","fields":[{"name":"job_id","type":{"type":"array","items":"long"}},{"name":"status","type":{"type":"array","items":"string"}},{"name":"endpoint_name","type":{"type":"array","items":"string"}},{"name":"time_received","type":{"type":"array","items":"long"}},{"name":"auth_id","type":{"type":"array","items":"string"}},{"name":"source_ip","type":{"type":"array","items":"string"}},{"name":"user_data","type":{"type":"array","items":"string"}},{"name":"flags","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("job_id", "array", [("long")]), ("status", "array", [("string")]), ("endpoint_name", "array", [("string")]), ("time_received", "array", [("long")]), ("auth_id", "array", [("string")]), ("source_ip", "array", [("string")]), ("user_data", "array", [("string")]), ("flags", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/admin/show/jobs"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/show/shards"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_show_shards_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_show_shards_response","fields":[{"name":"version","type":"long"},{"name":"rank","type":{"type":"array","items":"int"}},{"name":"tom","type":{"type":"array","items":"int"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("version", "long"), ("rank", "array", [("int")]), ("tom", "array", [("int")]), ("info", "map", [("string")])] )
ENDPOINT = "/admin/show/shards"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/shutdown"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_shutdown_request","fields":[{"name":"exit_type","type":"string"},{"name":"authorization","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_shutdown_response","fields":[{"name":"exit_status","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("exit_type", "string"), ("authorization", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("exit_status", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/admin/shutdown"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/switchover"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_switchover_request","fields":[{"name":"processes","type":{"type":"array","items":"string"}},{"name":"destinations","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_switchover_response","fields":[{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("processes", "array", [("string")]), ("destinations", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("info", "map", [("string")])] )
ENDPOINT = "/admin/switchover"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/admin/verifydb"
REQ_SCHEMA_STR = """{"type":"record","name":"admin_verify_db_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"admin_verify_db_response","fields":[{"name":"verified_ok","type":"boolean"},{"name":"error_list","type":{"type":"array","items":"string"}},{"name":"orphaned_tables_total_size","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("verified_ok", "boolean"), ("error_list", "array", [("string")]), ("orphaned_tables_total_size", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/admin/verifydb"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/aggregate/convexhull"
REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_convex_hull_request","fields":[{"name":"table_name","type":"string"},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_convex_hull_response","fields":[{"name":"x_vector","type":{"type":"array","items":"double"}},{"name":"y_vector","type":{"type":"array","items":"double"}},{"name":"count","type":"int"},{"name":"is_valid","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("x_column_name", "string"), ("y_column_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("x_vector", "array", [("double")]), ("y_vector", "array", [("double")]), ("count", "int"), ("is_valid", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/aggregate/convexhull"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/aggregate/groupby"
REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_group_by_request","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"offset","type":"long"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_group_by_response","fields":[{"name":"response_schema_str","type":"string"},{"name":"binary_encoded_response","type":"bytes"},{"name":"json_encoded_response","type":"string"},{"name":"total_number_of_records","type":"long"},{"name":"has_more_records","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")]), ("offset", "long"), ("limit", "long"), ("encoding", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("response_schema_str", "string"), ("binary_encoded_response", "bytes"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean"), ("info", "map", [("string")])] )
RSP_SCHEMA_CEXT = Schema( "record", [("response_schema_str", "string"), ("binary_encoded_response", "object"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/aggregate/groupby"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT,
"ENDPOINT" : ENDPOINT }
name = "/aggregate/histogram"
REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_histogram_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"start","type":"double"},{"name":"end","type":"double"},{"name":"interval","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_histogram_response","fields":[{"name":"counts","type":{"type":"array","items":"double"}},{"name":"start","type":"double"},{"name":"end","type":"double"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("start", "double"), ("end", "double"), ("interval", "double"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("counts", "array", [("double")]), ("start", "double"), ("end", "double"), ("info", "map", [("string")])] )
ENDPOINT = "/aggregate/histogram"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/aggregate/kmeans"
REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_k_means_request","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"k","type":"int"},{"name":"tolerance","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_k_means_response","fields":[{"name":"means","type":{"type":"array","items":{"type":"array","items":"double"}}},{"name":"counts","type":{"type":"array","items":"long"}},{"name":"rms_dists","type":{"type":"array","items":"double"}},{"name":"count","type":"long"},{"name":"rms_dist","type":"double"},{"name":"tolerance","type":"double"},{"name":"num_iters","type":"int"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")]), ("k", "int"), ("tolerance", "double"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("means", "array", [("array", [("double")])]), ("counts", "array", [("long")]), ("rms_dists", "array", [("double")]), ("count", "long"), ("rms_dist", "double"), ("tolerance", "double"), ("num_iters", "int"), ("info", "map", [("string")])] )
ENDPOINT = "/aggregate/kmeans"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/aggregate/minmax"
REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_min_max_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_min_max_response","fields":[{"name":"min","type":"double"},{"name":"max","type":"double"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("min", "double"), ("max", "double"), ("info", "map", [("string")])] )
ENDPOINT = "/aggregate/minmax"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/aggregate/minmax/geometry"
REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_min_max_geometry_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_min_max_geometry_response","fields":[{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("info", "map", [("string")])] )
ENDPOINT = "/aggregate/minmax/geometry"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/aggregate/statistics"
REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_statistics_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"stats","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_statistics_response","fields":[{"name":"stats","type":{"type":"map","values":"double"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("stats", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("stats", "map", [("double")]), ("info", "map", [("string")])] )
ENDPOINT = "/aggregate/statistics"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/aggregate/statistics/byrange"
REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_statistics_by_range_request","fields":[{"name":"table_name","type":"string"},{"name":"select_expression","type":"string"},{"name":"column_name","type":"string"},{"name":"value_column_name","type":"string"},{"name":"stats","type":"string"},{"name":"start","type":"double"},{"name":"end","type":"double"},{"name":"interval","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_statistics_by_range_response","fields":[{"name":"stats","type":{"type":"map","values":{"type":"array","items":"double"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("select_expression", "string"), ("column_name", "string"), ("value_column_name", "string"), ("stats", "string"), ("start", "double"), ("end", "double"), ("interval", "double"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("stats", "map", [("array", [("double")])]), ("info", "map", [("string")])] )
ENDPOINT = "/aggregate/statistics/byrange"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/aggregate/unique"
REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_unique_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"offset","type":"long"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_unique_response","fields":[{"name":"table_name","type":"string"},{"name":"response_schema_str","type":"string"},{"name":"binary_encoded_response","type":"bytes"},{"name":"json_encoded_response","type":"string"},{"name":"has_more_records","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("offset", "long"), ("limit", "long"), ("encoding", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "bytes"), ("json_encoded_response", "string"), ("has_more_records", "boolean"), ("info", "map", [("string")])] )
RSP_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "object"), ("json_encoded_response", "string"), ("has_more_records", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/aggregate/unique"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT,
"ENDPOINT" : ENDPOINT }
name = "/aggregate/unpivot"
REQ_SCHEMA_STR = """{"type":"record","name":"aggregate_unpivot_request","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"variable_column_name","type":"string"},{"name":"value_column_name","type":"string"},{"name":"pivoted_columns","type":{"type":"array","items":"string"}},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"aggregate_unpivot_response","fields":[{"name":"table_name","type":"string"},{"name":"response_schema_str","type":"string"},{"name":"binary_encoded_response","type":"bytes"},{"name":"json_encoded_response","type":"string"},{"name":"total_number_of_records","type":"long"},{"name":"has_more_records","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")]), ("variable_column_name", "string"), ("value_column_name", "string"), ("pivoted_columns", "array", [("string")]), ("encoding", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "bytes"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean"), ("info", "map", [("string")])] )
RSP_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "object"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/aggregate/unpivot"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT,
"ENDPOINT" : ENDPOINT }
name = "/alter/credential"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_credential_request","fields":[{"name":"credential_name","type":"string"},{"name":"credential_updates_map","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_credential_response","fields":[{"name":"credential_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("credential_name", "string"), ("credential_updates_map", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("credential_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/credential"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/datasink"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_datasink_request","fields":[{"name":"name","type":"string"},{"name":"datasink_updates_map","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_datasink_response","fields":[{"name":"updated_properties_map","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("datasink_updates_map", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("updated_properties_map", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/alter/datasink"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/datasource"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_datasource_request","fields":[{"name":"name","type":"string"},{"name":"datasource_updates_map","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_datasource_response","fields":[{"name":"updated_properties_map","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("datasource_updates_map", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("updated_properties_map", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/alter/datasource"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/directory"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_directory_request","fields":[{"name":"directory_name","type":"string"},{"name":"directory_updates_map","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_directory_response","fields":[{"name":"directory_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("directory_name", "string"), ("directory_updates_map", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("directory_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/directory"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/environment"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_environment_request","fields":[{"name":"environment_name","type":"string"},{"name":"action","type":"string"},{"name":"value","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_environment_response","fields":[{"name":"environment_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("environment_name", "string"), ("action", "string"), ("value", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("environment_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/environment"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/graph"
REQ_SCHEMA_STR = """{"name":"alter_graph_request","type":"record","fields":[{"name":"graph_name","type":"string"},{"name":"action","type":"string"},{"name":"action_arg","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"alter_graph_response","type":"record","fields":[{"name":"action","type":"string"},{"name":"action_arg","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("graph_name", "string"), ("action", "string"), ("action_arg", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("action", "string"), ("action_arg", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/graph"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/model"
REQ_SCHEMA_STR = """{"name":"alter_model_request","type":"record","fields":[{"name":"model_name","type":"string"},{"name":"action","type":"string"},{"name":"value","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"alter_model_response","type":"record","fields":[{"name":"model_name","type":"string"},{"name":"action","type":"string"},{"name":"value","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("model_name", "string"), ("action", "string"), ("value", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("model_name", "string"), ("action", "string"), ("value", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/model"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/resourcegroup"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_resource_group_request","fields":[{"name":"name","type":"string"},{"name":"tier_attributes","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"ranking","type":"string"},{"name":"adjoining_resource_group","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_resource_group_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("tier_attributes", "map", [("map", [("string")])]), ("ranking", "string"), ("adjoining_resource_group", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/resourcegroup"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/role"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_role_request","fields":[{"name":"name","type":"string"},{"name":"action","type":"string"},{"name":"value","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_role_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("action", "string"), ("value", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/role"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/schema"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_schema_request","fields":[{"name":"schema_name","type":"string"},{"name":"action","type":"string"},{"name":"value","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_schema_response","fields":[{"name":"schema_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("schema_name", "string"), ("action", "string"), ("value", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("schema_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/schema"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/system/properties"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_system_properties_request","fields":[{"name":"property_updates_map","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_system_properties_response","fields":[{"name":"updated_properties_map","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("property_updates_map", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("updated_properties_map", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/alter/system/properties"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/table"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_table_request","fields":[{"name":"table_name","type":"string"},{"name":"action","type":"string"},{"name":"value","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_table_response","fields":[{"name":"table_name","type":"string"},{"name":"action","type":"string"},{"name":"value","type":"string"},{"name":"type_id","type":"string"},{"name":"type_definition","type":"string"},{"name":"properties","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"label","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("action", "string"), ("value", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("action", "string"), ("value", "string"), ("type_id", "string"), ("type_definition", "string"), ("properties", "map", [("array", [("string")])]), ("label", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/table"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/table/columns"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_table_columns_request","fields":[{"name":"table_name","type":"string"},{"name":"column_alterations","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_table_columns_response","fields":[{"name":"table_name","type":"string"},{"name":"type_id","type":"string"},{"name":"type_definition","type":"string"},{"name":"properties","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"label","type":"string"},{"name":"column_alterations","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_alterations", "array", [("map", [("string")])]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("type_id", "string"), ("type_definition", "string"), ("properties", "map", [("array", [("string")])]), ("label", "string"), ("column_alterations", "array", [("map", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/alter/table/columns"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/table/metadata"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_table_metadata_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"metadata_map","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_table_metadata_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"metadata_map","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("metadata_map", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("metadata_map", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/alter/table/metadata"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/tablemonitor"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_table_monitor_request","fields":[{"name":"topic_id","type":"string"},{"name":"monitor_updates_map","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_table_monitor_response","fields":[{"name":"topic_id","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("topic_id", "string"), ("monitor_updates_map", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("topic_id", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/tablemonitor"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/tier"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_tier_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_tier_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/tier"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/user"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_user_request","fields":[{"name":"name","type":"string"},{"name":"action","type":"string"},{"name":"value","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_user_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("action", "string"), ("value", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/user"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/user/reveal"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_user_reveal_request","fields":[{"name":"user_name","type":"string"},{"name":"password","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"alter_user_reveal_response","fields":[{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("user_name", "string"), ("password", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("info", "map", [("string")])] )
ENDPOINT = "/alter/user/reveal"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/alter/video"
REQ_SCHEMA_STR = """{"type":"record","name":"alter_video_request","fields":[{"name":"path","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"alter_video_response","type":"record","fields":[{"name":"path","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("path", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("path", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/alter/video"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/append/records"
REQ_SCHEMA_STR = """{"type":"record","name":"append_records_request","fields":[{"name":"table_name","type":"string"},{"name":"source_table_name","type":"string"},{"name":"field_map","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"append_records_response","fields":[{"name":"table_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("source_table_name", "string"), ("field_map", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/append/records"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/clear/statistics"
REQ_SCHEMA_STR = """{"type":"record","name":"clear_statistics_request","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"clear_statistics_response","fields":[{"name":"table_name","type":"string"},{"name":"column_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("column_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/clear/statistics"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/clear/table"
REQ_SCHEMA_STR = """{"type":"record","name":"clear_table_request","fields":[{"name":"table_name","type":"string"},{"name":"authorization","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"clear_table_response","fields":[{"name":"table_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("authorization", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/clear/table"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/clear/tablemonitor"
REQ_SCHEMA_STR = """{"type":"record","name":"clear_table_monitor_request","fields":[{"name":"topic_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"clear_table_monitor_response","fields":[{"name":"topic_id","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("topic_id", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("topic_id", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/clear/tablemonitor"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/clear/trigger"
REQ_SCHEMA_STR = """{"type":"record","name":"clear_trigger_request","fields":[{"name":"trigger_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"clear_trigger_response","fields":[{"name":"trigger_id","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("trigger_id", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("trigger_id", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/clear/trigger"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/collect/statistics"
REQ_SCHEMA_STR = """{"type":"record","name":"collect_statistics_request","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"collect_statistics_response","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/collect/statistics"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/container/registry"
REQ_SCHEMA_STR = """{"type":"record","name":"create_container_registry_request","fields":[{"name":"registry_name","type":"string"},{"name":"uri","type":"string"},{"name":"credential","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_container_registry_response","fields":[{"name":"registry_name","type":"string"},{"name":"entity_id","type":"int"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("registry_name", "string"), ("uri", "string"), ("credential", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("registry_name", "string"), ("entity_id", "int"), ("info", "map", [("string")])] )
ENDPOINT = "/create/container/registry"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/credential"
REQ_SCHEMA_STR = """{"type":"record","name":"create_credential_request","fields":[{"name":"credential_name","type":"string"},{"name":"type","type":"string"},{"name":"identity","type":"string"},{"name":"secret","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_credential_response","fields":[{"name":"credential_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("credential_name", "string"), ("type", "string"), ("identity", "string"), ("secret", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("credential_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/credential"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/datasink"
REQ_SCHEMA_STR = """{"type":"record","name":"create_datasink_request","fields":[{"name":"name","type":"string"},{"name":"destination","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_datasink_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("destination", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/datasink"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/datasource"
REQ_SCHEMA_STR = """{"type":"record","name":"create_datasource_request","fields":[{"name":"name","type":"string"},{"name":"location","type":"string"},{"name":"user_name","type":"string"},{"name":"password","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_datasource_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("location", "string"), ("user_name", "string"), ("password", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/datasource"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/deltatable"
REQ_SCHEMA_STR = """{"type":"record","name":"create_delta_table_request","fields":[{"name":"delta_table_name","type":"string"},{"name":"table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_delta_table_response","fields":[{"name":"delta_table_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("delta_table_name", "string"), ("table_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("delta_table_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/deltatable"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/directory"
REQ_SCHEMA_STR = """{"type":"record","name":"create_directory_request","fields":[{"name":"directory_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_directory_response","fields":[{"name":"directory_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("directory_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("directory_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/directory"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/environment"
REQ_SCHEMA_STR = """{"type":"record","name":"create_environment_request","fields":[{"name":"environment_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_environment_response","fields":[{"name":"environment_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("environment_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("environment_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/environment"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/graph"
REQ_SCHEMA_STR = """{"name":"create_graph_request","type":"record","fields":[{"name":"graph_name","type":"string"},{"name":"directed_graph","type":"boolean"},{"name":"nodes","type":{"type":"array","items":"string"}},{"name":"edges","type":{"type":"array","items":"string"}},{"name":"weights","type":{"type":"array","items":"string"}},{"name":"restrictions","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"create_graph_response","type":"record","fields":[{"name":"result","type":"boolean"},{"name":"num_nodes","type":"long"},{"name":"num_edges","type":"long"},{"name":"edges_ids","type":{"type":"array","items":"long"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("graph_name", "string"), ("directed_graph", "boolean"), ("nodes", "array", [("string")]), ("edges", "array", [("string")]), ("weights", "array", [("string")]), ("restrictions", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("result", "boolean"), ("num_nodes", "long"), ("num_edges", "long"), ("edges_ids", "array", [("long")]), ("info", "map", [("string")])] )
ENDPOINT = "/create/graph"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/job"
REQ_SCHEMA_STR = """{"type":"record","name":"create_job_request","fields":[{"name":"endpoint","type":"string"},{"name":"request_encoding","type":"string"},{"name":"data","type":"bytes"},{"name":"data_str","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_job_response","fields":[{"name":"job_id","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("endpoint", "string"), ("request_encoding", "string"), ("data", "bytes"), ("data_str", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("job_id", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/create/job"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/jointable"
REQ_SCHEMA_STR = """{"type":"record","name":"create_join_table_request","fields":[{"name":"join_table_name","type":"string"},{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"expressions","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_join_table_response","fields":[{"name":"join_table_name","type":"string"},{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("join_table_name", "string"), ("table_names", "array", [("string")]), ("column_names", "array", [("string")]), ("expressions", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("join_table_name", "string"), ("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/create/jointable"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/materializedview"
REQ_SCHEMA_STR = """{"type":"record","name":"create_materialized_view_request","fields":[{"name":"table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_materialized_view_response","fields":[{"name":"table_name","type":"string"},{"name":"view_id","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("view_id", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/materializedview"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/proc"
REQ_SCHEMA_STR = """{"type":"record","name":"create_proc_request","fields":[{"name":"proc_name","type":"string"},{"name":"execution_mode","type":"string"},{"name":"files","type":{"type":"map","values":"bytes"}},{"name":"command","type":"string"},{"name":"args","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_proc_response","fields":[{"name":"proc_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("proc_name", "string"), ("execution_mode", "string"), ("files", "map", [("bytes")]), ("command", "string"), ("args", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("proc_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/proc"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/projection"
REQ_SCHEMA_STR = """{"type":"record","name":"create_projection_request","fields":[{"name":"table_name","type":"string"},{"name":"projection_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_projection_response","fields":[{"name":"projection_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("projection_name", "string"), ("column_names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("projection_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/projection"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/resourcegroup"
REQ_SCHEMA_STR = """{"type":"record","name":"create_resource_group_request","fields":[{"name":"name","type":"string"},{"name":"tier_attributes","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"ranking","type":"string"},{"name":"adjoining_resource_group","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_resource_group_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("tier_attributes", "map", [("map", [("string")])]), ("ranking", "string"), ("adjoining_resource_group", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/resourcegroup"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/role"
REQ_SCHEMA_STR = """{"type":"record","name":"create_role_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_role_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/role"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/schema"
REQ_SCHEMA_STR = """{"type":"record","name":"create_schema_request","fields":[{"name":"schema_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_schema_response","fields":[{"name":"schema_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("schema_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("schema_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/schema"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/statetable"
REQ_SCHEMA_STR = """{"type":"record","name":"create_state_table_request","fields":[{"name":"table_name","type":"string"},{"name":"input_table_name","type":"string"},{"name":"init_table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_state_table_response","fields":[{"name":"table_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("input_table_name", "string"), ("init_table_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/statetable"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/table"
REQ_SCHEMA_STR = """{"type":"record","name":"create_table_request","fields":[{"name":"table_name","type":"string"},{"name":"type_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_table_response","fields":[{"name":"table_name","type":"string"},{"name":"type_id","type":"string"},{"name":"is_collection","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("type_id", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("type_id", "string"), ("is_collection", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/create/table"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/table/external"
REQ_SCHEMA_STR = """{"type":"record","name":"create_table_external_request","fields":[{"name":"table_name","type":"string"},{"name":"filepaths","type":{"type":"array","items":"string"}},{"name":"modify_columns","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"create_table_options","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_table_external_response","fields":[{"name":"table_name","type":"string"},{"name":"type_id","type":"string"},{"name":"type_definition","type":"string"},{"name":"type_label","type":"string"},{"name":"type_properties","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"count_inserted","type":"long"},{"name":"count_skipped","type":"long"},{"name":"count_updated","type":"long"},{"name":"info","type":{"type":"map","values":"string"}},{"name":"files","type":{"type":"array","items":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("filepaths", "array", [("string")]), ("modify_columns", "map", [("map", [("string")])]), ("create_table_options", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("type_id", "string"), ("type_definition", "string"), ("type_label", "string"), ("type_properties", "map", [("array", [("string")])]), ("count_inserted", "long"), ("count_skipped", "long"), ("count_updated", "long"), ("info", "map", [("string")]), ("files", "array", [("string")])] )
ENDPOINT = "/create/table/external"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/tablemonitor"
REQ_SCHEMA_STR = """{"type":"record","name":"create_table_monitor_request","fields":[{"name":"table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_table_monitor_response","fields":[{"name":"topic_id","type":"string"},{"name":"table_name","type":"string"},{"name":"type_schema","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("topic_id", "string"), ("table_name", "string"), ("type_schema", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/tablemonitor"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/trigger/byarea"
REQ_SCHEMA_STR = """{"type":"record","name":"create_trigger_by_area_request","fields":[{"name":"request_id","type":"string"},{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"x_column_name","type":"string"},{"name":"x_vector","type":{"type":"array","items":"double"}},{"name":"y_column_name","type":"string"},{"name":"y_vector","type":{"type":"array","items":"double"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_trigger_by_area_response","fields":[{"name":"trigger_id","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("request_id", "string"), ("table_names", "array", [("string")]), ("x_column_name", "string"), ("x_vector", "array", [("double")]), ("y_column_name", "string"), ("y_vector", "array", [("double")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("trigger_id", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/trigger/byarea"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/trigger/byrange"
REQ_SCHEMA_STR = """{"type":"record","name":"create_trigger_by_range_request","fields":[{"name":"request_id","type":"string"},{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"column_name","type":"string"},{"name":"min","type":"double"},{"name":"max","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_trigger_by_range_response","fields":[{"name":"trigger_id","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("request_id", "string"), ("table_names", "array", [("string")]), ("column_name", "string"), ("min", "double"), ("max", "double"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("trigger_id", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/trigger/byrange"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/type"
REQ_SCHEMA_STR = """{"type":"record","name":"create_type_request","fields":[{"name":"type_definition","type":"string"},{"name":"label","type":"string"},{"name":"properties","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_type_response","fields":[{"name":"type_id","type":"string"},{"name":"type_definition","type":"string"},{"name":"label","type":"string"},{"name":"properties","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("type_definition", "string"), ("label", "string"), ("properties", "map", [("array", [("string")])]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("type_id", "string"), ("type_definition", "string"), ("label", "string"), ("properties", "map", [("array", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/create/type"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/union"
REQ_SCHEMA_STR = """{"type":"record","name":"create_union_request","fields":[{"name":"table_name","type":"string"},{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"input_column_names","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"output_column_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_union_response","fields":[{"name":"table_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("table_names", "array", [("string")]), ("input_column_names", "array", [("array", [("string")])]), ("output_column_names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/union"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/user/external"
REQ_SCHEMA_STR = """{"type":"record","name":"create_user_external_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_user_external_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/user/external"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/user/internal"
REQ_SCHEMA_STR = """{"type":"record","name":"create_user_internal_request","fields":[{"name":"name","type":"string"},{"name":"password","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_user_internal_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("password", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/user/internal"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/user/reveal"
REQ_SCHEMA_STR = """{"type":"record","name":"create_user_reveal_request","fields":[{"name":"user_name","type":"string"},{"name":"password","type":"string"},{"name":"first_name","type":"string"},{"name":"last_name","type":"string"},{"name":"email","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"create_user_reveal_response","fields":[{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("user_name", "string"), ("password", "string"), ("first_name", "string"), ("last_name", "string"), ("email", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("info", "map", [("string")])] )
ENDPOINT = "/create/user/reveal"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/create/video"
REQ_SCHEMA_STR = """{"type":"record","name":"create_video_request","fields":[{"name":"attribute","type":"string"},{"name":"begin","type":"string"},{"name":"duration_seconds","type":"double"},{"name":"end","type":"string"},{"name":"frames_per_second","type":"double"},{"name":"style","type":"string"},{"name":"path","type":"string"},{"name":"style_parameters","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"create_video_response","type":"record","fields":[{"name":"job_id","type":"long"},{"name":"path","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("attribute", "string"), ("begin", "string"), ("duration_seconds", "double"), ("end", "string"), ("frames_per_second", "double"), ("style", "string"), ("path", "string"), ("style_parameters", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("job_id", "long"), ("path", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/create/video"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/delete/directory"
REQ_SCHEMA_STR = """{"type":"record","name":"delete_directory_request","fields":[{"name":"directory_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"delete_directory_response","fields":[{"name":"directory_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("directory_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("directory_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/delete/directory"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/delete/files"
REQ_SCHEMA_STR = """{"type":"record","name":"delete_files_request","fields":[{"name":"file_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"delete_files_response","fields":[{"name":"file_names","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("file_names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("file_names", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/delete/files"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/delete/graph"
REQ_SCHEMA_STR = """{"name":"delete_graph_request","type":"record","fields":[{"name":"graph_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"delete_graph_response","type":"record","fields":[{"name":"result","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("graph_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("result", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/delete/graph"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/delete/proc"
REQ_SCHEMA_STR = """{"type":"record","name":"delete_proc_request","fields":[{"name":"proc_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"delete_proc_response","fields":[{"name":"proc_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("proc_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("proc_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/delete/proc"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/delete/records"
REQ_SCHEMA_STR = """{"type":"record","name":"delete_records_request","fields":[{"name":"table_name","type":"string"},{"name":"expressions","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"delete_records_response","fields":[{"name":"count_deleted","type":"long"},{"name":"counts_deleted","type":{"type":"array","items":"long"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("expressions", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count_deleted", "long"), ("counts_deleted", "array", [("long")]), ("info", "map", [("string")])] )
ENDPOINT = "/delete/records"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/delete/resourcegroup"
REQ_SCHEMA_STR = """{"type":"record","name":"delete_resource_group_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"delete_resource_group_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/delete/resourcegroup"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/delete/role"
REQ_SCHEMA_STR = """{"type":"record","name":"delete_role_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"delete_role_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/delete/role"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/delete/user"
REQ_SCHEMA_STR = """{"type":"record","name":"delete_user_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"delete_user_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/delete/user"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/download/files"
REQ_SCHEMA_STR = """{"type":"record","name":"download_files_request","fields":[{"name":"file_names","type":{"type":"array","items":"string"}},{"name":"read_offsets","type":{"type":"array","items":"long"}},{"name":"read_lengths","type":{"type":"array","items":"long"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"download_files_response","fields":[{"name":"file_names","type":{"type":"array","items":"string"}},{"name":"file_data","type":{"type":"array","items":"bytes"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("file_names", "array", [("string")]), ("read_offsets", "array", [("long")]), ("read_lengths", "array", [("long")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("file_names", "array", [("string")]), ("file_data", "array", [("bytes")]), ("info", "map", [("string")])] )
ENDPOINT = "/download/files"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/drop/container/registry"
REQ_SCHEMA_STR = """{"type":"record","name":"drop_container_registry_request","fields":[{"name":"registry_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"drop_container_registry_response","fields":[{"name":"registry_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("registry_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("registry_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/drop/container/registry"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/drop/credential"
REQ_SCHEMA_STR = """{"type":"record","name":"drop_credential_request","fields":[{"name":"credential_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"drop_credential_response","fields":[{"name":"credential_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("credential_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("credential_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/drop/credential"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/drop/datasink"
REQ_SCHEMA_STR = """{"type":"record","name":"drop_datasink_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"drop_datasink_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/drop/datasink"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/drop/datasource"
REQ_SCHEMA_STR = """{"type":"record","name":"drop_datasource_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"drop_datasource_response","fields":[{"name":"name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/drop/datasource"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/drop/environment"
REQ_SCHEMA_STR = """{"type":"record","name":"drop_environment_request","fields":[{"name":"environment_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"drop_environment_response","fields":[{"name":"environment_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("environment_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("environment_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/drop/environment"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/drop/model"
REQ_SCHEMA_STR = """{"type":"record","name":"drop_model_request","fields":[{"name":"model_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"drop_model_response","fields":[{"name":"model_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("model_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("model_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/drop/model"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/drop/schema"
REQ_SCHEMA_STR = """{"type":"record","name":"drop_schema_request","fields":[{"name":"schema_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"drop_schema_response","fields":[{"name":"schema_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("schema_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("schema_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/drop/schema"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/evaluate/model"
REQ_SCHEMA_STR = """{"type":"record","name":"evaluate_model_request","fields":[{"name":"model_name","type":"string"},{"name":"replicas","type":"int"},{"name":"deployment_mode","type":"string"},{"name":"source_table","type":"string"},{"name":"destination_table","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"evaluate_model_response","fields":[{"name":"model_name","type":"string"},{"name":"destination_table","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("model_name", "string"), ("replicas", "int"), ("deployment_mode", "string"), ("source_table", "string"), ("destination_table", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("model_name", "string"), ("destination_table", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/evaluate/model"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/execute/proc"
REQ_SCHEMA_STR = """{"type":"record","name":"execute_proc_request","fields":[{"name":"proc_name","type":"string"},{"name":"params","type":{"type":"map","values":"string"}},{"name":"bin_params","type":{"type":"map","values":"bytes"}},{"name":"input_table_names","type":{"type":"array","items":"string"}},{"name":"input_column_names","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"output_table_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"execute_proc_response","fields":[{"name":"run_id","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("proc_name", "string"), ("params", "map", [("string")]), ("bin_params", "map", [("bytes")]), ("input_table_names", "array", [("string")]), ("input_column_names", "map", [("array", [("string")])]), ("output_table_names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("run_id", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/execute/proc"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/execute/sql"
REQ_SCHEMA_STR = """{"type":"record","name":"execute_sql_request","fields":[{"name":"statement","type":"string"},{"name":"offset","type":"long"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"request_schema_str","type":"string"},{"name":"data","type":{"type":"array","items":"bytes"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"execute_sql_response","fields":[{"name":"count_affected","type":"long"},{"name":"response_schema_str","type":"string"},{"name":"binary_encoded_response","type":"bytes"},{"name":"json_encoded_response","type":"string"},{"name":"total_number_of_records","type":"long"},{"name":"has_more_records","type":"boolean"},{"name":"paging_table","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("statement", "string"), ("offset", "long"), ("limit", "long"), ("encoding", "string"), ("request_schema_str", "string"), ("data", "array", [("bytes")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count_affected", "long"), ("response_schema_str", "string"), ("binary_encoded_response", "bytes"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean"), ("paging_table", "string"), ("info", "map", [("string")])] )
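# RSP_SCHEMA_CEXT (here and for the /get/records* endpoints below) is the
# response-schema variant for the C-extension decode path: binary record
# payloads are typed as decoded objects rather than raw avro bytes.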
RSP_SCHEMA_CEXT = Schema( "record", [("count_affected", "long"), ("response_schema_str", "string"), ("binary_encoded_response", "object"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean"), ("paging_table", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/execute/sql"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT,
"ENDPOINT" : ENDPOINT }
name = "/export/records/tofiles"
REQ_SCHEMA_STR = """{"type":"record","name":"export_records_to_files_request","fields":[{"name":"table_name","type":"string"},{"name":"filepath","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"export_records_to_files_response","fields":[{"name":"table_name","type":"string"},{"name":"count_exported","type":"long"},{"name":"count_skipped","type":"long"},{"name":"files","type":{"type":"array","items":"string"}},{"name":"last_timestamp","type":"long"},{"name":"data_text","type":{"type":"array","items":"string"}},{"name":"data_bytes","type":{"type":"array","items":"bytes"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("filepath", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("count_exported", "long"), ("count_skipped", "long"), ("files", "array", [("string")]), ("last_timestamp", "long"), ("data_text", "array", [("string")]), ("data_bytes", "array", [("bytes")]), ("info", "map", [("string")])] )
ENDPOINT = "/export/records/tofiles"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/export/records/totable"
REQ_SCHEMA_STR = """{"type":"record","name":"export_records_to_table_request","fields":[{"name":"table_name","type":"string"},{"name":"remote_query","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"export_records_to_table_response","fields":[{"name":"table_name","type":"string"},{"name":"count_inserted","type":"long"},{"name":"count_skipped","type":"long"},{"name":"count_updated","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("remote_query", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("count_inserted", "long"), ("count_skipped", "long"), ("count_updated", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/export/records/totable"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"expression","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("expression", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/byarea"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_area_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"x_column_name","type":"string"},{"name":"x_vector","type":{"type":"array","items":"double"}},{"name":"y_column_name","type":"string"},{"name":"y_vector","type":{"type":"array","items":"double"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_area_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("x_column_name", "string"), ("x_vector", "array", [("double")]), ("y_column_name", "string"), ("y_vector", "array", [("double")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/byarea"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/byarea/geometry"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_area_geometry_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"x_vector","type":{"type":"array","items":"double"}},{"name":"y_vector","type":{"type":"array","items":"double"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_area_geometry_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("x_vector", "array", [("double")]), ("y_vector", "array", [("double")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/byarea/geometry"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/bybox"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_box_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"x_column_name","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"y_column_name","type":"string"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_box_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("x_column_name", "string"), ("min_x", "double"), ("max_x", "double"), ("y_column_name", "string"), ("min_y", "double"), ("max_y", "double"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/bybox"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/bybox/geometry"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_box_geometry_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_box_geometry_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/bybox/geometry"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/bygeometry"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_geometry_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"input_wkt","type":"string"},{"name":"operation","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_geometry_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("input_wkt", "string"), ("operation", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/bygeometry"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/bylist"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_list_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_values_map","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_list_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_values_map", "map", [("array", [("string")])]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/bylist"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/byradius"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_radius_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"x_column_name","type":"string"},{"name":"x_center","type":"double"},{"name":"y_column_name","type":"string"},{"name":"y_center","type":"double"},{"name":"radius","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_radius_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("x_column_name", "string"), ("x_center", "double"), ("y_column_name", "string"), ("y_center", "double"), ("radius", "double"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/byradius"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/byradius/geometry"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_radius_geometry_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"x_center","type":"double"},{"name":"y_center","type":"double"},{"name":"radius","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_radius_geometry_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("x_center", "double"), ("y_center", "double"), ("radius", "double"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/byradius/geometry"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/byrange"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_range_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"lower_bound","type":"double"},{"name":"upper_bound","type":"double"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_range_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("lower_bound", "double"), ("upper_bound", "double"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/byrange"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/byseries"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_series_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"track_id","type":"string"},{"name":"target_track_ids","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_series_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("track_id", "string"), ("target_track_ids", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/byseries"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/bystring"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_string_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"expression","type":"string"},{"name":"mode","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_string_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("expression", "string"), ("mode", "string"), ("column_names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/bystring"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/bytable"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_table_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"column_name","type":"string"},{"name":"source_table_name","type":"string"},{"name":"source_table_column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_table_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("column_name", "string"), ("source_table_name", "string"), ("source_table_column_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/bytable"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/filter/byvalue"
REQ_SCHEMA_STR = """{"type":"record","name":"filter_by_value_request","fields":[{"name":"table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"is_string","type":"boolean"},{"name":"value","type":"double"},{"name":"value_str","type":"string"},{"name":"column_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"filter_by_value_response","fields":[{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("view_name", "string"), ("is_string", "boolean"), ("value", "double"), ("value_str", "string"), ("column_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/filter/byvalue"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/get/job"
REQ_SCHEMA_STR = """{"type":"record","name":"get_job_request","fields":[{"name":"job_id","type":"long"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"get_job_response","fields":[{"name":"endpoint","type":"string"},{"name":"job_status","type":"string"},{"name":"running","type":"boolean"},{"name":"progress","type":"int"},{"name":"successful","type":"boolean"},{"name":"response_encoding","type":"string"},{"name":"job_response","type":"bytes"},{"name":"job_response_str","type":"string"},{"name":"status_map","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("job_id", "long"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("endpoint", "string"), ("job_status", "string"), ("running", "boolean"), ("progress", "int"), ("successful", "boolean"), ("response_encoding", "string"), ("job_response", "bytes"), ("job_response_str", "string"), ("status_map", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/get/job"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/get/records"
REQ_SCHEMA_STR = """{"type":"record","name":"get_records_request","fields":[{"name":"table_name","type":"string"},{"name":"offset","type":"long"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"get_records_response","fields":[{"name":"table_name","type":"string"},{"name":"type_name","type":"string"},{"name":"type_schema","type":"string"},{"name":"records_binary","type":{"type":"array","items":"bytes"}},{"name":"records_json","type":{"type":"array","items":"string"}},{"name":"total_number_of_records","type":"long"},{"name":"has_more_records","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("offset", "long"), ("limit", "long"), ("encoding", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("type_name", "string"), ("type_schema", "string"), ("records_binary", "array", [("bytes")]), ("records_json", "array", [("string")]), ("total_number_of_records", "long"), ("has_more_records", "boolean"), ("info", "map", [("string")])] )
RSP_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("type_name", "string"), ("type_schema", "string"), ("records_binary", "object_array"), ("records_json", "array", [("string")]), ("total_number_of_records", "long"), ("has_more_records", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/get/records"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT,
"ENDPOINT" : ENDPOINT }
name = "/get/records/bycolumn"
REQ_SCHEMA_STR = """{"type":"record","name":"get_records_by_column_request","fields":[{"name":"table_name","type":"string"},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"offset","type":"long"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"get_records_by_column_response","fields":[{"name":"table_name","type":"string"},{"name":"response_schema_str","type":"string"},{"name":"binary_encoded_response","type":"bytes"},{"name":"json_encoded_response","type":"string"},{"name":"total_number_of_records","type":"long"},{"name":"has_more_records","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("column_names", "array", [("string")]), ("offset", "long"), ("limit", "long"), ("encoding", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "bytes"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean"), ("info", "map", [("string")])] )
RSP_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("response_schema_str", "string"), ("binary_encoded_response", "object"), ("json_encoded_response", "string"), ("total_number_of_records", "long"), ("has_more_records", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/get/records/bycolumn"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT,
"ENDPOINT" : ENDPOINT }
name = "/get/records/byseries"
REQ_SCHEMA_STR = """{"type":"record","name":"get_records_by_series_request","fields":[{"name":"table_name","type":"string"},{"name":"world_table_name","type":"string"},{"name":"offset","type":"int"},{"name":"limit","type":"int"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"get_records_by_series_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"type_names","type":{"type":"array","items":"string"}},{"name":"type_schemas","type":{"type":"array","items":"string"}},{"name":"list_records_binary","type":{"type":"array","items":{"type":"array","items":"bytes"}}},{"name":"list_records_json","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("world_table_name", "string"), ("offset", "int"), ("limit", "int"), ("encoding", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("type_names", "array", [("string")]), ("type_schemas", "array", [("string")]), ("list_records_binary", "array", [("array", [("bytes")])]), ("list_records_json", "array", [("array", [("string")])]), ("info", "map", [("string")])] )
RSP_SCHEMA_CEXT = Schema( "record", [("table_names", "array", [("string")]), ("type_names", "array", [("string")]), ("type_schemas", "array", [("string")]), ("list_records_binary", "array", [("object_array")]), ("list_records_json", "array", [("array", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/get/records/byseries"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT,
"ENDPOINT" : ENDPOINT }
name = "/get/records/fromcollection"
REQ_SCHEMA_STR = """{"type":"record","name":"get_records_from_collection_request","fields":[{"name":"table_name","type":"string"},{"name":"offset","type":"long"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"get_records_from_collection_response","fields":[{"name":"table_name","type":"string"},{"name":"type_names","type":{"type":"array","items":"string"}},{"name":"records_binary","type":{"type":"array","items":"bytes"}},{"name":"records_json","type":{"type":"array","items":"string"}},{"name":"record_ids","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("offset", "long"), ("limit", "long"), ("encoding", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("type_names", "array", [("string")]), ("records_binary", "array", [("bytes")]), ("records_json", "array", [("string")]), ("record_ids", "array", [("string")]), ("info", "map", [("string")])] )
RSP_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("type_names", "array", [("string")]), ("records_binary", "object_array"), ("records_json", "array", [("string")]), ("record_ids", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/get/records/fromcollection"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"RSP_SCHEMA_CEXT" : RSP_SCHEMA_CEXT,
"ENDPOINT" : ENDPOINT }
name = "/get/vectortile"
REQ_SCHEMA_STR = """{"type":"record","name":"get_vectortile_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"column_names","type":{"type":"array","items":"string"}},{"name":"layers","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"tile_x","type":"int"},{"name":"tile_y","type":"int"},{"name":"zoom","type":"int"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"get_vectortile_response","fields":[{"name":"encoded_data","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("column_names", "array", [("string")]), ("layers", "map", [("array", [("string")])]), ("tile_x", "int"), ("tile_y", "int"), ("zoom", "int"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("encoded_data", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/get/vectortile"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/grant/permission"
REQ_SCHEMA_STR = """{"type":"record","name":"grant_permission_request","fields":[{"name":"principal","type":"string"},{"name":"object","type":"string"},{"name":"object_type","type":"string"},{"name":"permission","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"grant_permission_response","fields":[{"name":"principal","type":"string"},{"name":"object","type":"string"},{"name":"object_type","type":"string"},{"name":"permission","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("principal", "string"), ("object", "string"), ("object_type", "string"), ("permission", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("principal", "string"), ("object", "string"), ("object_type", "string"), ("permission", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/grant/permission"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/grant/permission/credential"
REQ_SCHEMA_STR = """{"type":"record","name":"grant_permission_credential_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"credential_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"grant_permission_credential_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"credential_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("credential_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("credential_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/grant/permission/credential"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/grant/permission/datasource"
REQ_SCHEMA_STR = """{"type":"record","name":"grant_permission_datasource_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"datasource_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"grant_permission_datasource_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"datasource_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("datasource_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("datasource_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/grant/permission/datasource"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/grant/permission/directory"
REQ_SCHEMA_STR = """{"type":"record","name":"grant_permission_directory_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"directory_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"grant_permission_directory_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"directory_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("directory_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("directory_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/grant/permission/directory"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/grant/permission/proc"
REQ_SCHEMA_STR = """{"type":"record","name":"grant_permission_proc_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"proc_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"grant_permission_proc_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"proc_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("proc_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("proc_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/grant/permission/proc"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/grant/permission/system"
REQ_SCHEMA_STR = """{"type":"record","name":"grant_permission_system_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"grant_permission_system_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/grant/permission/system"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/grant/permission/table"
REQ_SCHEMA_STR = """{"type":"record","name":"grant_permission_table_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"table_name","type":"string"},{"name":"filter_expression","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"grant_permission_table_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"table_name","type":"string"},{"name":"filter_expression","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("table_name", "string"), ("filter_expression", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("table_name", "string"), ("filter_expression", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/grant/permission/table"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/grant/role"
REQ_SCHEMA_STR = """{"type":"record","name":"grant_role_request","fields":[{"name":"role","type":"string"},{"name":"member","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"grant_role_response","fields":[{"name":"role","type":"string"},{"name":"member","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("role", "string"), ("member", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("role", "string"), ("member", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/grant/role"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/has/permission"
REQ_SCHEMA_STR = """{"type":"record","name":"has_permission_request","fields":[{"name":"principal","type":"string"},{"name":"object","type":"string"},{"name":"object_type","type":"string"},{"name":"permission","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"has_permission_response","fields":[{"name":"principal","type":"string"},{"name":"object","type":"string"},{"name":"object_type","type":"string"},{"name":"permission","type":"string"},{"name":"has_permission","type":"boolean"},{"name":"filters","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("principal", "string"), ("object", "string"), ("object_type", "string"), ("permission", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("principal", "string"), ("object", "string"), ("object_type", "string"), ("permission", "string"), ("has_permission", "boolean"), ("filters", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/has/permission"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/has/proc"
REQ_SCHEMA_STR = """{"type":"record","name":"has_proc_request","fields":[{"name":"proc_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"has_proc_response","fields":[{"name":"proc_name","type":"string"},{"name":"proc_exists","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("proc_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("proc_name", "string"), ("proc_exists", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/has/proc"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/has/role"
REQ_SCHEMA_STR = """{"type":"record","name":"has_role_request","fields":[{"name":"principal","type":"string"},{"name":"role","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"has_role_response","fields":[{"name":"principal","type":"string"},{"name":"role","type":"string"},{"name":"has_role","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("principal", "string"), ("role", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("principal", "string"), ("role", "string"), ("has_role", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/has/role"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/has/schema"
REQ_SCHEMA_STR = """{"type":"record","name":"has_schema_request","fields":[{"name":"schema_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"has_schema_response","fields":[{"name":"schema_name","type":"string"},{"name":"schema_exists","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("schema_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("schema_name", "string"), ("schema_exists", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/has/schema"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/has/table"
REQ_SCHEMA_STR = """{"type":"record","name":"has_table_request","fields":[{"name":"table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"has_table_response","fields":[{"name":"table_name","type":"string"},{"name":"table_exists","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("table_exists", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/has/table"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/has/type"
REQ_SCHEMA_STR = """{"type":"record","name":"has_type_request","fields":[{"name":"type_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"has_type_response","fields":[{"name":"type_id","type":"string"},{"name":"type_exists","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("type_id", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("type_id", "string"), ("type_exists", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/has/type"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/import/model"
REQ_SCHEMA_STR = """{"type":"record","name":"import_model_request","fields":[{"name":"model_name","type":"string"},{"name":"registry_name","type":"string"},{"name":"container","type":"string"},{"name":"run_function","type":"string"},{"name":"model_type","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"import_model_response","fields":[{"name":"model_name","type":"string"},{"name":"entity_id","type":"int"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("model_name", "string"), ("registry_name", "string"), ("container", "string"), ("run_function", "string"), ("model_type", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("model_name", "string"), ("entity_id", "int"), ("info", "map", [("string")])] )
ENDPOINT = "/import/model"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/insert/records"
REQ_SCHEMA_STR = """{"type":"record","name":"insert_records_request","fields":[{"name":"table_name","type":"string"},{"name":"list","type":{"type":"array","items":"bytes"}},{"name":"list_str","type":{"type":"array","items":"string"}},{"name":"list_encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"insert_records_response","fields":[{"name":"record_ids","type":{"type":"array","items":"string"}},{"name":"count_inserted","type":"int"},{"name":"count_updated","type":"int"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("list", "array", [("bytes")]), ("list_str", "array", [("string")]), ("list_encoding", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("record_ids", "array", [("string")]), ("count_inserted", "int"), ("count_updated", "int"), ("info", "map", [("string")])] )
REQ_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("list", "object_array"), ("list_str", "array", [("string")]), ("list_encoding", "string"), ("options", "map", [("string")])] )
ENDPOINT = "/insert/records"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"REQ_SCHEMA_CEXT" : REQ_SCHEMA_CEXT,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/insert/records/fromfiles"
REQ_SCHEMA_STR = """{"type":"record","name":"insert_records_from_files_request","fields":[{"name":"table_name","type":"string"},{"name":"filepaths","type":{"type":"array","items":"string"}},{"name":"modify_columns","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"create_table_options","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"insert_records_from_files_response","fields":[{"name":"table_name","type":"string"},{"name":"type_id","type":"string"},{"name":"type_definition","type":"string"},{"name":"type_label","type":"string"},{"name":"type_properties","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"count_inserted","type":"long"},{"name":"count_skipped","type":"long"},{"name":"count_updated","type":"long"},{"name":"info","type":{"type":"map","values":"string"}},{"name":"files","type":{"type":"array","items":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("filepaths", "array", [("string")]), ("modify_columns", "map", [("map", [("string")])]), ("create_table_options", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("type_id", "string"), ("type_definition", "string"), ("type_label", "string"), ("type_properties", "map", [("array", [("string")])]), ("count_inserted", "long"), ("count_skipped", "long"), ("count_updated", "long"), ("info", "map", [("string")]), ("files", "array", [("string")])] )
ENDPOINT = "/insert/records/fromfiles"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/insert/records/frompayload"
REQ_SCHEMA_STR = """{"type":"record","name":"insert_records_from_payload_request","fields":[{"name":"table_name","type":"string"},{"name":"data_text","type":"string"},{"name":"data_bytes","type":"bytes"},{"name":"modify_columns","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"create_table_options","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"insert_records_from_payload_response","fields":[{"name":"table_name","type":"string"},{"name":"type_id","type":"string"},{"name":"type_definition","type":"string"},{"name":"type_label","type":"string"},{"name":"type_properties","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"count_inserted","type":"long"},{"name":"count_skipped","type":"long"},{"name":"count_updated","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("data_text", "string"), ("data_bytes", "bytes"), ("modify_columns", "map", [("map", [("string")])]), ("create_table_options", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("type_id", "string"), ("type_definition", "string"), ("type_label", "string"), ("type_properties", "map", [("array", [("string")])]), ("count_inserted", "long"), ("count_skipped", "long"), ("count_updated", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/insert/records/frompayload"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/insert/records/fromquery"
REQ_SCHEMA_STR = """{"type":"record","name":"insert_records_from_query_request","fields":[{"name":"table_name","type":"string"},{"name":"remote_query","type":"string"},{"name":"modify_columns","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"create_table_options","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"insert_records_from_query_response","fields":[{"name":"table_name","type":"string"},{"name":"type_id","type":"string"},{"name":"type_definition","type":"string"},{"name":"type_label","type":"string"},{"name":"type_properties","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"count_inserted","type":"long"},{"name":"count_skipped","type":"long"},{"name":"count_updated","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("remote_query", "string"), ("modify_columns", "map", [("map", [("string")])]), ("create_table_options", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("type_id", "string"), ("type_definition", "string"), ("type_label", "string"), ("type_properties", "map", [("array", [("string")])]), ("count_inserted", "long"), ("count_skipped", "long"), ("count_updated", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/insert/records/fromquery"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/insert/records/random"
REQ_SCHEMA_STR = """{"type":"record","name":"insert_records_random_request","fields":[{"name":"table_name","type":"string"},{"name":"count","type":"long"},{"name":"options","type":{"type":"map","values":{"type":"map","values":"double"}}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"insert_records_random_response","fields":[{"name":"table_name","type":"string"},{"name":"count","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("count", "long"), ("options", "map", [("map", [("double")])])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("count", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/insert/records/random"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/insert/symbol"
REQ_SCHEMA_STR = """{"type":"record","name":"insert_symbol_request","fields":[{"name":"symbol_id","type":"string"},{"name":"symbol_format","type":"string"},{"name":"symbol_data","type":"bytes"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"insert_symbol_response","fields":[{"name":"symbol_id","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("symbol_id", "string"), ("symbol_format", "string"), ("symbol_data", "bytes"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("symbol_id", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/insert/symbol"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/kill/proc"
REQ_SCHEMA_STR = """{"type":"record","name":"kill_proc_request","fields":[{"name":"run_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"kill_proc_response","fields":[{"name":"run_ids","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("run_id", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("run_ids", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/kill/proc"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/list/graph"
REQ_SCHEMA_STR = """{"name":"list_graph_request","type":"record","fields":[{"name":"graph_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"list_graph_response","type":"record","fields":[{"name":"result","type":"boolean"},{"name":"graph_names","type":{"type":"array","items":"string"}},{"name":"num_nodes","type":{"type":"array","items":"long"}},{"name":"num_edges","type":{"type":"array","items":"long"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("graph_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("result", "boolean"), ("graph_names", "array", [("string")]), ("num_nodes", "array", [("long")]), ("num_edges", "array", [("long")]), ("info", "map", [("string")])] )
ENDPOINT = "/list/graph"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/lock/table"
REQ_SCHEMA_STR = """{"type":"record","name":"lock_table_request","fields":[{"name":"table_name","type":"string"},{"name":"lock_type","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"lock_table_response","fields":[{"name":"lock_type","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("lock_type", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("lock_type", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/lock/table"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/match/graph"
REQ_SCHEMA_STR = """{"name":"match_graph_request","type":"record","fields":[{"name":"graph_name","type":"string"},{"name":"sample_points","type":{"type":"array","items":"string"}},{"name":"solve_method","type":"string"},{"name":"solution_table","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"match_graph_response","type":"record","fields":[{"name":"result","type":"boolean"},{"name":"match_score","type":"float"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("graph_name", "string"), ("sample_points", "array", [("string")]), ("solve_method", "string"), ("solution_table", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("result", "boolean"), ("match_score", "float"), ("info", "map", [("string")])] )
ENDPOINT = "/match/graph"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/merge/records"
REQ_SCHEMA_STR = """{"type":"record","name":"merge_records_request","fields":[{"name":"table_name","type":"string"},{"name":"source_table_names","type":{"type":"array","items":"string"}},{"name":"field_maps","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"merge_records_response","fields":[{"name":"table_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("source_table_names", "array", [("string")]), ("field_maps", "array", [("map", [("string")])]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/merge/records"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/modify/graph"
REQ_SCHEMA_STR = """{"name":"modify_graph_request","type":"record","fields":[{"name":"graph_name","type":"string"},{"name":"nodes","type":{"type":"array","items":"string"}},{"name":"edges","type":{"type":"array","items":"string"}},{"name":"weights","type":{"type":"array","items":"string"}},{"name":"restrictions","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"modify_graph_response","type":"record","fields":[{"name":"result","type":"boolean"},{"name":"num_nodes","type":"long"},{"name":"num_edges","type":"long"},{"name":"edges_ids","type":{"type":"array","items":"long"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("graph_name", "string"), ("nodes", "array", [("string")]), ("edges", "array", [("string")]), ("weights", "array", [("string")]), ("restrictions", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("result", "boolean"), ("num_nodes", "long"), ("num_edges", "long"), ("edges_ids", "array", [("long")]), ("info", "map", [("string")])] )
ENDPOINT = "/modify/graph"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/query/graph"
REQ_SCHEMA_STR = """{"name":"query_graph_request","type":"record","fields":[{"name":"graph_name","type":"string"},{"name":"queries","type":{"type":"array","items":"string"}},{"name":"restrictions","type":{"type":"array","items":"string"}},{"name":"adjacency_table","type":"string"},{"name":"rings","type":"int"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"query_graph_response","type":"record","fields":[{"name":"result","type":"boolean"},{"name":"adjacency_list_int_array","type":{"type":"array","items":"long"}},{"name":"adjacency_list_string_array","type":{"type":"array","items":"string"}},{"name":"adjacency_list_wkt_array","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("graph_name", "string"), ("queries", "array", [("string")]), ("restrictions", "array", [("string")]), ("adjacency_table", "string"), ("rings", "int"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("result", "boolean"), ("adjacency_list_int_array", "array", [("long")]), ("adjacency_list_string_array", "array", [("string")]), ("adjacency_list_wkt_array", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/query/graph"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/repartition/graph"
REQ_SCHEMA_STR = """{"name":"repartition_graph_request","type":"record","fields":[{"name":"graph_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"repartition_graph_response","type":"record","fields":[{"name":"result","type":"boolean"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("graph_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("result", "boolean"), ("info", "map", [("string")])] )
ENDPOINT = "/repartition/graph"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/reserve/resource"
REQ_SCHEMA_STR = """{"type":"record","name":"reserve_resource_request","fields":[{"name":"component","type":"string"},{"name":"name","type":"string"},{"name":"action","type":"string"},{"name":"bytes_requested","type":"long"},{"name":"owner_id","type":"long"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"reserve_resource_response","fields":[{"name":"component","type":"string"},{"name":"name","type":"string"},{"name":"reservation","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("component", "string"), ("name", "string"), ("action", "string"), ("bytes_requested", "long"), ("owner_id", "long"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("component", "string"), ("name", "string"), ("reservation", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/reserve/resource"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/revoke/permission"
REQ_SCHEMA_STR = """{"type":"record","name":"revoke_permission_request","fields":[{"name":"principal","type":"string"},{"name":"object","type":"string"},{"name":"object_type","type":"string"},{"name":"permission","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"revoke_permission_response","fields":[{"name":"principal","type":"string"},{"name":"object","type":"string"},{"name":"object_type","type":"string"},{"name":"permission","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("principal", "string"), ("object", "string"), ("object_type", "string"), ("permission", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("principal", "string"), ("object", "string"), ("object_type", "string"), ("permission", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/revoke/permission"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/revoke/permission/credential"
REQ_SCHEMA_STR = """{"type":"record","name":"revoke_permission_credential_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"credential_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"revoke_permission_credential_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"credential_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("credential_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("credential_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/revoke/permission/credential"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/revoke/permission/datasource"
REQ_SCHEMA_STR = """{"type":"record","name":"revoke_permission_datasource_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"datasource_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"revoke_permission_datasource_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"datasource_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("datasource_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("datasource_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/revoke/permission/datasource"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/revoke/permission/directory"
REQ_SCHEMA_STR = """{"type":"record","name":"revoke_permission_directory_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"directory_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"revoke_permission_directory_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"directory_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("directory_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("directory_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/revoke/permission/directory"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/revoke/permission/proc"
REQ_SCHEMA_STR = """{"type":"record","name":"revoke_permission_proc_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"proc_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"revoke_permission_proc_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"proc_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("proc_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("proc_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/revoke/permission/proc"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/revoke/permission/system"
REQ_SCHEMA_STR = """{"type":"record","name":"revoke_permission_system_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"revoke_permission_system_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/revoke/permission/system"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/revoke/permission/table"
REQ_SCHEMA_STR = """{"type":"record","name":"revoke_permission_table_request","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"revoke_permission_table_response","fields":[{"name":"name","type":"string"},{"name":"permission","type":"string"},{"name":"table_name","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("table_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("name", "string"), ("permission", "string"), ("table_name", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/revoke/permission/table"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/revoke/role"
REQ_SCHEMA_STR = """{"type":"record","name":"revoke_role_request","fields":[{"name":"role","type":"string"},{"name":"member","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"revoke_role_response","fields":[{"name":"role","type":"string"},{"name":"member","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("role", "string"), ("member", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("role", "string"), ("member", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/revoke/role"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/container/registry"
REQ_SCHEMA_STR = """{"type":"record","name":"show_container_registry_request","fields":[{"name":"registry_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_container_registry_response","fields":[{"name":"registry_name","type":"string"},{"name":"registry_names","type":{"type":"array","items":"string"}},{"name":"entity_ids","type":{"type":"array","items":"int"}},{"name":"uri_list","type":{"type":"array","items":"string"}},{"name":"credential_list","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("registry_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("registry_name", "string"), ("registry_names", "array", [("string")]), ("entity_ids", "array", [("int")]), ("uri_list", "array", [("string")]), ("credential_list", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/container/registry"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/credential"
REQ_SCHEMA_STR = """{"type":"record","name":"show_credential_request","fields":[{"name":"credential_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_credential_response","fields":[{"name":"credential_names","type":{"type":"array","items":"string"}},{"name":"credential_types","type":{"type":"array","items":"string"}},{"name":"credential_identities","type":{"type":"array","items":"string"}},{"name":"credentials","type":{"type":"array","items":"string"}},{"name":"additional_info","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("credential_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("credential_names", "array", [("string")]), ("credential_types", "array", [("string")]), ("credential_identities", "array", [("string")]), ("credentials", "array", [("string")]), ("additional_info", "array", [("map", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/credential"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/datasink"
REQ_SCHEMA_STR = """{"type":"record","name":"show_datasink_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_datasink_response","fields":[{"name":"datasink_names","type":{"type":"array","items":"string"}},{"name":"destination_types","type":{"type":"array","items":"string"}},{"name":"additional_info","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("datasink_names", "array", [("string")]), ("destination_types", "array", [("string")]), ("additional_info", "array", [("map", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/datasink"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/datasource"
REQ_SCHEMA_STR = """{"type":"record","name":"show_datasource_request","fields":[{"name":"name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_datasource_response","fields":[{"name":"datasource_names","type":{"type":"array","items":"string"}},{"name":"storage_provider_types","type":{"type":"array","items":"string"}},{"name":"additional_info","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("datasource_names", "array", [("string")]), ("storage_provider_types", "array", [("string")]), ("additional_info", "array", [("map", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/datasource"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/directories"
REQ_SCHEMA_STR = """{"type":"record","name":"show_directories_request","fields":[{"name":"directory_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_directories_response","fields":[{"name":"directories","type":{"type":"array","items":"string"}},{"name":"users","type":{"type":"array","items":"string"}},{"name":"creation_times","type":{"type":"array","items":"long"}},{"name":"data_usages","type":{"type":"array","items":"long"}},{"name":"data_limits","type":{"type":"array","items":"long"}},{"name":"permissions","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("directory_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("directories", "array", [("string")]), ("users", "array", [("string")]), ("creation_times", "array", [("long")]), ("data_usages", "array", [("long")]), ("data_limits", "array", [("long")]), ("permissions", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/directories"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/environment"
REQ_SCHEMA_STR = """{"type":"record","name":"show_environment_request","fields":[{"name":"environment_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_environment_response","fields":[{"name":"environment_names","type":{"type":"array","items":"string"}},{"name":"packages","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("environment_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("environment_names", "array", [("string")]), ("packages", "array", [("array", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/environment"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/files"
REQ_SCHEMA_STR = """{"type":"record","name":"show_files_request","fields":[{"name":"paths","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_files_response","fields":[{"name":"file_names","type":{"type":"array","items":"string"}},{"name":"sizes","type":{"type":"array","items":"long"}},{"name":"users","type":{"type":"array","items":"string"}},{"name":"creation_times","type":{"type":"array","items":"long"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("paths", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("file_names", "array", [("string")]), ("sizes", "array", [("long")]), ("users", "array", [("string")]), ("creation_times", "array", [("long")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/files"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/functions"
REQ_SCHEMA_STR = """{"type":"record","name":"show_functions_request","fields":[{"name":"names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_functions_response","fields":[{"name":"function_names","type":{"type":"array","items":"string"}},{"name":"return_types","type":{"type":"array","items":"string"}},{"name":"parameters","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"optional_parameter_count","type":{"type":"array","items":"int"}},{"name":"flags","type":{"type":"array","items":"int"}},{"name":"type_schemas","type":{"type":"array","items":"string"}},{"name":"properties","type":{"type":"array","items":{"type":"map","values":{"type":"array","items":"string"}}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("function_names", "array", [("string")]), ("return_types", "array", [("string")]), ("parameters", "array", [("array", [("string")])]), ("optional_parameter_count", "array", [("int")]), ("flags", "array", [("int")]), ("type_schemas", "array", [("string")]), ("properties", "array", [("map", [("array", [("string")])])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/functions"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/graph"
REQ_SCHEMA_STR = """{"name":"show_graph_request","type":"record","fields":[{"name":"graph_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"show_graph_response","type":"record","fields":[{"name":"result","type":"boolean"},{"name":"load","type":{"type":"array","items":"int"}},{"name":"memory","type":{"type":"array","items":"long"}},{"name":"graph_names","type":{"type":"array","items":"string"}},{"name":"graph_server_ids","type":{"type":"array","items":"int"}},{"name":"graph_owner_user_names","type":{"type":"array","items":"string"}},{"name":"graph_owner_resource_groups","type":{"type":"array","items":"string"}},{"name":"directed","type":{"type":"array","items":"boolean"}},{"name":"num_nodes","type":{"type":"array","items":"long"}},{"name":"num_edges","type":{"type":"array","items":"long"}},{"name":"num_bytes","type":{"type":"array","items":"long"}},{"name":"resource_capacity","type":{"type":"array","items":"long"}},{"name":"is_persisted","type":{"type":"array","items":"boolean"}},{"name":"is_partitioned","type":{"type":"array","items":"boolean"}},{"name":"is_sync_db","type":{"type":"array","items":"boolean"}},{"name":"has_insert_table_monitor","type":{"type":"array","items":"boolean"}},{"name":"original_request","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("graph_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("result", "boolean"), ("load", "array", [("int")]), ("memory", "array", [("long")]), ("graph_names", "array", [("string")]), ("graph_server_ids", "array", [("int")]), ("graph_owner_user_names", "array", [("string")]), ("graph_owner_resource_groups", "array", [("string")]), ("directed", "array", [("boolean")]), ("num_nodes", "array", [("long")]), ("num_edges", "array", [("long")]), ("num_bytes", "array", [("long")]), ("resource_capacity", "array", [("long")]), ("is_persisted", "array", [("boolean")]), ("is_partitioned", "array", [("boolean")]), ("is_sync_db", "array", [("boolean")]), ("has_insert_table_monitor", "array", [("boolean")]), ("original_request", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/graph"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/graph/grammar"
REQ_SCHEMA_STR = """{"name":"show_graph_grammar_request","type":"record","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"show_graph_grammar_response","type":"record","fields":[{"name":"result","type":"boolean"},{"name":"components_json","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("result", "boolean"), ("components_json", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/show/graph/grammar"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/model"
REQ_SCHEMA_STR = """{"type":"record","name":"show_model_request","fields":[{"name":"model_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_model_response","fields":[{"name":"model_names","type":{"type":"array","items":"string"}},{"name":"entity_ids","type":{"type":"array","items":"int"}},{"name":"input_schemas","type":{"type":"array","items":"string"}},{"name":"output_schemas","type":{"type":"array","items":"string"}},{"name":"registry_list","type":{"type":"array","items":"string"}},{"name":"container_list","type":{"type":"array","items":"string"}},{"name":"run_function_list","type":{"type":"array","items":"string"}},{"name":"deployments","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("model_names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("model_names", "array", [("string")]), ("entity_ids", "array", [("int")]), ("input_schemas", "array", [("string")]), ("output_schemas", "array", [("string")]), ("registry_list", "array", [("string")]), ("container_list", "array", [("string")]), ("run_function_list", "array", [("string")]), ("deployments", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/model"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/proc"
REQ_SCHEMA_STR = """{"type":"record","name":"show_proc_request","fields":[{"name":"proc_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_proc_response","fields":[{"name":"proc_names","type":{"type":"array","items":"string"}},{"name":"execution_modes","type":{"type":"array","items":"string"}},{"name":"files","type":{"type":"array","items":{"type":"map","values":"bytes"}}},{"name":"commands","type":{"type":"array","items":"string"}},{"name":"args","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("proc_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("proc_names", "array", [("string")]), ("execution_modes", "array", [("string")]), ("files", "array", [("map", [("bytes")])]), ("commands", "array", [("string")]), ("args", "array", [("array", [("string")])]), ("options", "array", [("map", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/proc"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/proc/status"
REQ_SCHEMA_STR = """{"type":"record","name":"show_proc_status_request","fields":[{"name":"run_id","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_proc_status_response","fields":[{"name":"proc_names","type":{"type":"map","values":"string"}},{"name":"params","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"bin_params","type":{"type":"map","values":{"type":"map","values":"bytes"}}},{"name":"input_table_names","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"input_column_names","type":{"type":"map","values":{"type":"map","values":{"type":"array","items":"string"}}}},{"name":"output_table_names","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"overall_statuses","type":{"type":"map","values":"string"}},{"name":"statuses","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"messages","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"results","type":{"type":"map","values":{"type":"map","values":{"type":"map","values":"string"}}}},{"name":"bin_results","type":{"type":"map","values":{"type":"map","values":{"type":"map","values":"bytes"}}}},{"name":"output","type":{"type":"map","values":{"type":"map","values":{"type":"map","values":{"type":"array","items":"string"}}}}},{"name":"timings","type":{"type":"map","values":{"type":"map","values":{"type":"map","values":"long"}}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("run_id", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("proc_names", "map", [("string")]), ("params", "map", [("map", [("string")])]), ("bin_params", "map", [("map", [("bytes")])]), ("input_table_names", "map", [("array", [("string")])]), ("input_column_names", "map", [("map", [("array", [("string")])])]), ("output_table_names", "map", [("array", [("string")])]), ("options", "map", [("map", [("string")])]), ("overall_statuses", "map", [("string")]), ("statuses", "map", [("map", [("string")])]), ("messages", "map", [("map", [("string")])]), ("results", "map", [("map", [("map", [("string")])])]), ("bin_results", "map", [("map", [("map", [("bytes")])])]), ("output", "map", [("map", [("map", [("array", [("string")])])])]), ("timings", "map", [("map", [("map", [("long")])])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/proc/status"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/resource/objects"
REQ_SCHEMA_STR = """{"type":"record","name":"show_resource_objects_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_resource_objects_response","fields":[{"name":"rank_objects","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("rank_objects", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/resource/objects"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/resource/statistics"
REQ_SCHEMA_STR = """{"type":"record","name":"show_resource_statistics_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_resource_statistics_response","fields":[{"name":"statistics_map","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("statistics_map", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/resource/statistics"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/resourcegroups"
REQ_SCHEMA_STR = """{"type":"record","name":"show_resource_groups_request","fields":[{"name":"names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_resource_groups_response","fields":[{"name":"groups","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"rank_usage","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("groups", "array", [("map", [("string")])]), ("rank_usage", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/resourcegroups"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/schema"
REQ_SCHEMA_STR = """{"type":"record","name":"show_schema_request","fields":[{"name":"schema_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_schema_response","fields":[{"name":"schema_name","type":"string"},{"name":"schema_names","type":{"type":"array","items":"string"}},{"name":"schema_tables","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"additional_info","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("schema_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("schema_name", "string"), ("schema_names", "array", [("string")]), ("schema_tables", "array", [("array", [("string")])]), ("additional_info", "array", [("map", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/schema"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/security"
REQ_SCHEMA_STR = """{"type":"record","name":"show_security_request","fields":[{"name":"names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_security_response","fields":[{"name":"types","type":{"type":"map","values":"string"}},{"name":"roles","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"permissions","type":{"type":"map","values":{"type":"array","items":{"type":"map","values":"string"}}}},{"name":"resource_groups","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("types", "map", [("string")]), ("roles", "map", [("array", [("string")])]), ("permissions", "map", [("array", [("map", [("string")])])]), ("resource_groups", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/security"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/sql/proc"
REQ_SCHEMA_STR = """{"type":"record","name":"show_sql_proc_request","fields":[{"name":"procedure_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_sql_proc_response","fields":[{"name":"procedure_names","type":{"type":"array","items":"string"}},{"name":"procedure_definitions","type":{"type":"array","items":"string"}},{"name":"additional_info","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("procedure_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("procedure_names", "array", [("string")]), ("procedure_definitions", "array", [("string")]), ("additional_info", "array", [("map", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/sql/proc"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/statistics"
REQ_SCHEMA_STR = """{"type":"record","name":"show_statistics_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_statistics_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"stastistics_map","type":{"type":"array","items":{"type":"array","items":{"type":"map","values":"string"}}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("stastistics_map", "array", [("array", [("map", [("string")])])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/statistics"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/system/properties"
REQ_SCHEMA_STR = """{"type":"record","name":"show_system_properties_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_system_properties_response","fields":[{"name":"property_map","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("property_map", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/system/properties"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/system/status"
REQ_SCHEMA_STR = """{"type":"record","name":"show_system_status_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_system_status_response","fields":[{"name":"status_map","type":{"type":"map","values":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("status_map", "map", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/system/status"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/system/timing"
REQ_SCHEMA_STR = """{"type":"record","name":"show_system_timing_request","fields":[{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_system_timing_response","fields":[{"name":"endpoints","type":{"type":"array","items":"string"}},{"name":"time_in_ms","type":{"type":"array","items":"float"}},{"name":"jobIds","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("endpoints", "array", [("string")]), ("time_in_ms", "array", [("float")]), ("jobIds", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/system/timing"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/table"
REQ_SCHEMA_STR = """{"type":"record","name":"show_table_request","fields":[{"name":"table_name","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_table_response","fields":[{"name":"table_name","type":"string"},{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"table_descriptions","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"type_ids","type":{"type":"array","items":"string"}},{"name":"type_schemas","type":{"type":"array","items":"string"}},{"name":"type_labels","type":{"type":"array","items":"string"}},{"name":"properties","type":{"type":"array","items":{"type":"map","values":{"type":"array","items":"string"}}}},{"name":"additional_info","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"sizes","type":{"type":"array","items":"long"}},{"name":"full_sizes","type":{"type":"array","items":"long"}},{"name":"join_sizes","type":{"type":"array","items":"double"}},{"name":"total_size","type":"long"},{"name":"total_full_size","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_name", "string"), ("table_names", "array", [("string")]), ("table_descriptions", "array", [("array", [("string")])]), ("type_ids", "array", [("string")]), ("type_schemas", "array", [("string")]), ("type_labels", "array", [("string")]), ("properties", "array", [("map", [("array", [("string")])])]), ("additional_info", "array", [("map", [("string")])]), ("sizes", "array", [("long")]), ("full_sizes", "array", [("long")]), ("join_sizes", "array", [("double")]), ("total_size", "long"), ("total_full_size", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/show/table"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/table/metadata"
REQ_SCHEMA_STR = """{"type":"record","name":"show_table_metadata_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_table_metadata_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"metadata_maps","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("metadata_maps", "array", [("map", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/table/metadata"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/tablemonitors"
REQ_SCHEMA_STR = """{"type":"record","name":"show_table_monitors_request","fields":[{"name":"monitor_ids","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_table_monitors_response","fields":[{"name":"monitor_ids","type":{"type":"array","items":"string"}},{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"events","type":{"type":"array","items":"string"}},{"name":"increasing_columns","type":{"type":"array","items":"string"}},{"name":"filter_expressions","type":{"type":"array","items":"string"}},{"name":"refresh_method","type":{"type":"array","items":"string"}},{"name":"refresh_period","type":{"type":"array","items":"string"}},{"name":"refresh_start_time","type":{"type":"array","items":"string"}},{"name":"datasink_names","type":{"type":"array","items":"string"}},{"name":"additional_info","type":{"type":"array","items":{"type":"map","values":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("monitor_ids", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("monitor_ids", "array", [("string")]), ("table_names", "array", [("string")]), ("events", "array", [("string")]), ("increasing_columns", "array", [("string")]), ("filter_expressions", "array", [("string")]), ("refresh_method", "array", [("string")]), ("refresh_period", "array", [("string")]), ("refresh_start_time", "array", [("string")]), ("datasink_names", "array", [("string")]), ("additional_info", "array", [("map", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/tablemonitors"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/tables/bytype"
REQ_SCHEMA_STR = """{"type":"record","name":"show_tables_by_type_request","fields":[{"name":"type_id","type":"string"},{"name":"label","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_tables_by_type_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("type_id", "string"), ("label", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/tables/bytype"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/triggers"
REQ_SCHEMA_STR = """{"type":"record","name":"show_triggers_request","fields":[{"name":"trigger_ids","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_triggers_response","fields":[{"name":"trigger_map","type":{"type":"map","values":{"type":"map","values":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("trigger_ids", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("trigger_map", "map", [("map", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/triggers"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/types"
REQ_SCHEMA_STR = """{"type":"record","name":"show_types_request","fields":[{"name":"type_id","type":"string"},{"name":"label","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"show_types_response","fields":[{"name":"type_ids","type":{"type":"array","items":"string"}},{"name":"type_schemas","type":{"type":"array","items":"string"}},{"name":"labels","type":{"type":"array","items":"string"}},{"name":"properties","type":{"type":"array","items":{"type":"map","values":{"type":"array","items":"string"}}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("type_id", "string"), ("label", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("type_ids", "array", [("string")]), ("type_schemas", "array", [("string")]), ("labels", "array", [("string")]), ("properties", "array", [("map", [("array", [("string")])])]), ("info", "map", [("string")])] )
ENDPOINT = "/show/types"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/show/video"
REQ_SCHEMA_STR = """{"type":"record","name":"show_video_request","fields":[{"name":"paths","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"show_video_response","type":"record","fields":[{"name":"creation_times","type":{"type":"array","items":"string"}},{"name":"elapsed_render_time_seconds","type":{"type":"array","items":"long"}},{"name":"job_ids","type":{"type":"array","items":"long"}},{"name":"paths","type":{"type":"array","items":"string"}},{"name":"rendered_bytes","type":{"type":"array","items":"long"}},{"name":"rendered_frames","type":{"type":"array","items":"long"}},{"name":"rendered_percents","type":{"type":"array","items":"long"}},{"name":"requests","type":{"type":"array","items":"string"}},{"name":"status","type":{"type":"array","items":"string"}},{"name":"ttls","type":{"type":"array","items":"long"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("paths", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("creation_times", "array", [("string")]), ("elapsed_render_time_seconds", "array", [("long")]), ("job_ids", "array", [("long")]), ("paths", "array", [("string")]), ("rendered_bytes", "array", [("long")]), ("rendered_frames", "array", [("long")]), ("rendered_percents", "array", [("long")]), ("requests", "array", [("string")]), ("status", "array", [("string")]), ("ttls", "array", [("long")]), ("info", "map", [("string")])] )
ENDPOINT = "/show/video"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/solve/graph"
REQ_SCHEMA_STR = """{"name":"solve_graph_request","type":"record","fields":[{"name":"graph_name","type":"string"},{"name":"weights_on_edges","type":{"type":"array","items":"string"}},{"name":"restrictions","type":{"type":"array","items":"string"}},{"name":"solver_type","type":"string"},{"name":"source_nodes","type":{"type":"array","items":"string"}},{"name":"destination_nodes","type":{"type":"array","items":"string"}},{"name":"solution_table","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"name":"solve_graph_response","type":"record","fields":[{"name":"result","type":"boolean"},{"name":"result_per_destination_node","type":{"type":"array","items":"float"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("graph_name", "string"), ("weights_on_edges", "array", [("string")]), ("restrictions", "array", [("string")]), ("solver_type", "string"), ("source_nodes", "array", [("string")]), ("destination_nodes", "array", [("string")]), ("solution_table", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("result", "boolean"), ("result_per_destination_node", "array", [("float")]), ("info", "map", [("string")])] )
ENDPOINT = "/solve/graph"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/update/records"
REQ_SCHEMA_STR = """{"type":"record","name":"update_records_request","fields":[{"name":"table_name","type":"string"},{"name":"expressions","type":{"type":"array","items":"string"}},{"name":"new_values_maps","type":{"type":"array","items":{"type":"map","values":["string","null"]}}},{"name":"records_to_insert","type":{"type":"array","items":"bytes"}},{"name":"records_to_insert_str","type":{"type":"array","items":"string"}},{"name":"record_encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"update_records_response","fields":[{"name":"count_updated","type":"long"},{"name":"counts_updated","type":{"type":"array","items":"long"}},{"name":"count_inserted","type":"long"},{"name":"counts_inserted","type":{"type":"array","items":"long"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("expressions", "array", [("string")]), ("new_values_maps", "array", [("map", [("nullable", [("string")])])]), ("records_to_insert", "array", [("bytes")]), ("records_to_insert_str", "array", [("string")]), ("record_encoding", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count_updated", "long"), ("counts_updated", "array", [("long")]), ("count_inserted", "long"), ("counts_inserted", "array", [("long")]), ("info", "map", [("string")])] )
REQ_SCHEMA_CEXT = Schema( "record", [("table_name", "string"), ("expressions", "array", [("string")]), ("new_values_maps", "array", [("map", [("nullable", [("string")])])]), ("records_to_insert", "object_array"), ("records_to_insert_str", "array", [("string")]), ("record_encoding", "string"), ("options", "map", [("string")])] )
ENDPOINT = "/update/records"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"REQ_SCHEMA_CEXT" : REQ_SCHEMA_CEXT,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/update/records/byseries"
REQ_SCHEMA_STR = """{"type":"record","name":"update_records_by_series_request","fields":[{"name":"table_name","type":"string"},{"name":"world_table_name","type":"string"},{"name":"view_name","type":"string"},{"name":"reserved","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"update_records_by_series_response","fields":[{"name":"count","type":"int"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("world_table_name", "string"), ("view_name", "string"), ("reserved", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("count", "int"), ("info", "map", [("string")])] )
ENDPOINT = "/update/records/byseries"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/upload/files"
REQ_SCHEMA_STR = """{"type":"record","name":"upload_files_request","fields":[{"name":"file_names","type":{"type":"array","items":"string"}},{"name":"file_data","type":{"type":"array","items":"bytes"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"upload_files_response","fields":[{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("file_names", "array", [("string")]), ("file_data", "array", [("bytes")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("info", "map", [("string")])] )
ENDPOINT = "/upload/files"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/upload/files/fromurl"
REQ_SCHEMA_STR = """{"type":"record","name":"upload_files_fromurl_request","fields":[{"name":"file_names","type":{"type":"array","items":"string"}},{"name":"urls","type":{"type":"array","items":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"upload_files_fromurl_response","fields":[{"name":"successful_file_names","type":{"type":"array","items":"string"}},{"name":"successful_urls","type":{"type":"array","items":"string"}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("file_names", "array", [("string")]), ("urls", "array", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("successful_file_names", "array", [("string")]), ("successful_urls", "array", [("string")]), ("info", "map", [("string")])] )
ENDPOINT = "/upload/files/fromurl"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/visualize/getfeatureinfo"
REQ_SCHEMA_STR = """{"type":"record","name":"visualize_get_feature_info_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"x_column_names","type":{"type":"array","items":"string"}},{"name":"y_column_names","type":{"type":"array","items":"string"}},{"name":"geometry_column_names","type":{"type":"array","items":"string"}},{"name":"query_column_names","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"projection","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"x","type":"int"},{"name":"y","type":"int"},{"name":"radius","type":"int"},{"name":"limit","type":"long"},{"name":"encoding","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"visualize_get_feature_info_response","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"type_schemas","type":{"type":"array","items":"string"}},{"name":"records_binary","type":{"type":"array","items":"bytes"}},{"name":"records_json","type":{"type":"array","items":"string"}},{"name":"geojson_encoded_response","type":"string"},{"name":"text_encoded_response","type":"string"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("x_column_names", "array", [("string")]), ("y_column_names", "array", [("string")]), ("geometry_column_names", "array", [("string")]), ("query_column_names", "array", [("array", [("string")])]), ("projection", "string"), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("x", "int"), ("y", "int"), ("radius", "int"), ("limit", "long"), ("encoding", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("type_schemas", "array", [("string")]), ("records_binary", "array", [("bytes")]), ("records_json", "array", [("string")]), ("geojson_encoded_response", "string"), ("text_encoded_response", "string"), ("info", "map", [("string")])] )
ENDPOINT = "/visualize/getfeatureinfo"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/visualize/image"
REQ_SCHEMA_STR = """{"type":"record","name":"visualize_image_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"world_table_names","type":{"type":"array","items":"string"}},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"symbol_column_name","type":"string"},{"name":"geometry_column_name","type":"string"},{"name":"track_ids","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"bg_color","type":"long"},{"name":"style_options","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_response","fields":[{"name":"width","type":"double"},{"name":"height","type":"double"},{"name":"bg_color","type":"long"},{"name":"image_data","type":"bytes"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("world_table_names", "array", [("string")]), ("x_column_name", "string"), ("y_column_name", "string"), ("symbol_column_name", "string"), ("geometry_column_name", "string"), ("track_ids", "array", [("array", [("string")])]), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("projection", "string"), ("bg_color", "long"), ("style_options", "map", [("array", [("string")])]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("width", "double"), ("height", "double"), ("bg_color", "long"), ("image_data", "bytes"), ("info", "map", [("string")])] )
ENDPOINT = "/visualize/image"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/visualize/image/chart"
REQ_SCHEMA_STR = """{"type":"record","name":"visualize_image_chart_request","fields":[{"name":"table_name","type":"string"},{"name":"x_column_names","type":{"type":"array","items":"string"}},{"name":"y_column_names","type":{"type":"array","items":"string"}},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"bg_color","type":"string"},{"name":"style_options","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_chart_response","fields":[{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"bg_color","type":"string"},{"name":"image_data","type":"bytes"},{"name":"axes_info","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("x_column_names", "array", [("string")]), ("y_column_names", "array", [("string")]), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("bg_color", "string"), ("style_options", "map", [("array", [("string")])]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("bg_color", "string"), ("image_data", "bytes"), ("axes_info", "map", [("array", [("string")])]), ("info", "map", [("string")])] )
ENDPOINT = "/visualize/image/chart"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/visualize/image/classbreak"
REQ_SCHEMA_STR = """{"type":"record","name":"visualize_image_classbreak_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"world_table_names","type":{"type":"array","items":"string"}},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"symbol_column_name","type":"string"},{"name":"geometry_column_name","type":"string"},{"name":"track_ids","type":{"type":"array","items":{"type":"array","items":"string"}}},{"name":"cb_attr","type":"string"},{"name":"cb_vals","type":{"type":"array","items":"string"}},{"name":"cb_pointcolor_attr","type":"string"},{"name":"cb_pointcolor_vals","type":{"type":"array","items":"string"}},{"name":"cb_pointalpha_attr","type":"string"},{"name":"cb_pointalpha_vals","type":{"type":"array","items":"string"}},{"name":"cb_pointsize_attr","type":"string"},{"name":"cb_pointsize_vals","type":{"type":"array","items":"string"}},{"name":"cb_pointshape_attr","type":"string"},{"name":"cb_pointshape_vals","type":{"type":"array","items":"string"}},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"bg_color","type":"long"},{"name":"style_options","type":{"type":"map","values":{"type":"array","items":"string"}}},{"name":"options","type":{"type":"map","values":"string"}},{"name":"cb_transparency_vec","type":{"type":"array","items":"int"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_classbreak_response","fields":[{"name":"width","type":"double"},{"name":"height","type":"double"},{"name":"bg_color","type":"long"},{"name":"image_data","type":"bytes"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("world_table_names", "array", [("string")]), ("x_column_name", "string"), ("y_column_name", "string"), ("symbol_column_name", "string"), ("geometry_column_name", "string"), ("track_ids", "array", [("array", [("string")])]), ("cb_attr", "string"), ("cb_vals", "array", [("string")]), ("cb_pointcolor_attr", "string"), ("cb_pointcolor_vals", "array", [("string")]), ("cb_pointalpha_attr", "string"), ("cb_pointalpha_vals", "array", [("string")]), ("cb_pointsize_attr", "string"), ("cb_pointsize_vals", "array", [("string")]), ("cb_pointshape_attr", "string"), ("cb_pointshape_vals", "array", [("string")]), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("projection", "string"), ("bg_color", "long"), ("style_options", "map", [("array", [("string")])]), ("options", "map", [("string")]), ("cb_transparency_vec", "array", [("int")])] )
RSP_SCHEMA = Schema( "record", [("width", "double"), ("height", "double"), ("bg_color", "long"), ("image_data", "bytes"), ("info", "map", [("string")])] )
ENDPOINT = "/visualize/image/classbreak"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/visualize/image/contour"
REQ_SCHEMA_STR = """{"type":"record","name":"visualize_image_contour_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"value_column_name","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"style_options","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_contour_response","fields":[{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"bg_color","type":"long"},{"name":"image_data","type":"bytes"},{"name":"grid_data","type":"bytes"},{"name":"fill_n0","type":"double"},{"name":"fill_nn","type":"double"},{"name":"min_level","type":"double"},{"name":"max_level","type":"double"},{"name":"samples_used","type":"long"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("x_column_name", "string"), ("y_column_name", "string"), ("value_column_name", "string"), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("projection", "string"), ("style_options", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("width", "int"), ("height", "int"), ("bg_color", "long"), ("image_data", "bytes"), ("grid_data", "bytes"), ("fill_n0", "double"), ("fill_nn", "double"), ("min_level", "double"), ("max_level", "double"), ("samples_used", "long"), ("info", "map", [("string")])] )
ENDPOINT = "/visualize/image/contour"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/visualize/image/heatmap"
REQ_SCHEMA_STR = """{"type":"record","name":"visualize_image_heatmap_request","fields":[{"name":"table_names","type":{"type":"array","items":"string"}},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"value_column_name","type":"string"},{"name":"geometry_column_name","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"style_options","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_heatmap_response","fields":[{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"bg_color","type":"long"},{"name":"image_data","type":"bytes"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_names", "array", [("string")]), ("x_column_name", "string"), ("y_column_name", "string"), ("value_column_name", "string"), ("geometry_column_name", "string"), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("projection", "string"), ("style_options", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("width", "int"), ("height", "int"), ("bg_color", "long"), ("image_data", "bytes"), ("info", "map", [("string")])] )
ENDPOINT = "/visualize/image/heatmap"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/visualize/image/labels"
REQ_SCHEMA_STR = """{"type":"record","name":"visualize_image_labels_request","fields":[{"name":"table_name","type":"string"},{"name":"x_column_name","type":"string"},{"name":"y_column_name","type":"string"},{"name":"x_offset","type":"string"},{"name":"y_offset","type":"string"},{"name":"text_string","type":"string"},{"name":"font","type":"string"},{"name":"text_color","type":"string"},{"name":"text_angle","type":"string"},{"name":"text_scale","type":"string"},{"name":"draw_box","type":"string"},{"name":"draw_leader","type":"string"},{"name":"line_width","type":"string"},{"name":"line_color","type":"string"},{"name":"fill_color","type":"string"},{"name":"leader_x_column_name","type":"string"},{"name":"leader_y_column_name","type":"string"},{"name":"filter","type":"string"},{"name":"min_x","type":"double"},{"name":"max_x","type":"double"},{"name":"min_y","type":"double"},{"name":"max_y","type":"double"},{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"projection","type":"string"},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"visualize_image_labels_response","fields":[{"name":"width","type":"double"},{"name":"height","type":"double"},{"name":"bg_color","type":"long"},{"name":"image_data","type":"bytes"},{"name":"info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("table_name", "string"), ("x_column_name", "string"), ("y_column_name", "string"), ("x_offset", "string"), ("y_offset", "string"), ("text_string", "string"), ("font", "string"), ("text_color", "string"), ("text_angle", "string"), ("text_scale", "string"), ("draw_box", "string"), ("draw_leader", "string"), ("line_width", "string"), ("line_color", "string"), ("fill_color", "string"), ("leader_x_column_name", "string"), ("leader_y_column_name", "string"), ("filter", "string"), ("min_x", "double"), ("max_x", "double"), ("min_y", "double"), ("max_y", "double"), ("width", "int"), ("height", "int"), ("projection", "string"), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("width", "double"), ("height", "double"), ("bg_color", "long"), ("image_data", "bytes"), ("info", "map", [("string")])] )
ENDPOINT = "/visualize/image/labels"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
name = "/visualize/isochrone"
REQ_SCHEMA_STR = """{"name":"visualize_isochrone_request","type":"record","fields":[{"name":"graph_name","type":"string"},{"name":"source_node","type":"string"},{"name":"max_solution_radius","type":"double"},{"name":"weights_on_edges","type":{"type":"array","items":"string"}},{"name":"restrictions","type":{"type":"array","items":"string"}},{"name":"num_levels","type":"int"},{"name":"generate_image","type":"boolean"},{"name":"levels_table","type":"string"},{"name":"style_options","type":{"type":"map","values":"string"}},{"name":"solve_options","type":{"type":"map","values":"string"}},{"name":"contour_options","type":{"type":"map","values":"string"}},{"name":"options","type":{"type":"map","values":"string"}}]}"""
RSP_SCHEMA_STR = """{"type":"record","name":"visualize_isochrone_response","fields":[{"name":"width","type":"int"},{"name":"height","type":"int"},{"name":"bg_color","type":"long"},{"name":"image_data","type":"bytes"},{"name":"info","type":{"type":"map","values":"string"}},{"name":"solve_info","type":{"type":"map","values":"string"}},{"name":"contour_info","type":{"type":"map","values":"string"}}]}"""
REQ_SCHEMA = Schema( "record", [("graph_name", "string"), ("source_node", "string"), ("max_solution_radius", "double"), ("weights_on_edges", "array", [("string")]), ("restrictions", "array", [("string")]), ("num_levels", "int"), ("generate_image", "boolean"), ("levels_table", "string"), ("style_options", "map", [("string")]), ("solve_options", "map", [("string")]), ("contour_options", "map", [("string")]), ("options", "map", [("string")])] )
RSP_SCHEMA = Schema( "record", [("width", "int"), ("height", "int"), ("bg_color", "long"), ("image_data", "bytes"), ("info", "map", [("string")]), ("solve_info", "map", [("string")]), ("contour_info", "map", [("string")])] )
ENDPOINT = "/visualize/isochrone"
self.gpudb_schemas[ name ] = { "REQ_SCHEMA_STR" : REQ_SCHEMA_STR,
"RSP_SCHEMA_STR" : RSP_SCHEMA_STR,
"REQ_SCHEMA" : REQ_SCHEMA,
"RSP_SCHEMA" : RSP_SCHEMA,
"ENDPOINT" : ENDPOINT }
# end load_gpudb_schemas
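# Illustrative note (not part of the generated code): once load_gpudb_schemas()
# has populated self.gpudb_schemas, each endpoint's entry can be looked up by
# its REST path. A minimal sketch of such a lookup:
#
#     entry      = self.gpudb_schemas[ "/show/table" ]
#     req_schema = entry[ "REQ_SCHEMA" ]   # Schema used to encode the request
#     rsp_schema = entry[ "RSP_SCHEMA" ]   # Schema used to decode the response
#     endpoint   = entry[ "ENDPOINT" ]     # "/show/table"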
def load_gpudb_func_to_endpoint_map( self ):
"""Saves a mapping of rest endpoint function names
to endpoints in a dictionary.
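For illustration only (a hypothetical lookup sketch; ``show_table`` is just
one of the keys registered below)::
    endpoint = self.gpudb_func_to_endpoint_map[ "show_table" ]   # "/show/table"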
"""
self.gpudb_func_to_endpoint_map = {}
self.gpudb_func_to_endpoint_map["admin_add_host"] = "/admin/add/host"
self.gpudb_func_to_endpoint_map["admin_add_ranks"] = "/admin/add/ranks"
self.gpudb_func_to_endpoint_map["admin_alter_configuration"] = "/admin/alter/configuration"
self.gpudb_func_to_endpoint_map["admin_alter_host"] = "/admin/alter/host"
self.gpudb_func_to_endpoint_map["admin_alter_jobs"] = "/admin/alter/jobs"
self.gpudb_func_to_endpoint_map["admin_alter_shards"] = "/admin/alter/shards"
self.gpudb_func_to_endpoint_map["admin_backup_begin"] = "/admin/backup/begin"
self.gpudb_func_to_endpoint_map["admin_backup_end"] = "/admin/backup/end"
self.gpudb_func_to_endpoint_map["admin_ha_refresh"] = "/admin/ha/refresh"
self.gpudb_func_to_endpoint_map["admin_offline"] = "/admin/offline"
self.gpudb_func_to_endpoint_map["admin_rebalance"] = "/admin/rebalance"
self.gpudb_func_to_endpoint_map["admin_remove_host"] = "/admin/remove/host"
self.gpudb_func_to_endpoint_map["admin_remove_ranks"] = "/admin/remove/ranks"
self.gpudb_func_to_endpoint_map["admin_show_alerts"] = "/admin/show/alerts"
self.gpudb_func_to_endpoint_map["admin_show_cluster_operations"] = "/admin/show/cluster/operations"
self.gpudb_func_to_endpoint_map["admin_show_configuration"] = "/admin/show/configuration"
self.gpudb_func_to_endpoint_map["admin_show_jobs"] = "/admin/show/jobs"
self.gpudb_func_to_endpoint_map["admin_show_shards"] = "/admin/show/shards"
self.gpudb_func_to_endpoint_map["admin_shutdown"] = "/admin/shutdown"
self.gpudb_func_to_endpoint_map["admin_switchover"] = "/admin/switchover"
self.gpudb_func_to_endpoint_map["admin_verify_db"] = "/admin/verifydb"
self.gpudb_func_to_endpoint_map["aggregate_convex_hull"] = "/aggregate/convexhull"
self.gpudb_func_to_endpoint_map["aggregate_group_by"] = "/aggregate/groupby"
self.gpudb_func_to_endpoint_map["aggregate_histogram"] = "/aggregate/histogram"
self.gpudb_func_to_endpoint_map["aggregate_k_means"] = "/aggregate/kmeans"
self.gpudb_func_to_endpoint_map["aggregate_min_max"] = "/aggregate/minmax"
self.gpudb_func_to_endpoint_map["aggregate_min_max_geometry"] = "/aggregate/minmax/geometry"
self.gpudb_func_to_endpoint_map["aggregate_statistics"] = "/aggregate/statistics"
self.gpudb_func_to_endpoint_map["aggregate_statistics_by_range"] = "/aggregate/statistics/byrange"
self.gpudb_func_to_endpoint_map["aggregate_unique"] = "/aggregate/unique"
self.gpudb_func_to_endpoint_map["aggregate_unpivot"] = "/aggregate/unpivot"
self.gpudb_func_to_endpoint_map["alter_credential"] = "/alter/credential"
self.gpudb_func_to_endpoint_map["alter_datasink"] = "/alter/datasink"
self.gpudb_func_to_endpoint_map["alter_datasource"] = "/alter/datasource"
self.gpudb_func_to_endpoint_map["alter_directory"] = "/alter/directory"
self.gpudb_func_to_endpoint_map["alter_environment"] = "/alter/environment"
self.gpudb_func_to_endpoint_map["alter_graph"] = "/alter/graph"
self.gpudb_func_to_endpoint_map["alter_model"] = "/alter/model"
self.gpudb_func_to_endpoint_map["alter_resource_group"] = "/alter/resourcegroup"
self.gpudb_func_to_endpoint_map["alter_role"] = "/alter/role"
self.gpudb_func_to_endpoint_map["alter_schema"] = "/alter/schema"
self.gpudb_func_to_endpoint_map["alter_system_properties"] = "/alter/system/properties"
self.gpudb_func_to_endpoint_map["alter_table"] = "/alter/table"
self.gpudb_func_to_endpoint_map["alter_table_columns"] = "/alter/table/columns"
self.gpudb_func_to_endpoint_map["alter_table_metadata"] = "/alter/table/metadata"
self.gpudb_func_to_endpoint_map["alter_table_monitor"] = "/alter/tablemonitor"
self.gpudb_func_to_endpoint_map["alter_tier"] = "/alter/tier"
self.gpudb_func_to_endpoint_map["alter_user"] = "/alter/user"
self.gpudb_func_to_endpoint_map["alter_user_reveal"] = "/alter/user/reveal"
self.gpudb_func_to_endpoint_map["alter_video"] = "/alter/video"
self.gpudb_func_to_endpoint_map["append_records"] = "/append/records"
self.gpudb_func_to_endpoint_map["clear_statistics"] = "/clear/statistics"
self.gpudb_func_to_endpoint_map["clear_table"] = "/clear/table"
self.gpudb_func_to_endpoint_map["clear_table_monitor"] = "/clear/tablemonitor"
self.gpudb_func_to_endpoint_map["clear_trigger"] = "/clear/trigger"
self.gpudb_func_to_endpoint_map["collect_statistics"] = "/collect/statistics"
self.gpudb_func_to_endpoint_map["create_container_registry"] = "/create/container/registry"
self.gpudb_func_to_endpoint_map["create_credential"] = "/create/credential"
self.gpudb_func_to_endpoint_map["create_datasink"] = "/create/datasink"
self.gpudb_func_to_endpoint_map["create_datasource"] = "/create/datasource"
self.gpudb_func_to_endpoint_map["create_delta_table"] = "/create/deltatable"
self.gpudb_func_to_endpoint_map["create_directory"] = "/create/directory"
self.gpudb_func_to_endpoint_map["create_environment"] = "/create/environment"
self.gpudb_func_to_endpoint_map["create_graph"] = "/create/graph"
self.gpudb_func_to_endpoint_map["create_job"] = "/create/job"
self.gpudb_func_to_endpoint_map["create_join_table"] = "/create/jointable"
self.gpudb_func_to_endpoint_map["create_materialized_view"] = "/create/materializedview"
self.gpudb_func_to_endpoint_map["create_proc"] = "/create/proc"
self.gpudb_func_to_endpoint_map["create_projection"] = "/create/projection"
self.gpudb_func_to_endpoint_map["create_resource_group"] = "/create/resourcegroup"
self.gpudb_func_to_endpoint_map["create_role"] = "/create/role"
self.gpudb_func_to_endpoint_map["create_schema"] = "/create/schema"
self.gpudb_func_to_endpoint_map["create_state_table"] = "/create/statetable"
self.gpudb_func_to_endpoint_map["create_table"] = "/create/table"
self.gpudb_func_to_endpoint_map["create_table_external"] = "/create/table/external"
self.gpudb_func_to_endpoint_map["create_table_monitor"] = "/create/tablemonitor"
self.gpudb_func_to_endpoint_map["create_trigger_by_area"] = "/create/trigger/byarea"
self.gpudb_func_to_endpoint_map["create_trigger_by_range"] = "/create/trigger/byrange"
self.gpudb_func_to_endpoint_map["create_type"] = "/create/type"
self.gpudb_func_to_endpoint_map["create_union"] = "/create/union"
self.gpudb_func_to_endpoint_map["create_user_external"] = "/create/user/external"
self.gpudb_func_to_endpoint_map["create_user_internal"] = "/create/user/internal"
self.gpudb_func_to_endpoint_map["create_user_reveal"] = "/create/user/reveal"
self.gpudb_func_to_endpoint_map["create_video"] = "/create/video"
self.gpudb_func_to_endpoint_map["delete_directory"] = "/delete/directory"
self.gpudb_func_to_endpoint_map["delete_files"] = "/delete/files"
self.gpudb_func_to_endpoint_map["delete_graph"] = "/delete/graph"
self.gpudb_func_to_endpoint_map["delete_proc"] = "/delete/proc"
self.gpudb_func_to_endpoint_map["delete_records"] = "/delete/records"
self.gpudb_func_to_endpoint_map["delete_resource_group"] = "/delete/resourcegroup"
self.gpudb_func_to_endpoint_map["delete_role"] = "/delete/role"
self.gpudb_func_to_endpoint_map["delete_user"] = "/delete/user"
self.gpudb_func_to_endpoint_map["download_files"] = "/download/files"
self.gpudb_func_to_endpoint_map["drop_container_registry"] = "/drop/container/registry"
self.gpudb_func_to_endpoint_map["drop_credential"] = "/drop/credential"
self.gpudb_func_to_endpoint_map["drop_datasink"] = "/drop/datasink"
self.gpudb_func_to_endpoint_map["drop_datasource"] = "/drop/datasource"
self.gpudb_func_to_endpoint_map["drop_environment"] = "/drop/environment"
self.gpudb_func_to_endpoint_map["drop_model"] = "/drop/model"
self.gpudb_func_to_endpoint_map["drop_schema"] = "/drop/schema"
self.gpudb_func_to_endpoint_map["evaluate_model"] = "/evaluate/model"
self.gpudb_func_to_endpoint_map["execute_proc"] = "/execute/proc"
self.gpudb_func_to_endpoint_map["execute_sql"] = "/execute/sql"
self.gpudb_func_to_endpoint_map["export_records_to_files"] = "/export/records/tofiles"
self.gpudb_func_to_endpoint_map["export_records_to_table"] = "/export/records/totable"
self.gpudb_func_to_endpoint_map["filter"] = "/filter"
self.gpudb_func_to_endpoint_map["filter_by_area"] = "/filter/byarea"
self.gpudb_func_to_endpoint_map["filter_by_area_geometry"] = "/filter/byarea/geometry"
self.gpudb_func_to_endpoint_map["filter_by_box"] = "/filter/bybox"
self.gpudb_func_to_endpoint_map["filter_by_box_geometry"] = "/filter/bybox/geometry"
self.gpudb_func_to_endpoint_map["filter_by_geometry"] = "/filter/bygeometry"
self.gpudb_func_to_endpoint_map["filter_by_list"] = "/filter/bylist"
self.gpudb_func_to_endpoint_map["filter_by_radius"] = "/filter/byradius"
self.gpudb_func_to_endpoint_map["filter_by_radius_geometry"] = "/filter/byradius/geometry"
self.gpudb_func_to_endpoint_map["filter_by_range"] = "/filter/byrange"
self.gpudb_func_to_endpoint_map["filter_by_series"] = "/filter/byseries"
self.gpudb_func_to_endpoint_map["filter_by_string"] = "/filter/bystring"
self.gpudb_func_to_endpoint_map["filter_by_table"] = "/filter/bytable"
self.gpudb_func_to_endpoint_map["filter_by_value"] = "/filter/byvalue"
self.gpudb_func_to_endpoint_map["get_job"] = "/get/job"
self.gpudb_func_to_endpoint_map["get_records"] = "/get/records"
self.gpudb_func_to_endpoint_map["get_records_by_column"] = "/get/records/bycolumn"
self.gpudb_func_to_endpoint_map["get_records_by_series"] = "/get/records/byseries"
self.gpudb_func_to_endpoint_map["get_records_from_collection"] = "/get/records/fromcollection"
self.gpudb_func_to_endpoint_map["get_vectortile"] = "/get/vectortile"
self.gpudb_func_to_endpoint_map["grant_permission"] = "/grant/permission"
self.gpudb_func_to_endpoint_map["grant_permission_credential"] = "/grant/permission/credential"
self.gpudb_func_to_endpoint_map["grant_permission_datasource"] = "/grant/permission/datasource"
self.gpudb_func_to_endpoint_map["grant_permission_directory"] = "/grant/permission/directory"
self.gpudb_func_to_endpoint_map["grant_permission_proc"] = "/grant/permission/proc"
self.gpudb_func_to_endpoint_map["grant_permission_system"] = "/grant/permission/system"
self.gpudb_func_to_endpoint_map["grant_permission_table"] = "/grant/permission/table"
self.gpudb_func_to_endpoint_map["grant_role"] = "/grant/role"
self.gpudb_func_to_endpoint_map["has_permission"] = "/has/permission"
self.gpudb_func_to_endpoint_map["has_proc"] = "/has/proc"
self.gpudb_func_to_endpoint_map["has_role"] = "/has/role"
self.gpudb_func_to_endpoint_map["has_schema"] = "/has/schema"
self.gpudb_func_to_endpoint_map["has_table"] = "/has/table"
self.gpudb_func_to_endpoint_map["has_type"] = "/has/type"
self.gpudb_func_to_endpoint_map["import_model"] = "/import/model"
self.gpudb_func_to_endpoint_map["insert_records"] = "/insert/records"
self.gpudb_func_to_endpoint_map["insert_records_from_files"] = "/insert/records/fromfiles"
self.gpudb_func_to_endpoint_map["insert_records_from_payload"] = "/insert/records/frompayload"
self.gpudb_func_to_endpoint_map["insert_records_from_query"] = "/insert/records/fromquery"
self.gpudb_func_to_endpoint_map["insert_records_random"] = "/insert/records/random"
self.gpudb_func_to_endpoint_map["insert_symbol"] = "/insert/symbol"
self.gpudb_func_to_endpoint_map["kill_proc"] = "/kill/proc"
self.gpudb_func_to_endpoint_map["list_graph"] = "/list/graph"
self.gpudb_func_to_endpoint_map["lock_table"] = "/lock/table"
self.gpudb_func_to_endpoint_map["match_graph"] = "/match/graph"
self.gpudb_func_to_endpoint_map["merge_records"] = "/merge/records"
self.gpudb_func_to_endpoint_map["modify_graph"] = "/modify/graph"
self.gpudb_func_to_endpoint_map["query_graph"] = "/query/graph"
self.gpudb_func_to_endpoint_map["repartition_graph"] = "/repartition/graph"
self.gpudb_func_to_endpoint_map["reserve_resource"] = "/reserve/resource"
self.gpudb_func_to_endpoint_map["revoke_permission"] = "/revoke/permission"
self.gpudb_func_to_endpoint_map["revoke_permission_credential"] = "/revoke/permission/credential"
self.gpudb_func_to_endpoint_map["revoke_permission_datasource"] = "/revoke/permission/datasource"
self.gpudb_func_to_endpoint_map["revoke_permission_directory"] = "/revoke/permission/directory"
self.gpudb_func_to_endpoint_map["revoke_permission_proc"] = "/revoke/permission/proc"
self.gpudb_func_to_endpoint_map["revoke_permission_system"] = "/revoke/permission/system"
self.gpudb_func_to_endpoint_map["revoke_permission_table"] = "/revoke/permission/table"
self.gpudb_func_to_endpoint_map["revoke_role"] = "/revoke/role"
self.gpudb_func_to_endpoint_map["show_container_registry"] = "/show/container/registry"
self.gpudb_func_to_endpoint_map["show_credential"] = "/show/credential"
self.gpudb_func_to_endpoint_map["show_datasink"] = "/show/datasink"
self.gpudb_func_to_endpoint_map["show_datasource"] = "/show/datasource"
self.gpudb_func_to_endpoint_map["show_directories"] = "/show/directories"
self.gpudb_func_to_endpoint_map["show_environment"] = "/show/environment"
self.gpudb_func_to_endpoint_map["show_files"] = "/show/files"
self.gpudb_func_to_endpoint_map["show_functions"] = "/show/functions"
self.gpudb_func_to_endpoint_map["show_graph"] = "/show/graph"
self.gpudb_func_to_endpoint_map["show_graph_grammar"] = "/show/graph/grammar"
self.gpudb_func_to_endpoint_map["show_model"] = "/show/model"
self.gpudb_func_to_endpoint_map["show_proc"] = "/show/proc"
self.gpudb_func_to_endpoint_map["show_proc_status"] = "/show/proc/status"
self.gpudb_func_to_endpoint_map["show_resource_objects"] = "/show/resource/objects"
self.gpudb_func_to_endpoint_map["show_resource_statistics"] = "/show/resource/statistics"
self.gpudb_func_to_endpoint_map["show_resource_groups"] = "/show/resourcegroups"
self.gpudb_func_to_endpoint_map["show_schema"] = "/show/schema"
self.gpudb_func_to_endpoint_map["show_security"] = "/show/security"
self.gpudb_func_to_endpoint_map["show_sql_proc"] = "/show/sql/proc"
self.gpudb_func_to_endpoint_map["show_statistics"] = "/show/statistics"
self.gpudb_func_to_endpoint_map["show_system_properties"] = "/show/system/properties"
self.gpudb_func_to_endpoint_map["show_system_status"] = "/show/system/status"
self.gpudb_func_to_endpoint_map["show_system_timing"] = "/show/system/timing"
self.gpudb_func_to_endpoint_map["show_table"] = "/show/table"
self.gpudb_func_to_endpoint_map["show_table_metadata"] = "/show/table/metadata"
self.gpudb_func_to_endpoint_map["show_table_monitors"] = "/show/tablemonitors"
self.gpudb_func_to_endpoint_map["show_tables_by_type"] = "/show/tables/bytype"
self.gpudb_func_to_endpoint_map["show_triggers"] = "/show/triggers"
self.gpudb_func_to_endpoint_map["show_types"] = "/show/types"
self.gpudb_func_to_endpoint_map["show_video"] = "/show/video"
self.gpudb_func_to_endpoint_map["solve_graph"] = "/solve/graph"
self.gpudb_func_to_endpoint_map["update_records"] = "/update/records"
self.gpudb_func_to_endpoint_map["update_records_by_series"] = "/update/records/byseries"
self.gpudb_func_to_endpoint_map["upload_files"] = "/upload/files"
self.gpudb_func_to_endpoint_map["upload_files_fromurl"] = "/upload/files/fromurl"
self.gpudb_func_to_endpoint_map["visualize_get_feature_info"] = "/visualize/getfeatureinfo"
self.gpudb_func_to_endpoint_map["visualize_image"] = "/visualize/image"
self.gpudb_func_to_endpoint_map["visualize_image_chart"] = "/visualize/image/chart"
self.gpudb_func_to_endpoint_map["visualize_image_classbreak"] = "/visualize/image/classbreak"
self.gpudb_func_to_endpoint_map["visualize_image_contour"] = "/visualize/image/contour"
self.gpudb_func_to_endpoint_map["visualize_image_heatmap"] = "/visualize/image/heatmap"
self.gpudb_func_to_endpoint_map["visualize_image_labels"] = "/visualize/image/labels"
self.gpudb_func_to_endpoint_map["visualize_isochrone"] = "/visualize/isochrone"
# end load_gpudb_func_to_endpoint_map
# begin admin_add_host
def admin_add_host( self, host_address = None, options = {} ):
"""Adds a host to an existing cluster.
.. note::
This method should be used for on-premise deployments only.
Parameters:
host_address (str)
IP address of the host that will be added to the cluster. This
host must have installed the same version of Kinetica as the
cluster to which it is being added.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **dry_run** --
If set to *true*, only validation checks will be performed.
No host is added.
Allowed values are:
* true
* false
The default value is 'false'.
* **accepts_failover** --
If set to *true*, the host will accept processes (ranks,
graph server, etc.) in the event of a failover on another
node in the cluster.
Allowed values are:
* true
* false
The default value is 'false'.
* **public_address** --
The publicly-accessible IP address for the host being added,
typically specified for clients using multi-head operations.
This setting is required if any other host(s) in the cluster
specify a public address.
* **host_manager_public_url** --
The publicly-accessible full path URL to the host manager on
the host being added, e.g., 'http://172.123.45.67:9300'. The
default host manager port can be found in the `list of ports
<../../../../install/shared/ports/>`__ used by Kinetica.
* **ram_limit** --
The desired RAM limit for the host being added, i.e. the sum
of RAM usage for all processes on the host will not be able
to exceed this value. Supported units: K (thousand), KB
(kilobytes), M (million), MB (megabytes), G (billion), GB
(gigabytes); if no unit is provided, the value is assumed to
be in bytes. For example, if *ram_limit* is set to 10M, the
resulting RAM limit is 10 million bytes. Set *ram_limit* to
-1 to have no RAM limit.
* **gpus** --
Comma-delimited list of GPU indices (starting at 1) that are
eligible for running worker processes. If left blank, all
GPUs on the host being added will be eligible.
Returns:
A dict with the following entries--
added_host (str)
Identifier for the newly added host, of the format 'hostN'
where N is the integer identifier of that host. Note that the
host identifier is transient, i.e. it may change in the future
if other hosts are removed.
info (dict of str to str)
Additional information.
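Example (a hypothetical usage sketch, not generated reference content;
assumes an already-connected :class:`GPUdb` instance named ``db``, and the
host address below is illustrative only)::
    response = db.admin_add_host( host_address = "172.123.45.99",
                                  options = { "dry_run": "true" } )  # validate only; no host added
    print( response["info"] )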
"""
assert isinstance( host_address, (basestring)), "admin_add_host(): Argument 'host_address' must be (one) of type(s) '(basestring)'; given %s" % type( host_address ).__name__
assert isinstance( options, (dict)), "admin_add_host(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['host_address'] = host_address
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request_to_hm( '/admin/add/host', obj, convert_to_attr_dict = True )
return response
# end admin_add_host
# begin admin_add_ranks
def admin_add_ranks( self, hosts = None, config_params = None, options = {} ):
"""Add one or more ranks to an existing Kinetica cluster. The new ranks
will not contain any data initially (other than replicated tables) and
will not be assigned any shards. To rebalance data and shards across
the cluster, use :meth:`GPUdb.admin_rebalance`.
The database must be offline for this operation; see
:meth:`GPUdb.admin_offline`.
For example, if attempting to add three new ranks (two ranks on host
172.123.45.67 and one rank on host 172.123.45.68) to a Kinetica cluster
with additional configuration parameters:
* input parameter *hosts*
would be an array including 172.123.45.67 in the first two indices
(signifying two ranks being added to host 172.123.45.67) and
172.123.45.68 in the last index (signifying one rank being added
to host 172.123.45.68)
* input parameter *config_params*
would be an array of maps, with each map corresponding to the ranks
being added in input parameter *hosts*. The key of each map would be
the configuration parameter name and the value would be the
parameter's value, e.g. '{"rank.gpu":"1"}'
This endpoint's processing includes copying all replicated table data
to the new rank(s) and therefore could take a long time. The API call
may time out if run directly. It is recommended to run this endpoint
asynchronously via :meth:`GPUdb.create_job`.
.. note::
This method should be used for on-premise deployments only.
Parameters:
hosts (list of str)
Array of host IP addresses (matching a hostN.address from the
gpudb.conf file), or host identifiers (e.g. 'host0' from the
gpudb.conf file), on which to add ranks to the cluster. The
hosts must already be in the cluster. If needed beforehand, to
add a new host to the cluster use :meth:`GPUdb.admin_add_host`.
Include the same entry as many times as there are ranks to add
to the cluster, e.g., if two ranks on host 172.123.45.67 should
be added, input parameter *hosts* could look like
'["172.123.45.67", "172.123.45.67"]'. All ranks will be added
simultaneously, i.e. they're not added in the order of this
array. Each entry in this array corresponds to the entry at the
same index in the input parameter *config_params*. The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
config_params (list of dicts of str to str)
Array of maps containing configuration parameters to apply to
the new ranks
found in input parameter *hosts*. For example,
'{"rank.gpu":"2", "tier.ram.rank.limit":"10000000000"}'.
Currently, the available parameters
are rank-specific parameters in the `Network
<../../../../config/#config-main-network>`__,
`Hardware <../../../../config/#config-main-hardware>`__,
`Text Search <../../../../config/#config-main-text-search>`__,
and
`RAM Tiered Storage
<../../../../config/#config-main-ram-tier>`__ sections in the
gpudb.conf file, with the
key exception of the 'rankN.host' settings in the Network
section that will be determined by
input parameter *hosts* instead. Though many of these
configuration parameters typically are affixed with
'rankN' in the gpudb.conf file (where N is the rank number),
the 'N' should be omitted in
input parameter *config_params* as the new rank number(s) are
not allocated until the ranks have been added
to the cluster. Each entry in this array corresponds to the
entry at the same index in the
input parameter *hosts*. This array must either be completely
empty or have the same number of elements as
the input parameter *hosts*. An empty input parameter
*config_params* array will result in the new ranks being set
with default parameters. The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **dry_run** --
If *true*, only validation checks will be performed. No ranks
are added.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
added_ranks (list of str)
The number assigned to each added rank, formatted as 'rankN',
in the same order as the ranks in input parameter *hosts* and
input parameter *config_params*.
info (dict of str to str)
Additional information.
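Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db', hosts
that are already part of the cluster, and a database that has been
taken offline)::

    # Two ranks on one host, one on another, with per-rank config
    response = db.admin_add_ranks(
        hosts = [ '172.123.45.67', '172.123.45.67', '172.123.45.68' ],
        config_params = [ { 'rank.gpu': '1' },
                          { 'rank.gpu': '2' },
                          { 'rank.gpu': '1' } ] )
    print( response['added_ranks'] )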
"""
hosts = hosts if isinstance( hosts, list ) else ( [] if (hosts is None) else [ hosts ] )
config_params = config_params if isinstance( config_params, list ) else ( [] if (config_params is None) else [ config_params ] )
assert isinstance( options, (dict)), "admin_add_ranks(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['hosts'] = hosts
obj['config_params'] = config_params
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/add/ranks', obj, convert_to_attr_dict = True )
return response
# end admin_add_ranks
# begin admin_alter_host
def admin_alter_host( self, host = None, options = {} ):
"""Alter properties on an existing host in the cluster. Currently, the
only property that can be altered is a hosts ability to accept failover
processes.
Parameters:
host (str)
Identifies the host this applies to. Can be the host address,
or formatted as 'hostN' where N is the host number as specified
in gpudb.conf
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **accepts_failover** --
If set to *true*, the host will accept processes (ranks,
graph server, etc.) in the event of a failover on another
node in the cluster.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
info (dict of str to str)
Additional information.
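Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db')::

    # Allow 'host2' to accept failed-over processes
    db.admin_alter_host( host = 'host2',
                         options = { 'accepts_failover': 'true' } )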
"""
assert isinstance( host, (basestring)), "admin_alter_host(): Argument 'host' must be (one) of type(s) '(basestring)'; given %s" % type( host ).__name__
assert isinstance( options, (dict)), "admin_alter_host(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['host'] = host
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request_to_hm( '/admin/alter/host', obj, convert_to_attr_dict = True )
return response
# end admin_alter_host
# begin admin_alter_jobs
def admin_alter_jobs( self, job_ids = None, action = None, options = {} ):
"""Perform the requested action on a list of one or more job(s). Based
on the type of job and the current state of execution, the action may
not be
successfully executed. The final result of the attempted actions for
each
specified job is returned in the status array of the response. See
`Job Manager <../../../../admin/job_manager/>`__ for more information.
Parameters:
job_ids (list of longs)
Jobs to be modified. The user can provide a single element
(which will be automatically promoted to a list internally) or
a list.
action (str)
Action to be performed on the jobs specified by job_ids.
Allowed values are:
* cancel
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **job_tag** --
Job tag returned in call to create the job
Returns:
A dict with the following entries--
job_ids (list of longs)
Jobs on which the action was performed.
action (str)
Action requested on the jobs.
status (list of str)
Status of the requested action for each job.
info (dict of str to str)
Additional information.
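Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db' and an
illustrative job id)::

    response = db.admin_alter_jobs( job_ids = [ 1234567890 ],
                                    action = 'cancel' )
    print( response['status'] )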
"""
job_ids = job_ids if isinstance( job_ids, list ) else ( [] if (job_ids is None) else [ job_ids ] )
assert isinstance( action, (basestring)), "admin_alter_jobs(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__
assert isinstance( options, (dict)), "admin_alter_jobs(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['job_ids'] = job_ids
obj['action'] = action
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/alter/jobs', obj, convert_to_attr_dict = True )
return response
# end admin_alter_jobs
# begin admin_backup_begin
def admin_backup_begin( self, options = {} ):
"""Prepares the system for a backup by closing all open file handles after
allowing current active jobs to complete. When the database is in
backup mode, queries that result in a disk write operation will be
blocked until backup mode has been completed by using
:meth:`GPUdb.admin_backup_end`.
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
info (dict of str to str)
Additional information.
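Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db')::

    db.admin_backup_begin()
    # ... perform the filesystem-level backup here ...
    db.admin_backup_end()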
"""
assert isinstance( options, (dict)), "admin_backup_begin(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/backup/begin', obj, convert_to_attr_dict = True )
return response
# end admin_backup_begin
# begin admin_backup_end
def admin_backup_end( self, options = {} ):
"""Restores the system to normal operating mode after a backup has
completed, allowing any queries that were blocked to complete.
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
info (dict of str to str)
Additional information.
"""
assert isinstance( options, (dict)), "admin_backup_end(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/backup/end', obj, convert_to_attr_dict = True )
return response
# end admin_backup_end
# begin admin_ha_refresh
def admin_ha_refresh( self, options = {} ):
"""Restarts the HA processing on the given cluster as a mechanism of
accepting breaking HA conf changes. Additionally the cluster is put
into read-only while HA is restarting.
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
info (dict of str to str)
Additional information.
"""
assert isinstance( options, (dict)), "admin_ha_refresh(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/ha/refresh', obj, convert_to_attr_dict = True )
return response
# end admin_ha_refresh
# begin admin_offline
def admin_offline( self, offline = None, options = {} ):
"""Take the system offline. When the system is offline, no user operations
can be performed with the exception of a system shutdown.
Parameters:
offline (bool)
Set to true if desired state is offline.
Allowed values are:
* true
* false
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **flush_to_disk** --
Flush to disk when going offline
Allowed values are:
* true
* false
Returns:
A dict with the following entries--
is_offline (bool)
Returns true if the system is offline, or false otherwise.
info (dict of str to str)
Additional information.
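Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db')::

    # Take the system offline, flushing data to disk first
    response = db.admin_offline( offline = True,
                                 options = { 'flush_to_disk': 'true' } )
    print( response['is_offline'] )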
"""
assert isinstance( offline, (bool)), "admin_offline(): Argument 'offline' must be (one) of type(s) '(bool)'; given %s" % type( offline ).__name__
assert isinstance( options, (dict)), "admin_offline(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['offline'] = offline
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/offline', obj, convert_to_attr_dict = True )
return response
# end admin_offline
# begin admin_rebalance
def admin_rebalance( self, options = {} ):
"""Rebalance the data in the cluster so that all nodes contain an equal
number of records approximately and/or rebalance the shards to be
equally
distributed (as much as possible) across all the ranks.
The database must be offline for this operation; see
:meth:`GPUdb.admin_offline`.
* If :meth:`GPUdb.admin_rebalance` is invoked after a change is
made to the cluster, e.g., a host was added or removed,
`sharded data <../../../../concepts/tables/#sharding>`__ will be
evenly redistributed across the cluster by number of shards per rank
while unsharded data will be redistributed across the cluster by data
size per rank
* If :meth:`GPUdb.admin_rebalance`
is invoked at some point when unsharded data (a.k.a.
`randomly-sharded <../../../../concepts/tables/#random-sharding>`__)
in the cluster is unevenly distributed over time, sharded data will
not move while unsharded data will be redistributed across the
cluster by data size per rank
NOTE: Replicated data will not move as a result of this call
This endpoint's processing time depends on the amount of data in the
system,
thus the API call may time out if run directly. It is recommended to
run this
endpoint asynchronously via :meth:`GPUdb.create_job`.
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **rebalance_sharded_data** --
If *true*, `sharded data
<../../../../concepts/tables/#sharding>`__ will be rebalanced
approximately equally across the cluster. Note that for
clusters with large amounts of sharded data, this data
transfer could be time consuming and result in delayed query
responses.
Allowed values are:
* true
* false
The default value is 'true'.
* **rebalance_unsharded_data** --
If *true*, unsharded data (a.k.a. `randomly-sharded
<../../../../concepts/tables/#random-sharding>`__) will be
rebalanced approximately equally across the cluster. Note
that for clusters with large amounts of unsharded data, this
data transfer could be time consuming and result in delayed
query responses.
Allowed values are:
* true
* false
The default value is 'true'.
* **table_includes** --
Comma-separated list of unsharded table names to rebalance.
Not applicable to sharded tables because they are always
rebalanced. Cannot be used simultaneously with
*table_excludes*. This parameter is ignored if
*rebalance_unsharded_data* is *false*.
* **table_excludes** --
Comma-separated list of unsharded table names to not
rebalance. Not applicable to sharded tables because they are
always rebalanced. Cannot be used simultaneously with
*table_includes*. This parameter is ignored if
*rebalance_unsharded_data* is *false*.
* **aggressiveness** --
Influences how much data is moved at a time during rebalance.
A higher *aggressiveness* will complete the rebalance faster.
A lower *aggressiveness* will take longer but allow for
better interleaving between the rebalance and other queries.
Valid values are constants from 1 (lowest) to 10 (highest).
The default value is '10'.
* **compact_after_rebalance** --
Perform compaction of deleted records once the rebalance
completes to reclaim memory and disk space. Default is
*true*, unless *repair_incorrectly_sharded_data* is set to
*true*.
Allowed values are:
* true
* false
The default value is 'true'.
* **compact_only** --
If set to *true*, ignore rebalance options and attempt to
perform compaction of deleted records to reclaim memory and
disk space without rebalancing first.
Allowed values are:
* true
* false
The default value is 'false'.
* **repair_incorrectly_sharded_data** --
Scans for any data sharded incorrectly and re-routes the data
to the correct location. Only necessary if
:meth:`GPUdb.admin_verify_db` reports an error in sharding
alignment. This can be done as part of a typical rebalance
after expanding the cluster or in a standalone fashion when
it is believed that data is sharded incorrectly somewhere in
the cluster. Compaction will not be performed by default when
this is enabled. If this option is set to *true*, the time
necessary to rebalance and the memory used by the rebalance
may increase.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
info (dict of str to str)
Additional information.
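Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db' and a
database that has already been taken offline)::

    # Rebalance only unsharded data, at a gentler aggressiveness
    db.admin_rebalance( options = { 'rebalance_sharded_data': 'false',
                                    'rebalance_unsharded_data': 'true',
                                    'aggressiveness': '3' } )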
"""
assert isinstance( options, (dict)), "admin_rebalance(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/rebalance', obj, convert_to_attr_dict = True )
return response
# end admin_rebalance
# begin admin_remove_host
def admin_remove_host( self, host = None, options = {} ):
"""Removes a host from an existing cluster. If the host to be removed has
any ranks running on it, the ranks must be removed using
:meth:`GPUdb.admin_remove_ranks` or manually switched over to a new
host using :meth:`GPUdb.admin_switchover` prior to host removal. If the
host to be removed has the graph server or SQL planner running on it,
these must be manually switched over to a new host using
:meth:`GPUdb.admin_switchover`.
.. note::
This method should be used for on-premise deployments only.
Parameters:
host (str)
Identifies the host this applies to. Can be the host address,
or formatted as 'hostN' where N is the host number as specified
in gpudb.conf
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **dry_run** --
If set to *true*, only validation checks will be performed.
No host is removed.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
info (dict of str to str)
Additional information.
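Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db' and a
host with no ranks remaining on it)::

    # Validate the removal first, then perform it
    db.admin_remove_host( host = 'host3', options = { 'dry_run': 'true' } )
    db.admin_remove_host( host = 'host3' )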
"""
assert isinstance( host, (basestring)), "admin_remove_host(): Argument 'host' must be (one) of type(s) '(basestring)'; given %s" % type( host ).__name__
assert isinstance( options, (dict)), "admin_remove_host(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['host'] = host
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request_to_hm( '/admin/remove/host', obj, convert_to_attr_dict = True )
return response
# end admin_remove_host
# begin admin_remove_ranks
def admin_remove_ranks( self, ranks = None, options = {} ):
"""Remove one or more ranks from an existing Kinetica cluster. All data
will be rebalanced to other ranks before the rank(s) is removed unless
the
*rebalance_sharded_data* or
*rebalance_unsharded_data* parameters are set to
*false* in the
input parameter *options*, in which case the corresponding
`sharded data <../../../../concepts/tables/#sharding>`__ and/or
unsharded data (a.k.a.
`randomly-sharded <../../../../concepts/tables/#random-sharding>`__)
will be deleted.
The database must be offline for this operation; see
:meth:`GPUdb.admin_offline`.
This endpoint's processing time depends on the amount of data in the
system,
thus the API call may time out if run directly. It is recommended to
run this
endpoint asynchronously via :meth:`GPUdb.create_job`.
.. note::
This method should be used for on-premise deployments only.
Parameters:
ranks (list of str)
Each array value designates one or more ranks to remove from
the cluster. Values can be formatted as 'rankN' for a specific
rank, 'hostN' (from the gpudb.conf file) to remove all ranks on
that host, or the host IP address (hostN.address from the
gpudb.conf file), which also removes all ranks on that host. Rank
0 (the head rank) cannot be removed (but can be moved to
another host using :meth:`GPUdb.admin_switchover`). At least
one worker rank must be left in the cluster after the
operation. The user can provide a single element (which will
be automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **rebalance_sharded_data** --
If *true*, `sharded data
<../../../../concepts/tables/#sharding>`__ will be rebalanced
approximately equally across the cluster. Note that for
clusters with large amounts of sharded data, this data
transfer could be time consuming and result in delayed query
responses.
Allowed values are:
* true
* false
The default value is 'true'.
* **rebalance_unsharded_data** --
If *true*, unsharded data (a.k.a. `randomly-sharded
<../../../../concepts/tables/#random-sharding>`__) will be
rebalanced approximately equally across the cluster. Note
that for clusters with large amounts of unsharded data, this
data transfer could be time consuming and result in delayed
query responses.
Allowed values are:
* true
* false
The default value is 'true'.
* **aggressiveness** --
Influences how much data is moved at a time during rebalance.
A higher *aggressiveness* will complete the rebalance faster.
A lower *aggressiveness* will take longer but allow for
better interleaving between the rebalance and other queries.
Valid values are constants from 1 (lowest) to 10 (highest).
The default value is '10'.
Returns:
A dict with the following entries--
removed_ranks (list of str)
The number assigned to each rank removed from the cluster. This
array will be empty if the operation fails.
info (dict of str to str)
Additional information.
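Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db' and a
database that has already been taken offline)::

    # Remove a single worker rank, rebalancing its data to other ranks
    response = db.admin_remove_ranks( ranks = [ 'rank2' ] )
    print( response['removed_ranks'] )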
"""
ranks = ranks if isinstance( ranks, list ) else ( [] if (ranks is None) else [ ranks ] )
assert isinstance( options, (dict)), "admin_remove_ranks(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['ranks'] = ranks
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/remove/ranks', obj, convert_to_attr_dict = True )
return response
# end admin_remove_ranks
# begin admin_show_alerts
def admin_show_alerts( self, num_alerts = None, options = {} ):
"""Requests a list of the most recent alerts.
Returns lists of alert data, including timestamp and type.
Parameters:
num_alerts (int)
Number of most recent alerts to request. The response will
include up to input parameter *num_alerts* depending on how
many alerts there are in the system. A value of 0 returns all
stored alerts.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
timestamps (list of str)
Timestamp for when the alert occurred, sorted from most recent
to least recent. Each array entry corresponds with the entries
at the same index in output parameter *types* and output
parameter *params*.
types (list of str)
Type of system alert, sorted from most recent to least recent.
Each array entry corresponds with the entries at the same index
in output parameter *timestamps* and output parameter *params*.
params (list of dicts of str to str)
Parameters for each alert, sorted from most recent to least
recent. Each array entry corresponds with the entries at the
same index in output parameter *timestamps* and output
parameter *types*.
info (dict of str to str)
Additional information.
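Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db')::

    response = db.admin_show_alerts( num_alerts = 5 )
    for ts, alert_type in zip( response['timestamps'], response['types'] ):
        print( ts, alert_type )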
"""
assert isinstance( num_alerts, (int, long, float)), "admin_show_alerts(): Argument 'num_alerts' must be (one) of type(s) '(int, long, float)'; given %s" % type( num_alerts ).__name__
assert isinstance( options, (dict)), "admin_show_alerts(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['num_alerts'] = num_alerts
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request_to_hm( '/admin/show/alerts', obj, convert_to_attr_dict = True )
return response
# end admin_show_alerts
# begin admin_show_cluster_operations
def admin_show_cluster_operations( self, history_index = 0, options = {} ):
"""Requests the detailed status of the current operation (by default) or a
prior cluster operation specified by input parameter *history_index*.
Returns details on the requested cluster operation.
The response will also indicate how many cluster operations are stored
in the history.
Parameters:
history_index (int)
Indicates which cluster operation to retrieve. Use 0 for the
most recent. The default value is 0.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
history_index (int)
The index of this cluster operation in the
reverse-chronologically sorted list of operations, where 0 is
the most recent operation.
history_size (int)
Number of cluster operations executed to date.
in_progress (bool)
Whether this cluster operation is currently in progress or not.
Allowed values are:
* true
* false
start_time (str)
The start time of the cluster operation.
end_time (str)
The end time of the cluster operation, if completed.
endpoint (str)
The endpoint that initiated the cluster operation.
endpoint_schema (str)
The schema for the original request.
overall_status (str)
Overall success status of the operation.
Allowed values are:
* **OK** --
The operation was successful, or, if still in progress, the
operation is successful so far.
* **ERROR** --
An error occurred executing the operation.
user_stopped (bool)
Whether a user stopped this operation at any point while in
progress.
Allowed values are:
* true
* false
percent_complete (int)
Percent complete of this entire operation.
dry_run (bool)
Whether this operation was a dry run.
Allowed values are:
* true
* false
messages (list of str)
Updates and error messages if any.
add_ranks (bool)
Whether adding ranks is (or was) part of this operation.
Allowed values are:
* true
* false
add_ranks_status (str)
If this was a rank-adding operation, the add-specific status of
the operation.
Allowed values are:
* NOT_STARTED
* IN_PROGRESS
* INTERRUPTED
* COMPLETED_OK
* ERROR
ranks_being_added (list of ints)
The rank numbers of the ranks currently being added, or the
rank numbers that were added if the operation is complete.
rank_hosts (list of str)
The host IP addresses of the ranks being added, in the same
order as the output parameter *ranks_being_added* list.
add_ranks_percent_complete (int)
Current percent complete of the add ranks operation.
remove_ranks (bool)
Whether removing ranks is (or was) part of this operation.
Allowed values are:
* true
* false
remove_ranks_status (str)
If this was a rank-removing operation, the removal-specific
status of the operation.
Allowed values are:
* NOT_STARTED
* IN_PROGRESS
* INTERRUPTED
* COMPLETED_OK
* ERROR
ranks_being_removed (list of ints)
The ranks being removed, or that have been removed if the
operation is completed.
remove_ranks_percent_complete (int)
Current percent complete of the remove ranks operation.
rebalance (bool)
Whether data and/or shard rebalancing is (or was) part of this
operation.
Allowed values are:
* true
* false
rebalance_unsharded_data (bool)
Whether rebalancing of unsharded data is (or was) part of this
operation.
Allowed values are:
* true
* false
rebalance_unsharded_data_status (str)
If this was an operation that included rebalancing unsharded
data, the rebalancing-specific status of the operation.
Allowed values are:
* NOT_STARTED
* IN_PROGRESS
* INTERRUPTED
* COMPLETED_OK
* ERROR
unsharded_rebalance_percent_complete (int)
Percentage of unsharded tables that completed rebalancing, out
of all unsharded tables to rebalance.
rebalance_sharded_data (bool)
Whether rebalancing of sharded data is (or was) part of this
operation.
Allowed values are:
* true
* false
shard_array_version (long)
Version of the shard array that is (or was) being rebalanced
to. Each change to the shard array results in the version
number incrementing.
rebalance_sharded_data_status (str)
If this was an operation that included rebalancing sharded
data, the rebalancing-specific status of the operation.
Allowed values are:
* NOT_STARTED
* IN_PROGRESS
* INTERRUPTED
* COMPLETED_OK
* ERROR
num_shards_changing (int)
Number of shards that will change as part of rebalance.
sharded_rebalance_percent_complete (int)
Percentage of shard keys, and their associated data if
applicable, that have completed rebalancing.
info (dict of str to str)
Additional information.
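Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db')::

    # Status of the most recent (or currently running) cluster operation
    response = db.admin_show_cluster_operations( history_index = 0 )
    print( response['overall_status'], response['percent_complete'] )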
"""
assert isinstance( history_index, (int, long, float)), "admin_show_cluster_operations(): Argument 'history_index' must be (one) of type(s) '(int, long, float)'; given %s" % type( history_index ).__name__
assert isinstance( options, (dict)), "admin_show_cluster_operations(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['history_index'] = history_index
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/show/cluster/operations', obj, convert_to_attr_dict = True )
return response
# end admin_show_cluster_operations
# begin admin_show_jobs
def admin_show_jobs( self, options = {} ):
"""Get a list of the current jobs in GPUdb.
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **show_async_jobs** --
If *true*, then the completed async jobs are also included in
the response. By default, once the async jobs are completed
they are no longer included in the jobs list.
Allowed values are:
* true
* false
The default value is 'false'.
* **show_worker_info** --
If *true*, then information is also returned from worker
ranks. By default only status from the head rank is returned.
Allowed values are:
* true
* false
Returns:
A dict with the following entries--
job_id (list of longs)
status (list of str)
endpoint_name (list of str)
time_received (list of longs)
auth_id (list of str)
source_ip (list of str)
user_data (list of str)
flags (list of str)
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **job_tag** --
The job tag specified by the user or if unspecified by user,
an internally generated unique identifier for the job across
clusters.
* **worker_info** --
Worker job information as json
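Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db')::

    response = db.admin_show_jobs( options = { 'show_async_jobs': 'true' } )
    for job_id, status in zip( response['job_id'], response['status'] ):
        print( job_id, status )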
"""
assert isinstance( options, (dict)), "admin_show_jobs(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/show/jobs', obj, convert_to_attr_dict = True )
return response
# end admin_show_jobs
# begin admin_show_shards
def admin_show_shards( self, options = {} ):
"""Show the mapping of shards to the corresponding rank and tom. The
response message contains list of 16384 (total number of shards in the
system) Rank and TOM numbers corresponding to each shard.
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
version (long)
Current shard array version number.
rank (list of ints)
Array of ranks indexed by the shard number.
tom (list of ints)
Array of toms to which the corresponding shard belongs.
info (dict of str to str)
Additional information.
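Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db')::

    response = db.admin_show_shards()
    # Rank and TOM that shard 0 currently maps to
    print( response['rank'][0], response['tom'][0] )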
"""
assert isinstance( options, (dict)), "admin_show_shards(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/show/shards', obj, convert_to_attr_dict = True )
return response
# end admin_show_shards
# begin admin_shutdown
def admin_shutdown( self, exit_type = None, authorization = None, options = {}
):
"""Exits the database server application.
Parameters:
exit_type (str)
Reserved for future use. User can pass an empty string.
authorization (str)
No longer used. User can pass an empty string.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
exit_status (str)
'OK' upon (right before) successful exit.
info (dict of str to str)
Additional information.
"""
assert isinstance( exit_type, (basestring)), "admin_shutdown(): Argument 'exit_type' must be (one) of type(s) '(basestring)'; given %s" % type( exit_type ).__name__
assert isinstance( authorization, (basestring)), "admin_shutdown(): Argument 'authorization' must be (one) of type(s) '(basestring)'; given %s" % type( authorization ).__name__
assert isinstance( options, (dict)), "admin_shutdown(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['exit_type'] = exit_type
obj['authorization'] = authorization
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/shutdown', obj, convert_to_attr_dict = True )
return response
# end admin_shutdown
# begin admin_switchover
def admin_switchover( self, processes = None, destinations = None, options = {}
):
"""Manually switch over one or more processes to another host. Individual
ranks or entire hosts may be moved to another host.
.. note::
This method should be used for on-premise deployments only.
Parameters:
processes (list of str)
Indicates the process identifier to switch over to another
host. Options are
'hostN' and 'rankN' where 'N' corresponds to the number
associated with a host or rank in the
`Network <../../../../config/#config-main-network>`__ section
of the gpudb.conf file; e.g.,
'host[N].address' or 'rank[N].host'. If 'hostN' is provided,
all processes on that host will be
moved to another host. Each entry in this array will be
switched over to the corresponding host
entry at the same index in input parameter *destinations*.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
destinations (list of str)
Indicates to which host to switch over each corresponding
process given in
input parameter *processes*. Each index must be specified as
'hostN' where 'N' corresponds to the number
associated with a host or rank in the `Network
<../../../../config/#config-main-network>`__ section of the
gpudb.conf file; e.g., 'host[N].address'. Each entry in this
array will receive the corresponding
process entry at the same index in input parameter *processes*.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **dry_run** --
If set to *true*, only validation checks will be performed.
Nothing is switched over.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
info (dict of str to str)
Additional information.
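Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db')::

    # Move rank2 to host3, validating the move first
    db.admin_switchover( processes = [ 'rank2' ],
                         destinations = [ 'host3' ],
                         options = { 'dry_run': 'true' } )
    db.admin_switchover( processes = [ 'rank2' ],
                         destinations = [ 'host3' ] )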
"""
processes = processes if isinstance( processes, list ) else ( [] if (processes is None) else [ processes ] )
destinations = destinations if isinstance( destinations, list ) else ( [] if (destinations is None) else [ destinations ] )
assert isinstance( options, (dict)), "admin_switchover(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['processes'] = processes
obj['destinations'] = destinations
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request_to_hm( '/admin/switchover', obj, convert_to_attr_dict = True )
return response
# end admin_switchover
# begin admin_verify_db
def admin_verify_db( self, options = {} ):
"""Verify database is in a consistent state. When inconsistencies or
errors are found, the verified_ok flag in the response is set to false
and the list of errors found is provided in the error_list.
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **rebuild_on_error** --
[DEPRECATED -- Use the Rebuild DB feature of GAdmin
instead.].
Allowed values are:
* true
* false
The default value is 'false'.
* **verify_nulls** --
When *true*, verifies that null values are set to zero.
Allowed values are:
* true
* false
The default value is 'false'.
* **verify_persist** --
When *true*, persistent objects will be compared against
their state in memory and workers will be checked for
orphaned table data in persist. To check for orphaned worker
data, either set *concurrent_safe* in input parameter
*options* to *true* or place the database offline.
Allowed values are:
* true
* false
The default value is 'false'.
* **concurrent_safe** --
When *true*, allows this endpoint to be run safely with other
concurrent database operations. Other operations may be
slower while this is running.
Allowed values are:
* true
* false
The default value is 'true'.
* **verify_rank0** --
If *true*, compare rank0 table metadata against workers'
metadata.
Allowed values are:
* true
* false
The default value is 'false'.
* **delete_orphaned_tables** --
If *true*, orphaned table directories found on workers for
which there is no corresponding metadata will be deleted.
Must set *verify_persist* in input parameter *options* to
*true*. It is recommended to run this while the database is
offline OR set *concurrent_safe* in input parameter *options*
to *true*.
Allowed values are:
* true
* false
The default value is 'false'.
* **verify_orphaned_tables_only** --
If *true*, only the presence of orphaned table directories
will be checked, all persistence checks will be skipped.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
verified_ok (bool)
True if no errors were found, false otherwise. The default
value is False.
error_list (list of str)
List of errors found while validating the database internal
state. The default value is an empty list ( [] ).
orphaned_tables_total_size (long)
If *verify_persist* is *true*, *verify_orphaned_tables_only* is
*true* or *delete_orphaned_tables* is *true*, this is the sum
in bytes of all orphaned tables found. Otherwise, -1.
info (dict of str to str)
Additional information.
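Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db')::

    response = db.admin_verify_db( options = { 'verify_persist': 'true',
                                               'concurrent_safe': 'true' } )
    if not response['verified_ok']:
        print( response['error_list'] )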
"""
assert isinstance( options, (dict)), "admin_verify_db(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/admin/verifydb', obj, convert_to_attr_dict = True )
return response
# end admin_verify_db
# begin aggregate_convex_hull
def aggregate_convex_hull( self, table_name = None, x_column_name = None,
y_column_name = None, options = {} ):
"""Calculates and returns the convex hull for the values in a table
specified by input parameter *table_name*.
Parameters:
table_name (str)
Name of table on which the operation will be performed. Must be
an existing table, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
x_column_name (str)
Name of the column containing the x coordinates of the points
for the operation being performed.
y_column_name (str)
Name of the column containing the y coordinates of the points
for the operation being performed.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
x_vector (list of floats)
Array of x coordinates of the resulting convex set.
y_vector (list of floats)
Array of y coordinates of the resulting convex set.
count (int)
Count of the number of points in the convex set.
is_valid (bool)
info (dict of str to str)
Additional information.
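Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db' and an
illustrative table with columns 'x' and 'y')::

    response = db.aggregate_convex_hull( table_name = 'example.points',
                                         x_column_name = 'x',
                                         y_column_name = 'y' )
    hull = list( zip( response['x_vector'], response['y_vector'] ) )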
"""
assert isinstance( table_name, (basestring)), "aggregate_convex_hull(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( x_column_name, (basestring)), "aggregate_convex_hull(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
assert isinstance( y_column_name, (basestring)), "aggregate_convex_hull(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
assert isinstance( options, (dict)), "aggregate_convex_hull(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['x_column_name'] = x_column_name
obj['y_column_name'] = y_column_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/aggregate/convexhull', obj, convert_to_attr_dict = True )
return response
# end aggregate_convex_hull
# begin aggregate_group_by
def aggregate_group_by( self, table_name = None, column_names = None, offset =
0, limit = -9999, encoding = 'binary', options = {}
):
"""Calculates unique combinations (groups) of values for the given columns
in a given table or view and computes aggregates on each unique
combination. This is somewhat analogous to an SQL-style SELECT...GROUP
BY.
For aggregation details and examples, see `Aggregation
<../../../../concepts/aggregation/>`__. For limitations, see
`Aggregation Limitations
<../../../../concepts/aggregation/#limitations>`__.
Any column(s) can be grouped on, and all column types except
unrestricted-length strings may be used for computing applicable
aggregates; columns marked as `store-only
<../../../../concepts/types/#data-handling>`__ are unable to be used in
grouping or aggregation.
The results can be paged via the input parameter *offset* and input
parameter *limit* parameters. For example, to get 10 groups with the
largest counts the inputs would be: limit=10,
options={"sort_order":"descending", "sort_by":"value"}.
Input parameter *options* can be used to customize behavior of this
call e.g. filtering or sorting the results.
To group by columns 'x' and 'y' and compute the number of objects
within each group, use: column_names=['x','y','count(*)'].
To also compute the sum of 'z' over each group, use:
column_names=['x','y','count(*)','sum(z)'].
Available `aggregation functions
<../../../../concepts/expressions/#aggregate-expressions>`__ are:
count(*), sum, min, max, avg, mean, stddev, stddev_pop, stddev_samp,
var, var_pop, var_samp, arg_min, arg_max and count_distinct.
Available grouping functions are `Rollup
<../../../../concepts/rollup/>`__, `Cube
<../../../../concepts/cube/>`__, and `Grouping Sets
<../../../../concepts/grouping_sets/>`__
This service also provides support for `Pivot
<../../../../concepts/pivot/>`__ operations.
Filtering on aggregates is supported via expressions using `aggregation
functions <../../../../concepts/expressions/#aggregate-expressions>`__
supplied to *having*.
The response is returned as a dynamic schema. For details see: `dynamic
schemas documentation <../../../../api/concepts/#dynamic-schemas>`__.
If a *result_table* name is specified in the input parameter *options*,
the results are stored in a new table with that name--no results are
returned in the response. Both the table name and resulting column
names must adhere to `standard naming conventions
<../../../../concepts/tables/#table>`__; column/aggregation expressions
will need to be aliased. If the source table's `shard key
<../../../../concepts/tables/#shard-keys>`__ is used as the grouping
column(s) and all result records are selected (input parameter *offset*
is 0 and input parameter *limit* is -9999), the result table will be
sharded; in all other cases it will be replicated. Sorting will
function properly only if the result table is replicated or if there is
only one processing node, and should not be relied upon in other cases.
Not available when any of the values of input parameter *column_names*
is an unrestricted-length string.
Parameters:
table_name (str)
Name of an existing table or view on which the operation will
be performed, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_names (list of str)
List of one or more column names, expressions, and aggregate
expressions. The user can provide a single element (which
will be automatically promoted to a list internally) or a list.
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records.
Allowed values are:
* **binary** --
Indicates that the returned records should be binary encoded.
* **json** --
Indicates that the returned records should be json encoded.
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of *result_table*. If
*result_table_persist* is *false* (or unspecified), then this
is always allowed even if the caller does not have permission
to create tables. The generated name is returned in
*qualified_result_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema as part of
*result_table* and use :meth:`GPUdb.create_schema` to create
the schema if non-existent] Name of a schema which is to
contain the table specified in *result_table*. If the schema
provided is non-existent, it will be automatically created.
* **expression** --
Filter expression to apply to the table prior to computing
the aggregate group by.
* **having** --
Filter expression to apply to the aggregated results.
* **sort_order** --
String indicating how the returned values should be sorted -
ascending or descending.
Allowed values are:
* **ascending** --
Indicates that the returned values should be sorted in
ascending order.
* **descending** --
Indicates that the returned values should be sorted in
descending order.
The default value is 'ascending'.
* **sort_by** --
String determining how the results are sorted.
Allowed values are:
* **key** --
Indicates that the returned values should be sorted by key,
which corresponds to the grouping columns. If you have
multiple grouping columns (and are sorting by key), it will
first sort the first grouping column, then the second
grouping column, etc.
* **value** --
Indicates that the returned values should be sorted by
value, which corresponds to the aggregates. If you have
multiple aggregates (and are sorting by value), it will
first sort by the first aggregate, then the second
aggregate, etc.
The default value is 'value'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
* **result_table** --
The name of a table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
Column names (group-by and aggregate fields) need to be given
aliases e.g. ["FChar256 as fchar256", "sum(FDouble) as sfd"].
If present, no results are returned in the response. This
option is not available if one of the grouping attributes is
an unrestricted string (i.e., not charN) type.
* **result_table_persist** --
If *true*, then the result table specified in *result_table*
will be persisted and will not expire unless a *ttl* is
specified. If *false*, then the result table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_force_replicated** --
Force the result table to be replicated (ignores any
sharding). Must be used in combination with the
*result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_generate_pk** --
If *true* then set a primary key for the result table. Must
be used in combination with the *result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in *result_table*.
* **chunk_size** --
Indicates the number of records per chunk to be used for the
result table. Must be used in combination with the
*result_table* option.
* **create_indexes** --
Comma-separated list of columns on which to create indexes on
the result table. Must be used in combination with the
*result_table* option.
* **view_id** --
ID of view of which the result table will be a member. The
default value is ''.
* **pivot** --
pivot column
* **pivot_values** --
The value list provided will become the column headers in the
output. Should be the values from the pivot_column.
* **grouping_sets** --
Customize the grouping attribute sets to compute the
aggregates. These sets can include ROLLUP or CUBE operators.
The attribute sets should be enclosed in parentheses and can
include composite attributes. All attributes specified in the
grouping sets must be present in the group-by attributes.
* **rollup** --
This option is used to specify the multilevel aggregates.
* **cube** --
This option is used to specify the multidimensional
aggregates.
Returns:
A dict with the following entries--
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
binary_encoded_response (bytes)
Avro binary encoded response.
json_encoded_response (str)
Avro JSON encoded response.
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_result_table_name** --
The fully qualified name of the table (i.e. including the
schema) used to store the results.
record_type (:class:`RecordType` or None)
A :class:`RecordType` object with which the user can decode
the binary data by using :meth:`GPUdbRecord.decode_binary_data`.
If JSON encoding is used, then None.
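Example (a minimal usage sketch, not part of the generated reference;
assumes an existing :class:`GPUdb` connection object named 'db' and an
illustrative table with columns 'x', 'y', and 'z')::

    # Ten groups with the largest counts, also summing 'z' per group
    response = db.aggregate_group_by(
        table_name = 'example.data',
        column_names = [ 'x', 'y', 'count(*)', 'sum(z)' ],
        offset = 0, limit = 10,
        options = { 'sort_order': 'descending', 'sort_by': 'value' } )
    print( response['total_number_of_records'] )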
"""
assert isinstance( table_name, (basestring)), "aggregate_group_by(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
assert isinstance( offset, (int, long, float)), "aggregate_group_by(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "aggregate_group_by(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "aggregate_group_by(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "aggregate_group_by(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['column_names'] = column_names
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/aggregate/groupby', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
# Create the record type and save it in the response, if applicable
if encoding == "binary":
record_type = RecordType.from_dynamic_schema( response.response_schema_str, response.binary_encoded_response )
response["record_type"] = record_type
else:
response["record_type"] = None
return response
# end aggregate_group_by
# begin aggregate_group_by_and_decode
def aggregate_group_by_and_decode( self, table_name = None, column_names = None,
offset = 0, limit = -9999, encoding =
'binary', options = {}, record_type =
None, force_primitive_return_types =
True, get_column_major = True ):
"""Calculates unique combinations (groups) of values for the given columns
in a given table or view and computes aggregates on each unique
combination. This is somewhat analogous to an SQL-style SELECT...GROUP
BY.
For aggregation details and examples, see `Aggregation
<../../../../concepts/aggregation/>`__. For limitations, see
`Aggregation Limitations
<../../../../concepts/aggregation/#limitations>`__.
Any column(s) can be grouped on, and all column types except
unrestricted-length strings may be used for computing applicable
aggregates; columns marked as `store-only
<../../../../concepts/types/#data-handling>`__ are unable to be used in
grouping or aggregation.
The results can be paged via the input parameter *offset* and input
parameter *limit* parameters. For example, to get 10 groups with the
largest counts the inputs would be: limit=10,
options={"sort_order":"descending", "sort_by":"value"}.
Input parameter *options* can be used to customize behavior of this
call e.g. filtering or sorting the results.
To group by columns 'x' and 'y' and compute the number of objects
within each group, use: column_names=['x','y','count(*)'].
To also compute the sum of 'z' over each group, use:
column_names=['x','y','count(*)','sum(z)'].
Available `aggregation functions
<../../../../concepts/expressions/#aggregate-expressions>`__ are:
count(*), sum, min, max, avg, mean, stddev, stddev_pop, stddev_samp,
var, var_pop, var_samp, arg_min, arg_max and count_distinct.
Available grouping functions are `Rollup
<../../../../concepts/rollup/>`__, `Cube
<../../../../concepts/cube/>`__, and `Grouping Sets
<../../../../concepts/grouping_sets/>`__
This service also provides support for `Pivot
<../../../../concepts/pivot/>`__ operations.
Filtering on aggregates is supported via expressions using `aggregation
functions <../../../../concepts/expressions/#aggregate-expressions>`__
supplied to *having*.
The response is returned as a dynamic schema. For details see: `dynamic
schemas documentation <../../../../api/concepts/#dynamic-schemas>`__.
If a *result_table* name is specified in the input parameter *options*,
the results are stored in a new table with that name--no results are
returned in the response. Both the table name and resulting column
names must adhere to `standard naming conventions
<../../../../concepts/tables/#table>`__; column/aggregation expressions
will need to be aliased. If the source table's `shard key
<../../../../concepts/tables/#shard-keys>`__ is used as the grouping
column(s) and all result records are selected (input parameter *offset*
is 0 and input parameter *limit* is -9999), the result table will be
sharded; in all other cases it will be replicated. Sorting will
function properly only if the result table is replicated or if there is
only one processing node, and should not be relied upon in other cases.
Not available when any of the values of input parameter *column_names*
is an unrestricted-length string.
Parameters:
table_name (str)
Name of an existing table or view on which the operation will
be performed, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_names (list of str)
List of one or more column names, expressions, and aggregate
expressions. The user can provide a single element (which
will be automatically promoted to a list internally) or a list.
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records.
Allowed values are:
* **binary** --
Indicates that the returned records should be binary encoded.
* **json** --
Indicates that the returned records should be json encoded.
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of *result_table*. If
*result_table_persist* is *false* (or unspecified), then this
is always allowed even if the caller does not have permission
to create tables. The generated name is returned in
*qualified_result_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema as part of
*result_table* and use :meth:`GPUdb.create_schema` to create
the schema if non-existent] Name of a schema which is to
contain the table specified in *result_table*. If the schema
provided is non-existent, it will be automatically created.
* **expression** --
Filter expression to apply to the table prior to computing
the aggregate group by.
* **having** --
Filter expression to apply to the aggregated results.
* **sort_order** --
String indicating how the returned values should be sorted -
ascending or descending.
Allowed values are:
* **ascending** --
Indicates that the returned values should be sorted in
ascending order.
* **descending** --
Indicates that the returned values should be sorted in
descending order.
The default value is 'ascending'.
* **sort_by** --
String determining how the results are sorted.
Allowed values are:
* **key** --
Indicates that the returned values should be sorted by key,
which corresponds to the grouping columns. If you have
multiple grouping columns (and are sorting by key), it will
first sort the first grouping column, then the second
grouping column, etc.
* **value** --
Indicates that the returned values should be sorted by
value, which corresponds to the aggregates. If you have
multiple aggregates (and are sorting by value), it will
first sort by the first aggregate, then the second
aggregate, etc.
The default value is 'value'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
* **result_table** --
The name of a table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
Column names (group-by and aggregate fields) need to be given
aliases e.g. ["FChar256 as fchar256", "sum(FDouble) as sfd"].
If present, no results are returned in the response. This
option is not available if one of the grouping attributes is
an unrestricted string (i.e., not charN) type.
* **result_table_persist** --
If *true*, then the result table specified in *result_table*
will be persisted and will not expire unless a *ttl* is
specified. If *false*, then the result table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_force_replicated** --
Force the result table to be replicated (ignores any
sharding). Must be used in combination with the
*result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_generate_pk** --
If *true* then set a primary key for the result table. Must
be used in combination with the *result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in *result_table*.
* **chunk_size** --
Indicates the number of records per chunk to be used for the
result table. Must be used in combination with the
*result_table* option.
* **create_indexes** --
Comma-separated list of columns on which to create indexes on
the result table. Must be used in combination with the
*result_table* option.
* **view_id** --
ID of view of which the result table will be a member. The
default value is ''.
* **pivot** --
Name of the pivot column.
* **pivot_values** --
The value list provided will become the column headers in the
output. Should be the values from the pivot column.
* **grouping_sets** --
Customize the grouping attribute sets to compute the
aggregates. These sets can include ROLLUP or CUBE operators.
The attribute sets should be enclosed in parentheses and can
include composite attributes. All attributes specified in the
grouping sets must be present in the group-by attributes.
* **rollup** --
This option is used to specify the multilevel aggregates.
* **cube** --
This option is used to specify the multidimensional
aggregates.
record_type (:class:`RecordType` or None)
The record type expected in the results, or None to
determine the appropriate type automatically. If known,
providing this may improve performance in binary mode. Not used
in JSON mode. The default value is None.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
get_column_major (bool)
Indicates if the decoded records will be transposed to be
column-major or returned as is (row-major). Default value is
True.
Returns:
A dict with the following entries--
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_result_table_name** --
The fully qualified name of the table (i.e. including the
schema) used to store the results.
records (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
"""
assert isinstance( table_name, (basestring)), "aggregate_group_by_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
assert isinstance( offset, (int, long, float)), "aggregate_group_by_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "aggregate_group_by_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "aggregate_group_by_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "aggregate_group_by_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
assert ( (record_type is None) or isinstance(record_type, RecordType) ), "aggregate_group_by_and_decode: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
assert isinstance(force_primitive_return_types, bool), "aggregate_group_by_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__
assert isinstance(get_column_major, bool), "aggregate_group_by_and_decode: Argument 'get_column_major' must be bool; given %s" % type( get_column_major ).__name__
(REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/aggregate/groupby", get_rsp_cext = True )
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['column_names'] = column_names
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response, raw_response = self.__submit_request( '/aggregate/groupby', obj, get_rsp_cext = True, convert_to_attr_dict = True, return_raw_response_too = True )
if not response.is_ok():
return response
# Decode the data
if (encoding == 'binary'):
record_type = record_type if record_type else RecordType.from_dynamic_schema( response.response_schema_str, raw_response, response.binary_encoded_response )
records = record_type.decode_dynamic_records( raw_response, response.binary_encoded_response )
if force_primitive_return_types:
records = _Util.convert_cext_records_to_ordered_dicts( records )
# Transpose the data to column-major, if requested by the user
if get_column_major:
records = GPUdbRecord.transpose_data_to_col_major( records )
response["records"] = records
else:
records = json.loads( response.json_encoded_response )
if get_column_major:
# Get column-major data
records = GPUdbRecord.decode_dynamic_json_data_column_major( records, response.response_schema_str )
else:
# Get row-major data
records = GPUdbRecord.decode_dynamic_json_data_row_major( records, response.response_schema_str )
response["records"] = records
# end if
del response["binary_encoded_response"]
del response["json_encoded_response"]
return response
# end aggregate_group_by_and_decode
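# Usage sketch for aggregate_group_by_and_decode (illustrative only, kept as a
# comment; assumes an already-connected GPUdb handle `db` and a hypothetical
# table "example.sales" with columns "region" and "amount" -- none of these
# names are defined in this module):
#
#   rsp = db.aggregate_group_by_and_decode(
#       table_name   = "example.sales",
#       column_names = [ "region", "sum(amount) as total_amount" ],
#       offset       = 0,
#       limit        = 100,
#       options      = { "sort_by": "key", "sort_order": "ascending" } )
#   if rsp.is_ok():
#       # With the default get_column_major = True, the decoded records are
#       # returned column-major (column name -> list of values)
#       print( rsp["records"] )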
# begin aggregate_histogram
def aggregate_histogram( self, table_name = None, column_name = None, start =
None, end = None, interval = None, options = {} ):
"""Performs a histogram calculation given a table, a column, and an
interval function. The input parameter *interval* is used to produce
bins of that size
and the result, computed over the records falling within each bin, is
returned.
For each bin, the start value is inclusive, but the end value is
exclusive--except for the very last bin for which the end value is also
inclusive. The value returned for each bin is the number of records in
it,
except when a column name is provided as a
*value_column*. In this latter case the sum of the
values corresponding to the *value_column* is used as the
result instead. The total number of bins requested cannot exceed
10,000.
NOTE: The Kinetica instance being accessed must be running a CUDA
(GPU-based)
build to service a request that specifies a *value_column*.
Parameters:
table_name (str)
Name of the table on which the operation will be performed.
Must be an existing table, in [schema_name.]table_name format,
using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_name (str)
Name of a column or an expression of one or more column names
over which the histogram will be calculated.
start (float)
Lower end value of the histogram interval, inclusive.
end (float)
Upper end value of the histogram interval, inclusive.
interval (float)
The size of each bin within the start and end parameters.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **value_column** --
The name of the column to use when calculating the bin values
(values are summed). The column must be a numerical type
(int, double, long, float).
Returns:
A dict with the following entries--
counts (list of floats)
The array of calculated values that represents the histogram
data points.
start (float)
Value of input parameter *start*.
end (float)
Value of input parameter *end*.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "aggregate_histogram(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( column_name, (basestring)), "aggregate_histogram(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( start, (int, long, float)), "aggregate_histogram(): Argument 'start' must be (one) of type(s) '(int, long, float)'; given %s" % type( start ).__name__
assert isinstance( end, (int, long, float)), "aggregate_histogram(): Argument 'end' must be (one) of type(s) '(int, long, float)'; given %s" % type( end ).__name__
assert isinstance( interval, (int, long, float)), "aggregate_histogram(): Argument 'interval' must be (one) of type(s) '(int, long, float)'; given %s" % type( interval ).__name__
assert isinstance( options, (dict)), "aggregate_histogram(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['column_name'] = column_name
obj['start'] = start
obj['end'] = end
obj['interval'] = interval
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/aggregate/histogram', obj, convert_to_attr_dict = True )
return response
# end aggregate_histogram
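# Usage sketch for aggregate_histogram (illustrative only; assumes an
# already-connected GPUdb handle `db` and a hypothetical numeric column
# "amount" in a table "example.sales"):
#
#   rsp = db.aggregate_histogram( table_name  = "example.sales",
#                                 column_name = "amount",
#                                 start = 0, end = 1000, interval = 100 )
#   if rsp.is_ok():
#       print( rsp["counts"] )   # one count per 100-wide bin; the last bin
#                                # also includes the upper end value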
# begin aggregate_k_means
def aggregate_k_means( self, table_name = None, column_names = None, k = None,
tolerance = None, options = {} ):
"""This endpoint runs the k-means algorithm - a heuristic algorithm
that attempts to do k-means clustering. An ideal k-means clustering
algorithm
selects k points such that the sum of the mean squared distances of
each member
of the set to the nearest of the k points is minimized. The k-means
algorithm
however does not necessarily produce such an ideal cluster. It begins
with a
randomly selected set of k points and then refines the location of the
points
iteratively and settles to a local minimum. Various parameters and
options are
provided to control the heuristic search.
NOTE: The Kinetica instance being accessed must be running a CUDA
(GPU-based)
build to service this request.
Parameters:
table_name (str)
Name of the table on which the operation will be performed.
Must be an existing table, in [schema_name.]table_name format,
using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_names (list of str)
List of column names on which the operation would be performed.
If n columns are provided then each of the k result points will
have n dimensions corresponding to the n columns. The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
k (int)
The number of mean points to be determined by the algorithm.
tolerance (float)
Stop iterating when the distances between successive points are
less than the given tolerance.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **whiten** --
When set to 1 each of the columns is first normalized by its
stdv - default is not to whiten.
* **max_iters** --
Number of times to try to hit the tolerance limit before
giving up - default is 10.
* **num_tries** --
Number of times to run the k-means algorithm with a different
randomly selected set of starting points; helps avoid local
minima. The default is 1.
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of *result_table*. If
*result_table_persist* is *false* (or unspecified), then this
is always allowed even if the caller does not have permission
to create tables. The generated name is returned in
*qualified_result_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table** --
The name of a table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. If
this option is specified, the results are not returned in the
response.
* **result_table_persist** --
If *true*, then the result table specified in *result_table*
will be persisted and will not expire unless a *ttl* is
specified. If *false*, then the result table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in *result_table*.
Returns:
A dict with the following entries--
means (list of lists of floats)
The k-mean values found.
counts (list of longs)
The number of elements in the cluster closest to the corresponding
k-means values.
rms_dists (list of floats)
The root mean squared distance of the elements in the cluster
for each of the k-means values.
count (long)
The total count of all the clusters - will be the size of the
input table.
rms_dist (float)
The sum of all the rms_dists - the value the k-means algorithm
is attempting to minimize.
tolerance (float)
The distance between the last two iterations of the algorithm
before it quit.
num_iters (int)
The number of iterations the algorithm executed before it quit.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_result_table_name** --
The fully qualified name of the result table (i.e. including
the schema) used to store the results.
"""
assert isinstance( table_name, (basestring)), "aggregate_k_means(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
assert isinstance( k, (int, long, float)), "aggregate_k_means(): Argument 'k' must be (one) of type(s) '(int, long, float)'; given %s" % type( k ).__name__
assert isinstance( tolerance, (int, long, float)), "aggregate_k_means(): Argument 'tolerance' must be (one) of type(s) '(int, long, float)'; given %s" % type( tolerance ).__name__
assert isinstance( options, (dict)), "aggregate_k_means(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['column_names'] = column_names
obj['k'] = k
obj['tolerance'] = tolerance
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/aggregate/kmeans', obj, convert_to_attr_dict = True )
return response
# end aggregate_k_means
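# Usage sketch for aggregate_k_means (illustrative only; assumes an
# already-connected GPUdb handle `db` and a hypothetical table
# "example.points" with numeric columns "x" and "y"; per the docstring, the
# server must be a CUDA build to service this request):
#
#   rsp = db.aggregate_k_means( table_name   = "example.points",
#                               column_names = [ "x", "y" ],
#                               k            = 3,
#                               tolerance    = 0.01,
#                               options      = { "max_iters": "20",
#                                                "num_tries": "3" } )
#   if rsp.is_ok():
#       print( rsp["means"] )      # k points, one value per input column
#       print( rsp["rms_dist"] )   # quantity the algorithm tries to minimize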
# begin aggregate_min_max
def aggregate_min_max( self, table_name = None, column_name = None, options = {}
):
"""Calculates and returns the minimum and maximum values of a particular
column in a table.
Parameters:
table_name (str)
Name of the table on which the operation will be performed.
Must be an existing table, in [schema_name.]table_name format,
using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_name (str)
Name of a column or an expression of one or more columns on
which the min-max will be calculated.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
min (float)
Minimum value of the input parameter *column_name*.
max (float)
Maximum value of the input parameter *column_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "aggregate_min_max(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( column_name, (basestring)), "aggregate_min_max(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( options, (dict)), "aggregate_min_max(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['column_name'] = column_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/aggregate/minmax', obj, convert_to_attr_dict = True )
return response
# end aggregate_min_max
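# Usage sketch for aggregate_min_max (illustrative only; assumes an
# already-connected GPUdb handle `db` and a hypothetical numeric column
# "amount" in a table "example.sales"):
#
#   rsp = db.aggregate_min_max( table_name  = "example.sales",
#                               column_name = "amount" )
#   if rsp.is_ok():
#       print( rsp["min"], rsp["max"] )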
# begin aggregate_min_max_geometry
def aggregate_min_max_geometry( self, table_name = None, column_name = None,
options = {} ):
"""Calculates and returns the minimum and maximum x- and y-coordinates
of a particular geospatial geometry column in a table.
Parameters:
table_name (str)
Name of the table on which the operation will be performed.
Must be an existing table, in [schema_name.]table_name format,
using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_name (str)
Name of a geospatial geometry column on which the min-max will
be calculated.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
min_x (float)
Minimum x-coordinate value of the input parameter
*column_name*.
max_x (float)
Maximum x-coordinate value of the input parameter
*column_name*.
min_y (float)
Minimum y-coordinate value of the input parameter
*column_name*.
max_y (float)
Maximum y-coordinate value of the input parameter
*column_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "aggregate_min_max_geometry(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( column_name, (basestring)), "aggregate_min_max_geometry(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( options, (dict)), "aggregate_min_max_geometry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['column_name'] = column_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/aggregate/minmax/geometry', obj, convert_to_attr_dict = True )
return response
# end aggregate_min_max_geometry
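# Usage sketch for aggregate_min_max_geometry (illustrative only; assumes an
# already-connected GPUdb handle `db` and a hypothetical geometry column
# "geom" in a table "example.shapes"):
#
#   rsp = db.aggregate_min_max_geometry( table_name  = "example.shapes",
#                                        column_name = "geom" )
#   if rsp.is_ok():
#       print( rsp["min_x"], rsp["max_x"], rsp["min_y"], rsp["max_y"] )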
# begin aggregate_statistics
def aggregate_statistics( self, table_name = None, column_name = None, stats =
None, options = {} ):
"""Calculates the requested statistics of the given column(s) in a
given table.
The available statistics are:
*count* (number of total objects),
*mean*,
*stdv* (standard deviation),
*variance*,
*skew*,
*kurtosis*,
*sum*,
*min*,
*max*,
*weighted_average*,
*cardinality* (unique count),
*estimated_cardinality*,
*percentile*, and
*percentile_rank*.
Estimated cardinality is calculated by using the hyperloglog
approximation
technique.
Percentiles and percentile ranks are approximate and are calculated
using the
t-digest algorithm. They must include the desired
*percentile*/*percentile_rank*.
To compute multiple percentiles each value must be specified separately
(i.e.
'percentile(75.0),percentile(99.0),percentile_rank(1234.56),percentile_rank(-5)').
A second, comma-separated value can be added to the
*percentile* statistic to calculate percentile
resolution, e.g., a 50th percentile with 200 resolution would be
'percentile(50,200)'.
The weighted average statistic requires a weight column to be specified
in
*weight_column_name*. The weighted average is then
defined as the sum of the products of input parameter *column_name*
times the
*weight_column_name* values divided by the sum of the
*weight_column_name* values.
Additional columns can be used in the calculation of statistics via
*additional_column_names*. Values in these columns will
be included in the overall aggregate calculation--individual aggregates
will not
be calculated per additional column. For instance, requesting the
*count* & *mean* of
input parameter *column_name* x and *additional_column_names*
y & z, where x holds the numbers 1-10, y holds 11-20, and z holds
21-30, would
return the total number of x, y, & z values (30), and the single
average value
across all x, y, & z values (15.5).
The response includes a list of key/value pairs of each statistic
requested and
its corresponding value.
Parameters:
table_name (str)
Name of the table on which the statistics operation will be
performed, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_name (str)
Name of the primary column for which the statistics are to be
calculated.
stats (str)
Comma separated list of the statistics to calculate, e.g.
"sum,mean".
Allowed values are:
* **count** --
Number of objects (independent of the given column(s)).
* **mean** --
Arithmetic mean (average), equivalent to sum/count.
* **stdv** --
Sample standard deviation (denominator is count-1).
* **variance** --
Unbiased sample variance (denominator is count-1).
* **skew** --
Skewness (third standardized moment).
* **kurtosis** --
Kurtosis (fourth standardized moment).
* **sum** --
Sum of all values in the column(s).
* **min** --
Minimum value of the column(s).
* **max** --
Maximum value of the column(s).
* **weighted_average** --
Weighted arithmetic mean (using the option
*weight_column_name* as the weighting column).
* **cardinality** --
Number of unique values in the column(s).
* **estimated_cardinality** --
Estimate (via hyperloglog technique) of the number of unique
values in the column(s).
* **percentile** --
Estimate (via t-digest) of the given percentile of the
column(s) (percentile(50.0) will be an approximation of the
median). Add a second, comma-separated value to calculate
percentile resolution, e.g., 'percentile(75,150)'
* **percentile_rank** --
Estimate (via t-digest) of the percentile rank of the given
value in the column(s) (if the given value is the median of
the column(s), percentile_rank(<median>) will return
approximately 50.0).
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **additional_column_names** --
A list of comma separated column names over which statistics
can be accumulated along with the primary column. All
columns listed and input parameter *column_name* must be of
the same type. Must not include the column specified in
input parameter *column_name* and no column can be listed
twice.
* **weight_column_name** --
Name of column used as weighting attribute for the weighted
average statistic.
Returns:
A dict with the following entries--
stats (dict of str to floats)
(statistic name, double value) pairs of the requested
statistics, including the total count by default.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "aggregate_statistics(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( column_name, (basestring)), "aggregate_statistics(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( stats, (basestring)), "aggregate_statistics(): Argument 'stats' must be (one) of type(s) '(basestring)'; given %s" % type( stats ).__name__
assert isinstance( options, (dict)), "aggregate_statistics(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['column_name'] = column_name
obj['stats'] = stats
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/aggregate/statistics', obj, convert_to_attr_dict = True )
return response
# end aggregate_statistics
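# Usage sketch for aggregate_statistics (illustrative only; assumes an
# already-connected GPUdb handle `db` and a hypothetical numeric column
# "amount" in a table "example.sales" with a weighting column "qty"):
#
#   rsp = db.aggregate_statistics(
#       table_name  = "example.sales",
#       column_name = "amount",
#       stats       = "count,mean,stdv,percentile(50.0),weighted_average",
#       options     = { "weight_column_name": "qty" } )
#   if rsp.is_ok():
#       # rsp["stats"] maps each requested statistic name to its value,
#       # and includes the total count by default
#       print( rsp["stats"] )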
# begin aggregate_statistics_by_range
def aggregate_statistics_by_range( self, table_name = None, select_expression =
'', column_name = None, value_column_name
= None, stats = None, start = None, end =
None, interval = None, options = {} ):
"""Divides the given set into bins and calculates statistics of the
values of a value-column in each bin. The bins are based on the values
of a
given binning-column. The statistics that may be requested are mean,
stdv
(standard deviation), variance, skew, kurtosis, sum, min, max, first,
last and
weighted average. In addition to the requested statistics the count of
total
samples in each bin is returned. This counts vector is just the
histogram of the
column used to divide the set members into bins. The weighted average
statistic
requires a weight column to be specified in
*weight_column_name*. The weighted average is then
defined as the sum of the products of the value column times the weight
column
divided by the sum of the weight column.
There are two methods for binning the set members. In the first, which
can be
used for numeric valued binning-columns, a min, max and interval are
specified.
The number of bins, nbins, is the integer upper bound of
(max-min)/interval.
Values that fall in the range [min+n*interval,min+(n+1)*interval) are
placed in
the nth bin, where n ranges from 0 to nbins-2. The final bin is
[min+(nbins-1)*interval, max]. In the second method,
*bin_values* specifies a list of binning column values.
Binning-columns whose value matches the nth member of the
*bin_values* list are placed in the nth bin. When a list
is provided, the binning-column must be of type string or int.
NOTE: The Kinetica instance being accessed must be running a CUDA
(GPU-based)
build to service this request.
Parameters:
table_name (str)
Name of the table on which the ranged-statistics operation will
be performed, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
select_expression (str)
For a non-empty expression statistics are calculated for those
records for which the expression is true. The default value is
''.
column_name (str)
Name of the binning-column used to divide the set samples into
bins.
value_column_name (str)
Name of the value-column for which statistics are to be
computed.
stats (str)
A comma-separated list of the statistics to
calculate, e.g. 'sum,mean'. Available statistics: mean, stdv
(standard deviation), variance, skew, kurtosis, sum.
start (float)
The lower bound of the binning-column.
end (float)
The upper bound of the binning-column.
interval (float)
The interval of a bin. Set members fall into bin i if the
binning-column falls in the range [start+interval*i,
start+interval*(i+1)).
options (dict of str to str)
Map of optional parameters. The default value is an empty
dict ( {} ).
Allowed keys are:
* **additional_column_names** --
A list of comma separated value-column names over which
statistics can be accumulated along with the primary
value_column.
* **bin_values** --
A list of comma separated binning-column values. Values that
match the nth bin_values value are placed in the nth bin.
* **weight_column_name** --
Name of the column used as weighting column for the
weighted_average statistic.
* **order_column_name** --
Name of the column used for candlestick charting techniques.
Returns:
A dict with the following entries--
stats (dict of str to lists of floats)
A map with a key for each statistic in the stats input
parameter having a value that is a vector of the corresponding
value-column bin statistics. In addition, the key count has a
value that is a histogram of the binning-column.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "aggregate_statistics_by_range(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( select_expression, (basestring)), "aggregate_statistics_by_range(): Argument 'select_expression' must be (one) of type(s) '(basestring)'; given %s" % type( select_expression ).__name__
assert isinstance( column_name, (basestring)), "aggregate_statistics_by_range(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( value_column_name, (basestring)), "aggregate_statistics_by_range(): Argument 'value_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( value_column_name ).__name__
assert isinstance( stats, (basestring)), "aggregate_statistics_by_range(): Argument 'stats' must be (one) of type(s) '(basestring)'; given %s" % type( stats ).__name__
assert isinstance( start, (int, long, float)), "aggregate_statistics_by_range(): Argument 'start' must be (one) of type(s) '(int, long, float)'; given %s" % type( start ).__name__
assert isinstance( end, (int, long, float)), "aggregate_statistics_by_range(): Argument 'end' must be (one) of type(s) '(int, long, float)'; given %s" % type( end ).__name__
assert isinstance( interval, (int, long, float)), "aggregate_statistics_by_range(): Argument 'interval' must be (one) of type(s) '(int, long, float)'; given %s" % type( interval ).__name__
assert isinstance( options, (dict)), "aggregate_statistics_by_range(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['select_expression'] = select_expression
obj['column_name'] = column_name
obj['value_column_name'] = value_column_name
obj['stats'] = stats
obj['start'] = start
obj['end'] = end
obj['interval'] = interval
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/aggregate/statistics/byrange', obj, convert_to_attr_dict = True )
return response
# end aggregate_statistics_by_range
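# Usage sketch for aggregate_statistics_by_range (illustrative only; assumes
# an already-connected GPUdb handle `db` and a hypothetical table
# "example.trades" with a numeric binning column "ts" and a value column
# "price"; per the docstring, the server must be a CUDA build). With start=0,
# end=100 and interval=25 there are ceil((100-0)/25) = 4 bins, so each
# requested statistic comes back as a vector of 4 values, plus a "count"
# histogram vector:
#
#   rsp = db.aggregate_statistics_by_range(
#       table_name        = "example.trades",
#       column_name       = "ts",
#       value_column_name = "price",
#       stats             = "mean,min,max",
#       start             = 0,
#       end               = 100,
#       interval          = 25 )
#   if rsp.is_ok():
#       print( rsp["stats"]["mean"] )    # one mean per bin
#       print( rsp["stats"]["count"] )   # histogram of the binning column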
# begin aggregate_unique
def aggregate_unique( self, table_name = None, column_name = None, offset = 0,
limit = -9999, encoding = 'binary', options = {} ):
"""Returns all the unique values from a particular column
(specified by input parameter *column_name*) of a particular table or
view
(specified by input parameter *table_name*). If input parameter
*column_name* is a numeric column,
the values will be in output parameter *binary_encoded_response*.
Otherwise if
input parameter *column_name* is a string column, the values will be in
output parameter *json_encoded_response*. The results can be paged via
input parameter *offset*
and input parameter *limit* parameters.
Columns marked as `store-only
<../../../../concepts/types/#data-handling>`__
are unable to be used with this function.
To get the first 10 unique values sorted in descending order input
parameter *options*
would be::
{"limit":"10","sort_order":"descending"}.
The response is returned as a dynamic schema. For details see:
`dynamic schemas documentation
<../../../../api/concepts/#dynamic-schemas>`__.
If a *result_table* name is specified in the
input parameter *options*, the results are stored in a new table with
that name--no
results are returned in the response. Both the table name and
resulting column
name must adhere to
`standard naming conventions <../../../../concepts/tables/#table>`__;
any column expression will need to be aliased. If the source table's
`shard key <../../../../concepts/tables/#shard-keys>`__ is used as the
input parameter *column_name*, the result table will be sharded, in all
other cases it
will be replicated. Sorting will properly function only if the result
table is
replicated or if there is only one processing node and should not be
relied upon
in other cases. Not available if the value of input parameter
*column_name* is an
unrestricted-length string.
Parameters:
table_name (str)
Name of an existing table or view on which the operation will
be performed, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_name (str)
Name of the column or an expression containing one or more
column names on which the unique function would be applied.
offset (long)
A non-negative integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records.
Allowed values are:
* **binary** --
Indicates that the returned records should be binary encoded.
* **json** --
Indicates that the returned records should be json encoded.
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of *result_table*. If
*result_table_persist* is *false* (or unspecified), then this
is always allowed even if the caller does not have permission
to create tables. The generated name is returned in
*qualified_result_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema as part of
*result_table* and use :meth:`GPUdb.create_schema` to create
the schema if non-existent] Name of a schema which is to
contain the table specified in *result_table*. If the schema
provided is non-existent, it will be automatically created.
* **expression** --
Optional filter expression to apply to the table.
* **sort_order** --
String indicating how the returned values should be sorted.
Allowed values are:
* ascending
* descending
The default value is 'ascending'.
* **result_table** --
The name of the table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. If
present, no results are returned in the response. Not
available if input parameter *column_name* is an
unrestricted-length string.
* **result_table_persist** --
If *true*, then the result table specified in *result_table*
will be persisted and will not expire unless a *ttl* is
specified. If *false*, then the result table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_force_replicated** --
Force the result table to be replicated (ignores any
sharding). Must be used in combination with the
*result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_generate_pk** --
If *true* then set a primary key for the result table. Must
be used in combination with the *result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in *result_table*.
* **chunk_size** --
Indicates the number of records per chunk to be used for the
result table. Must be used in combination with the
*result_table* option.
* **view_id** --
ID of view of which the result table will be a member. The
default value is ''.
Returns:
A dict with the following entries--
table_name (str)
The same table name as was passed in the parameter list.
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
binary_encoded_response (bytes)
Avro binary encoded response.
json_encoded_response (str)
Avro JSON encoded response.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_result_table_name** --
The fully qualified name of the table (i.e. including the
schema) used to store the results.
record_type (:class:`RecordType` or None)
A :class:`RecordType` object that the user can use to decode
the binary data via :meth:`GPUdbRecord.decode_binary_data`.
If JSON encoding is used, then None.
"""
assert isinstance( table_name, (basestring)), "aggregate_unique(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( column_name, (basestring)), "aggregate_unique(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( offset, (int, long, float)), "aggregate_unique(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "aggregate_unique(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "aggregate_unique(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "aggregate_unique(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['column_name'] = column_name
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/aggregate/unique', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
# Create the record type and save it in the response, if applicable
if encoding == "binary":
record_type = RecordType.from_dynamic_schema( response.response_schema_str, response.binary_encoded_response )
response["record_type"] = record_type
else:
response["record_type"] = None
return response
# end aggregate_unique
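# Usage sketch for aggregate_unique (illustrative only; assumes an
# already-connected GPUdb handle `db` and a hypothetical column "region" in a
# table "example.sales"). With encoding = "json", the values come back in
# json_encoded_response; the companion aggregate_unique_and_decode below
# returns already-decoded records instead:
#
#   rsp = db.aggregate_unique( table_name  = "example.sales",
#                              column_name = "region",
#                              offset      = 0,
#                              limit       = 10,
#                              encoding    = "json",
#                              options     = { "sort_order": "descending" } )
#   if rsp.is_ok():
#       print( rsp["json_encoded_response"] )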
# begin aggregate_unique_and_decode
def aggregate_unique_and_decode( self, table_name = None, column_name = None,
offset = 0, limit = -9999, encoding =
'binary', options = {}, record_type = None,
force_primitive_return_types = True,
get_column_major = True ):
"""Returns all the unique values from a particular column
(specified by input parameter *column_name*) of a particular table or
view
(specified by input parameter *table_name*). If input parameter
*column_name* is a numeric column,
the values will be in output parameter *binary_encoded_response*.
Otherwise if
input parameter *column_name* is a string column, the values will be in
output parameter *json_encoded_response*. The results can be paged via
input parameter *offset*
and input parameter *limit* parameters.
Columns marked as `store-only
<../../../../concepts/types/#data-handling>`__
are unable to be used with this function.
To get the first 10 unique values sorted in descending order input
parameter *options*
would be::
{"limit":"10","sort_order":"descending"}.
The response is returned as a dynamic schema. For details see:
`dynamic schemas documentation
<../../../../api/concepts/#dynamic-schemas>`__.
If a *result_table* name is specified in the
input parameter *options*, the results are stored in a new table with
that name--no
results are returned in the response. Both the table name and
resulting column
name must adhere to
`standard naming conventions <../../../../concepts/tables/#table>`__;
any column expression will need to be aliased. If the source table's
`shard key <../../../../concepts/tables/#shard-keys>`__ is used as the
input parameter *column_name*, the result table will be sharded, in all
other cases it
will be replicated. Sorting will properly function only if the result
table is
replicated or if there is only one processing node and should not be
relied upon
in other cases. Not available if the value of input parameter
*column_name* is an
unrestricted-length string.
Parameters:
table_name (str)
Name of an existing table or view on which the operation will
be performed, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_name (str)
Name of the column or an expression containing one or more
column names on which the unique function would be applied.
offset (long)
A non-negative integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records.
Allowed values are:
* **binary** --
Indicates that the returned records should be binary encoded.
* **json** --
Indicates that the returned records should be json encoded.
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of *result_table*. If
*result_table_persist* is *false* (or unspecified), then this
is always allowed even if the caller does not have permission
to create tables. The generated name is returned in
*qualified_result_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema as part of
*result_table* and use :meth:`GPUdb.create_schema` to create
the schema if non-existent] Name of a schema which is to
contain the table specified in *result_table*. If the schema
provided is non-existent, it will be automatically created.
* **expression** --
Optional filter expression to apply to the table.
* **sort_order** --
String indicating how the returned values should be sorted.
Allowed values are:
* ascending
* descending
The default value is 'ascending'.
* **result_table** --
The name of the table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. If
present, no results are returned in the response. Not
available if input parameter *column_name* is an
unrestricted-length string.
* **result_table_persist** --
If *true*, then the result table specified in *result_table*
will be persisted and will not expire unless a *ttl* is
specified. If *false*, then the result table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_force_replicated** --
Force the result table to be replicated (ignores any
sharding). Must be used in combination with the
*result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_generate_pk** --
If *true* then set a primary key for the result table. Must
be used in combination with the *result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in *result_table*.
* **chunk_size** --
Indicates the number of records per chunk to be used for the
result table. Must be used in combination with the
*result_table* option.
* **view_id** --
ID of view of which the result table will be a member. The
default value is ''.
record_type (:class:`RecordType` or None)
The record type expected in the results, or None to
determine the appropriate type automatically. If known,
providing this may improve performance in binary mode. Not used
in JSON mode. The default value is None.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
get_column_major (bool)
Indicates if the decoded records will be transposed to be
column-major or returned as is (row-major). Default value is
True.
Returns:
A dict with the following entries--
table_name (str)
The same table name as was passed in the parameter list.
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_result_table_name** --
The fully qualified name of the table (i.e. including the
schema) used to store the results.
records (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
"""
assert isinstance( table_name, (basestring)), "aggregate_unique_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( column_name, (basestring)), "aggregate_unique_and_decode(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( offset, (int, long, float)), "aggregate_unique_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "aggregate_unique_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "aggregate_unique_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "aggregate_unique_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
assert ( (record_type is None) or isinstance(record_type, RecordType) ), "aggregate_unique_and_decode: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
assert isinstance(force_primitive_return_types, bool), "aggregate_unique_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__
assert isinstance(get_column_major, bool), "aggregate_unique_and_decode: Argument 'get_column_major' must be bool; given %s" % type( get_column_major ).__name__
(REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/aggregate/unique", get_rsp_cext = True )
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['column_name'] = column_name
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response, raw_response = self.__submit_request( '/aggregate/unique', obj, get_rsp_cext = True, convert_to_attr_dict = True, return_raw_response_too = True )
if not response.is_ok():
return response
# Decode the data
if (encoding == 'binary'):
record_type = record_type if record_type else RecordType.from_dynamic_schema( response.response_schema_str, raw_response, response.binary_encoded_response )
records = record_type.decode_dynamic_records( raw_response, response.binary_encoded_response )
if force_primitive_return_types:
records = _Util.convert_cext_records_to_ordered_dicts( records )
# Transpose the data to column-major, if requested by the user
if get_column_major:
records = GPUdbRecord.transpose_data_to_col_major( records )
response["records"] = records
else:
records = json.loads( response.json_encoded_response )
if get_column_major:
# Get column-major data
records = GPUdbRecord.decode_dynamic_json_data_column_major( records, response.response_schema_str )
else:
# Get row-major data
records = GPUdbRecord.decode_dynamic_json_data_row_major( records, response.response_schema_str )
response["records"] = records
# end if
del response["binary_encoded_response"]
del response["json_encoded_response"]
return response
# end aggregate_unique_and_decode
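# Usage sketch for aggregate_unique_and_decode (illustrative only; assumes an
# already-connected GPUdb handle `db` and the same hypothetical
# "example.sales" table as above); unlike aggregate_unique, the records
# arrive already decoded:
#
#   rsp = db.aggregate_unique_and_decode( table_name       = "example.sales",
#                                         column_name      = "region",
#                                         limit            = 10,
#                                         get_column_major = True )
#   if rsp.is_ok():
#       print( rsp["records"] )   # column-major dict of decoded values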
# begin aggregate_unpivot
def aggregate_unpivot( self, table_name = None, column_names = None,
variable_column_name = '', value_column_name = '',
pivoted_columns = None, encoding = 'binary', options
= {} ):
"""Rotate the column values into rows values.
For unpivot details and examples, see
`Unpivot <../../../../concepts/unpivot/>`__. For limitations, see
`Unpivot Limitations <../../../../concepts/unpivot/#limitations>`__.
Unpivot is used to normalize tables that are built for cross tabular
reporting
purposes. The unpivot operator rotates the column values for all the
pivoted
columns. A variable column, value column and all columns from the
source table
except the unpivot columns are projected into the result table. The
variable
column and value columns in the result table indicate the pivoted
column name
and values respectively.
The response is returned as a dynamic schema. For details see:
`dynamic schemas documentation
<../../../../api/concepts/#dynamic-schemas>`__.
Parameters:
table_name (str)
Name of the table on which the operation will be performed.
Must be an existing table/view, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_names (list of str)
List of column names or expressions. A wildcard '*' can be used
to include all the non-pivoted columns from the source table.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
variable_column_name (str)
Specifies the variable/parameter column name. The default
value is ''.
value_column_name (str)
Specifies the value column name. The default value is ''.
pivoted_columns (list of str)
List of one or more values typically the column names of the
input table. All the columns in the source table must have the
same data type. The user can provide a single element (which
will be automatically promoted to a list internally) or a list.
encoding (str)
Specifies the encoding for returned records.
Allowed values are:
* **binary** --
Indicates that the returned records should be binary encoded.
* **json** --
Indicates that the returned records should be json encoded.
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of *result_table*. If
*result_table_persist* is *false* (or unspecified), then this
is always allowed even if the caller does not have permission
to create tables. The generated name is returned in
*qualified_result_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema as part of
*result_table* and use :meth:`GPUdb.create_schema` to create
the schema if non-existent] Name of a schema which is to
contain the table specified in *result_table*. If the schema
is non-existent, it will be automatically created.
* **result_table** --
The name of a table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. If
present, no results are returned in the response.
* **result_table_persist** --
If *true*, then the result table specified in *result_table*
will be persisted and will not expire unless a *ttl* is
specified. If *false*, then the result table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **expression** --
Filter expression to apply to the table prior to unpivot
processing.
* **order_by** --
Comma-separated list of the columns to be sorted by; e.g.
'timestamp asc, x desc'. The columns specified must be
present in input table. If any alias is given for any column
name, the alias must be used, rather than the original column
name. The default value is ''.
* **chunk_size** --
Indicates the number of records per chunk to be used for the
result table. Must be used in combination with the
*result_table* option.
* **limit** --
The number of records to keep. The default value is ''.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in *result_table*.
* **view_id** --
The view this result table is part of. The default value is ''.
* **create_indexes** --
Comma-separated list of columns on which to create indexes on
the table specified in *result_table*. The columns specified
must be present in output column names. If any alias is
given for any column name, the alias must be used, rather
than the original column name.
* **result_table_force_replicated** --
Force the result table to be replicated (ignores any
sharding). Must be used in combination with the
*result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
table_name (str)
Typically shows the result-table name if provided in the
request (Ignore otherwise).
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
binary_encoded_response (bytes)
Avro binary encoded response.
json_encoded_response (str)
Avro JSON encoded response.
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_result_table_name** --
The fully qualified name of the table (i.e. including the
schema) used to store the results.
record_type (:class:`RecordType` or None)
A :class:`RecordType` object with which the user can decode
the binary data using :meth:`GPUdbRecord.decode_binary_data`.
If JSON encoding is used, then None.
"""
assert isinstance( table_name, (basestring)), "aggregate_unpivot(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
assert isinstance( variable_column_name, (basestring)), "aggregate_unpivot(): Argument 'variable_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( variable_column_name ).__name__
assert isinstance( value_column_name, (basestring)), "aggregate_unpivot(): Argument 'value_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( value_column_name ).__name__
pivoted_columns = pivoted_columns if isinstance( pivoted_columns, list ) else ( [] if (pivoted_columns is None) else [ pivoted_columns ] )
assert isinstance( encoding, (basestring)), "aggregate_unpivot(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "aggregate_unpivot(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['column_names'] = column_names
obj['variable_column_name'] = variable_column_name
obj['value_column_name'] = value_column_name
obj['pivoted_columns'] = pivoted_columns
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/aggregate/unpivot', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
# Create the record type and save it in the response, if applicable
if encoding == "binary":
record_type = RecordType.from_dynamic_schema( response.response_schema_str, response.binary_encoded_response )
response["record_type"] = record_type
else:
response["record_type"] = None
return response
# end aggregate_unpivot
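# --- Usage sketch (editor's illustration; not part of the generated API) ---
# A minimal, hedged standalone example of calling aggregate_unpivot() to rotate
# four quarterly sales columns into (quarter, sales) rows. The connection URL,
# schema/table name 'example.quarterly_sales', and all column names are
# hypothetical; adjust them for your cluster.
import gpudb
example_db = gpudb.GPUdb( host = 'http://127.0.0.1:9191' )  # assumed connection settings
unpivot_rsp = example_db.aggregate_unpivot(
    table_name = 'example.quarterly_sales',
    column_names = [ 'store_id' ],                 # non-pivoted column carried through
    variable_column_name = 'quarter',              # holds the pivoted column names
    value_column_name = 'sales',                   # holds the pivoted column values
    pivoted_columns = [ 'q1_sales', 'q2_sales', 'q3_sales', 'q4_sales' ],
    encoding = 'json',
    options = { 'result_table': 'example.sales_unpivoted' } )
print( unpivot_rsp.total_number_of_records )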
# begin aggregate_unpivot_and_decode
def aggregate_unpivot_and_decode( self, table_name = None, column_names = None,
variable_column_name = '',
value_column_name = '', pivoted_columns =
None, encoding = 'binary', options = {},
record_type = None,
force_primitive_return_types = True,
get_column_major = True ):
"""Rotate the column values into rows values.
For unpivot details and examples, see
`Unpivot <../../../../concepts/unpivot/>`__. For limitations, see
`Unpivot Limitations <../../../../concepts/unpivot/#limitations>`__.
Unpivot is used to normalize tables that are built for cross tabular
reporting
purposes. The unpivot operator rotates the column values for all the
pivoted
columns. A variable column, value column and all columns from the
source table
except the unpivot columns are projected into the result table. The
variable
column and value columns in the result table indicate the pivoted
column name
and values respectively.
The response is returned as a dynamic schema. For details see:
`dynamic schemas documentation
<../../../../api/concepts/#dynamic-schemas>`__.
Parameters:
table_name (str)
Name of the table on which the operation will be performed.
Must be an existing table/view, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_names (list of str)
List of column names or expressions. A wildcard '*' can be used
to include all the non-pivoted columns from the source table.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
variable_column_name (str)
Specifies the variable/parameter column name. The default
value is ''.
value_column_name (str)
Specifies the value column name. The default value is ''.
pivoted_columns (list of str)
List of one or more values, typically the column names of the
input table. All the columns in the source table must have the
same data type. The user can provide a single element (which
will be automatically promoted to a list internally) or a list.
encoding (str)
Specifies the encoding for returned records.
Allowed values are:
* **binary** --
Indicates that the returned records should be binary encoded.
* **json** --
Indicates that the returned records should be json encoded.
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of *result_table*. If
*result_table_persist* is *false* (or unspecified), then this
is always allowed even if the caller does not have permission
to create tables. The generated name is returned in
*qualified_result_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema as part of
*result_table* and use :meth:`GPUdb.create_schema` to create
the schema if non-existent] Name of a schema which is to
contain the table specified in *result_table*. If the schema
is non-existent, it will be automatically created.
* **result_table** --
The name of a table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. If
present, no results are returned in the response.
* **result_table_persist** --
If *true*, then the result table specified in *result_table*
will be persisted and will not expire unless a *ttl* is
specified. If *false*, then the result table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **expression** --
Filter expression to apply to the table prior to unpivot
processing.
* **order_by** --
Comma-separated list of the columns to be sorted by; e.g.
'timestamp asc, x desc'. The columns specified must be
present in input table. If any alias is given for any column
name, the alias must be used, rather than the original column
name. The default value is ''.
* **chunk_size** --
Indicates the number of records per chunk to be used for the
result table. Must be used in combination with the
*result_table* option.
* **limit** --
The number of records to keep. The default value is ''.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in *result_table*.
* **view_id** --
The view this result table is part of. The default value is ''.
* **create_indexes** --
Comma-separated list of columns on which to create indexes on
the table specified in *result_table*. The columns specified
must be present in output column names. If any alias is
given for any column name, the alias must be used, rather
than the original column name.
* **result_table_force_replicated** --
Force the result table to be replicated (ignores any
sharding). Must be used in combination with the
*result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
record_type (:class:`RecordType` or None)
The record type expected in the results, or None to
determine the appropriate type automatically. If known,
providing this may improve performance in binary mode. Not used
in JSON mode. The default value is None.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
get_column_major (bool)
Indicates if the decoded records will be transposed to be
column-major or returned as is (row-major). Default value is
True.
Returns:
A dict with the following entries--
table_name (str)
Typically shows the result-table name if provided in the
request (Ignore otherwise).
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_result_table_name** --
The fully qualified name of the table (i.e. including the
schema) used to store the results.
records (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
"""
assert isinstance( table_name, (basestring)), "aggregate_unpivot_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
assert isinstance( variable_column_name, (basestring)), "aggregate_unpivot_and_decode(): Argument 'variable_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( variable_column_name ).__name__
assert isinstance( value_column_name, (basestring)), "aggregate_unpivot_and_decode(): Argument 'value_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( value_column_name ).__name__
pivoted_columns = pivoted_columns if isinstance( pivoted_columns, list ) else ( [] if (pivoted_columns is None) else [ pivoted_columns ] )
assert isinstance( encoding, (basestring)), "aggregate_unpivot_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "aggregate_unpivot_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
assert ( (record_type == None) or isinstance(record_type, RecordType) ), "aggregate_unpivot_and_decode: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
assert isinstance(force_primitive_return_types, bool), "aggregate_unpivot_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__
assert isinstance(get_column_major, bool), "aggregate_unpivot_and_decode: Argument 'get_column_major' must be bool; given %s" % type( get_column_major ).__name__
(REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/aggregate/unpivot", get_rsp_cext = True )
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['column_names'] = column_names
obj['variable_column_name'] = variable_column_name
obj['value_column_name'] = value_column_name
obj['pivoted_columns'] = pivoted_columns
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response, raw_response = self.__submit_request( '/aggregate/unpivot', obj, get_rsp_cext = True, convert_to_attr_dict = True, return_raw_response_too = True )
if not response.is_ok():
return response
# Decode the data
if (encoding == 'binary'):
record_type = record_type if record_type else RecordType.from_dynamic_schema( response.response_schema_str, raw_response, response.binary_encoded_response )
records = record_type.decode_dynamic_records( raw_response, response.binary_encoded_response )
if force_primitive_return_types:
records = _Util.convert_cext_records_to_ordered_dicts( records )
# Transpose the data to column-major, if requested by the user
if get_column_major:
records = GPUdbRecord.transpose_data_to_col_major( records )
response["records"] = records
else:
records = json.loads( response.json_encoded_response )
if get_column_major:
# Get column-major data
records = GPUdbRecord.decode_dynamic_json_data_column_major( records, response.response_schema_str )
else:
# Get row-major data
records = GPUdbRecord.decode_dynamic_json_data_row_major( records, response.response_schema_str )
response["records"] = records
# end if
del response["binary_encoded_response"]
del response["json_encoded_response"]
return response
# end aggregate_unpivot_and_decode
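# --- Usage sketch (editor's illustration; not part of the generated API) ---
# A hedged standalone example of aggregate_unpivot_and_decode(), which returns
# decoded records directly instead of an Avro-encoded payload. All table and
# column names are hypothetical; get_column_major = False requests row-major
# records.
import gpudb
example_db = gpudb.GPUdb( host = 'http://127.0.0.1:9191' )  # assumed connection settings
decoded_rsp = example_db.aggregate_unpivot_and_decode(
    table_name = 'example.quarterly_sales',
    column_names = [ 'store_id' ],
    variable_column_name = 'quarter',
    value_column_name = 'sales',
    pivoted_columns = [ 'q1_sales', 'q2_sales', 'q3_sales', 'q4_sales' ],
    get_column_major = False )
for rec in decoded_rsp.records:  # row-major: one decoded record per source row
    print( rec )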
# begin alter_credential
def alter_credential( self, credential_name = None, credential_updates_map =
None, options = None ):
"""Alter the properties of an existing `credential
<../../../../concepts/credentials/>`__.
Parameters:
credential_name (str)
Name of the credential to be altered. Must be an existing
credential.
credential_updates_map (dict of str to str)
Map containing the properties of the credential to be updated.
Error if empty.
Allowed keys are:
* **type** --
New type for the credential.
Allowed values are:
* aws_access_key
* aws_iam_role
* azure_ad
* azure_oauth
* azure_sas
* azure_storage_key
* docker
* gcs_service_account_id
* gcs_service_account_keys
* hdfs
* kafka
* **identity** --
New user for the credential
* **secret** --
New password for the credential
* **schema_name** --
Updates the schema name. If *schema_name*
doesn't exist, an error will be thrown. If *schema_name* is
empty, then the user's
default schema will be used.
options (dict of str to str)
Optional parameters.
Returns:
A dict with the following entries--
credential_name (str)
Value of input parameter *credential_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( credential_name, (basestring)), "alter_credential(): Argument 'credential_name' must be (one) of type(s) '(basestring)'; given %s" % type( credential_name ).__name__
assert isinstance( credential_updates_map, (dict)), "alter_credential(): Argument 'credential_updates_map' must be (one) of type(s) '(dict)'; given %s" % type( credential_updates_map ).__name__
assert isinstance( options, (dict)), "alter_credential(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['credential_name'] = credential_name
obj['credential_updates_map'] = self.__sanitize_dicts( credential_updates_map )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/credential', obj, convert_to_attr_dict = True )
return response
# end alter_credential
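# --- Usage sketch (editor's illustration; not part of the generated API) ---
# A hedged standalone example of alter_credential() rotating the user name and
# password stored in an existing credential. The credential name and values are
# hypothetical. Pass a dict (e.g. {}) for 'options', since the None default
# would fail the type assertion above.
import gpudb
example_db = gpudb.GPUdb( host = 'http://127.0.0.1:9191' )  # assumed connection settings
cred_rsp = example_db.alter_credential(
    credential_name = 'example_s3_credential',
    credential_updates_map = { 'identity': 'new_user', 'secret': 'new_password' },
    options = {} )
print( cred_rsp.credential_name )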
# begin alter_datasink
def alter_datasink( self, name = None, datasink_updates_map = None, options =
None ):
"""Alters the properties of an existing `data sink
<../../../../concepts/data_sinks/>`__
Parameters:
name (str)
Name of the data sink to be altered. Must be an existing data
sink.
datasink_updates_map (dict of str to str)
Map containing the properties of the data sink to be updated.
Error if empty.
Allowed keys are:
* **destination** --
Destination for the output data in format
'destination_type://path[:port]'.
Supported destination types are 'http', 'https' and 'kafka'.
* **connection_timeout** --
Timeout in seconds for connecting to this sink
* **wait_timeout** --
Timeout in seconds for waiting for a response from this sink
* **credential** --
Name of the `credential
<../../../../concepts/credentials/>`__ object to be used in
this data sink
* **s3_bucket_name** --
Name of the Amazon S3 bucket to use as the data sink
* **s3_region** --
Name of the Amazon S3 region where the given bucket is
located
* **s3_aws_role_arn** --
Amazon IAM Role ARN which has required S3 permissions that
can be assumed for the given S3 IAM user
* **hdfs_kerberos_keytab** --
Kerberos keytab file location for the given HDFS user. This
may be a KIFS file.
* **hdfs_delegation_token** --
Delegation token for the given HDFS user
* **hdfs_use_kerberos** --
Use kerberos authentication for the given HDFS cluster.
Allowed values are:
* true
* false
The default value is 'false'.
* **azure_storage_account_name** --
Name of the Azure storage account to use as the data sink;
this is valid only if tenant_id is specified
* **azure_container_name** --
Name of the Azure storage container to use as the data sink
* **azure_tenant_id** --
Active Directory tenant ID (or directory ID)
* **azure_sas_token** --
Shared access signature token for Azure storage account to
use as the data sink
* **azure_oauth_token** --
OAuth token to access the given storage container
* **gcs_bucket_name** --
Name of the Google Cloud Storage bucket to use as the data
sink
* **gcs_project_id** --
Name of the Google Cloud project to use as the data sink
* **gcs_service_account_keys** --
Google Cloud service account keys to use for authenticating
the data sink
* **kafka_url** --
The publicly-accessible full path URL to the kafka broker,
e.g., 'http://172.123.45.67:9300'.
* **kafka_topic_name** --
Name of the Kafka topic to use for this data sink, if it
references a Kafka broker
* **anonymous** --
Create an anonymous connection to the storage
provider--DEPRECATED: this is now the default. Specify
use_managed_credentials for non-anonymous connection.
Allowed values are:
* true
* false
The default value is 'true'.
* **use_managed_credentials** --
When no credentials are supplied, we use anonymous access by
default. If this is set, we will use cloud provider user
settings.
Allowed values are:
* true
* false
The default value is 'false'.
* **use_https** --
Use https to connect to datasink if true, otherwise use http.
Allowed values are:
* true
* false
The default value is 'true'.
* **max_batch_size** --
Maximum number of records per notification message. The
default value is '1'.
* **max_message_size** --
Maximum size in bytes of each notification message. The
default value is '1000000'.
* **json_format** --
The desired format of JSON encoded notification messages.
If *nested*, records are returned as an array.
Otherwise, only a single record per message is returned.
Allowed values are:
* flat
* nested
The default value is 'flat'.
* **skip_validation** --
Bypass validation of connection to this data sink.
Allowed values are:
* true
* false
The default value is 'false'.
* **schema_name** --
Updates the schema name. If *schema_name*
doesn't exist, an error will be thrown. If *schema_name* is
empty, then the user's
default schema will be used.
options (dict of str to str)
Optional parameters.
Returns:
A dict with the following entries--
updated_properties_map (dict of str to str)
Map of values updated
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "alter_datasink(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( datasink_updates_map, (dict)), "alter_datasink(): Argument 'datasink_updates_map' must be (one) of type(s) '(dict)'; given %s" % type( datasink_updates_map ).__name__
assert isinstance( options, (dict)), "alter_datasink(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['datasink_updates_map'] = self.__sanitize_dicts( datasink_updates_map )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/datasink', obj, convert_to_attr_dict = True )
return response
# end alter_datasink
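# --- Usage sketch (editor's illustration; not part of the generated API) ---
# A hedged standalone example of alter_datasink() pointing an existing data
# sink at a new HTTPS destination and raising its connection timeout. The sink
# name, destination URL, and timeout value are hypothetical.
import gpudb
example_db = gpudb.GPUdb( host = 'http://127.0.0.1:9191' )  # assumed connection settings
sink_rsp = example_db.alter_datasink(
    name = 'example_webhook_sink',
    datasink_updates_map = { 'destination': 'https://hooks.example.com:443',
                             'connection_timeout': '60' },
    options = {} )
print( sink_rsp.updated_properties_map )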
# begin alter_datasource
def alter_datasource( self, name = None, datasource_updates_map = None, options
= None ):
"""Alters the properties of an existing `data source
<../../../../concepts/data_sources/>`__
Parameters:
name (str)
Name of the data source to be altered. Must be an existing data
source.
datasource_updates_map (dict of str to str)
Map containing the properties of the data source to be updated.
Error if empty.
Allowed keys are:
* **location** --
Location of the remote storage in
'storage_provider_type://[storage_path[:storage_port]]'
format.
Supported storage provider types are
'azure', 'gcs', 'hdfs', 'kafka' and 's3'.
* **user_name** --
Name of the remote system user; may be an empty string
* **password** --
Password for the remote system user; may be an empty string
* **skip_validation** --
Bypass validation of connection to remote source.
Allowed values are:
* true
* false
The default value is 'false'.
* **connection_timeout** --
Timeout in seconds for connecting to this storage provider
* **wait_timeout** --
Timeout in seconds for reading from this storage provider
* **credential** --
Name of the `credential <../../../../concepts/credentials>`__
object to be used in data source
* **s3_bucket_name** --
Name of the Amazon S3 bucket to use as the data source
* **s3_region** --
Name of the Amazon S3 region where the given bucket is
located
* **s3_aws_role_arn** --
Amazon IAM Role ARN which has required S3 permissions that
can be assumed for the given S3 IAM user
* **s3_encryption_customer_algorithm** --
Customer encryption algorithm used for encrypting data
* **s3_encryption_customer_key** --
Customer encryption key to encrypt or decrypt data
* **hdfs_kerberos_keytab** --
Kerberos keytab file location for the given HDFS user. This
may be a KIFS file.
* **hdfs_delegation_token** --
Delegation token for the given HDFS user
* **hdfs_use_kerberos** --
Use kerberos authentication for the given HDFS cluster.
Allowed values are:
* true
* false
The default value is 'false'.
* **azure_storage_account_name** --
Name of the Azure storage account to use as the data source;
this is valid only if tenant_id is specified
* **azure_container_name** --
Name of the Azure storage container to use as the data source
* **azure_tenant_id** --
Active Directory tenant ID (or directory ID)
* **azure_sas_token** --
Shared access signature token for Azure storage account to
use as the data source
* **azure_oauth_token** --
OAuth token to access given storage container
* **gcs_bucket_name** --
Name of the Google Cloud Storage bucket to use as the data
source
* **gcs_project_id** --
Name of the Google Cloud project to use as the data source
* **gcs_service_account_keys** --
Google Cloud service account keys to use for authenticating
the data source
* **kafka_url** --
The publicly-accessible full path URL to the Kafka broker,
e.g., 'http://172.123.45.67:9300'.
* **kafka_topic_name** --
Name of the Kafka topic to use as the data source
* **jdbc_driver_jar_path** --
JDBC driver jar file location. This may be a KIFS file.
* **jdbc_driver_class_name** --
Name of the JDBC driver class
* **anonymous** --
Create an anonymous connection to the storage
provider--DEPRECATED: this is now the default. Specify
use_managed_credentials for non-anonymous connection.
Allowed values are:
* true
* false
The default value is 'true'.
* **use_managed_credentials** --
When no credentials are supplied, we use anonymous access by
default. If this is set, we will use cloud provider user
settings.
Allowed values are:
* true
* false
The default value is 'false'.
* **use_https** --
Use https to connect to datasource if true, otherwise use
http.
Allowed values are:
* true
* false
The default value is 'true'.
* **schema_name** --
Updates the schema name. If *schema_name*
doesn't exist, an error will be thrown. If *schema_name* is
empty, then the user's
default schema will be used.
options (dict of str to str)
Optional parameters.
Returns:
A dict with the following entries--
updated_properties_map (dict of str to str)
Map of values updated
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "alter_datasource(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( datasource_updates_map, (dict)), "alter_datasource(): Argument 'datasource_updates_map' must be (one) of type(s) '(dict)'; given %s" % type( datasource_updates_map ).__name__
assert isinstance( options, (dict)), "alter_datasource(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['datasource_updates_map'] = self.__sanitize_dicts( datasource_updates_map )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/datasource', obj, convert_to_attr_dict = True )
return response
# end alter_datasource
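# --- Usage sketch (editor's illustration; not part of the generated API) ---
# A hedged standalone example of alter_datasource() switching an existing S3
# data source to a different bucket/region and attaching a named credential.
# The source name, bucket, region, and credential name are hypothetical.
import gpudb
example_db = gpudb.GPUdb( host = 'http://127.0.0.1:9191' )  # assumed connection settings
source_rsp = example_db.alter_datasource(
    name = 'example_s3_source',
    datasource_updates_map = { 's3_bucket_name': 'example-bucket',
                               's3_region': 'us-east-1',
                               'credential': 'example_s3_credential' },
    options = {} )
print( source_rsp.updated_properties_map )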
# begin alter_directory
def alter_directory( self, directory_name = None, directory_updates_map = None,
options = {} ):
"""Alters an existing directory in `KiFS <../../../../tools/kifs/>`__.
Parameters:
directory_name (str)
Name of the directory in KiFS to be altered.
directory_updates_map (dict of str to str)
Map containing the properties of the directory to be altered.
Error if empty.
Allowed keys are:
* **data_limit** --
The maximum capacity, in bytes, to apply to the directory.
Set to -1 to indicate no upper limit.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
directory_name (str)
Value of input parameter *directory_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( directory_name, (basestring)), "alter_directory(): Argument 'directory_name' must be (one) of type(s) '(basestring)'; given %s" % type( directory_name ).__name__
assert isinstance( directory_updates_map, (dict)), "alter_directory(): Argument 'directory_updates_map' must be (one) of type(s) '(dict)'; given %s" % type( directory_updates_map ).__name__
assert isinstance( options, (dict)), "alter_directory(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['directory_name'] = directory_name
obj['directory_updates_map'] = self.__sanitize_dicts( directory_updates_map )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/directory', obj, convert_to_attr_dict = True )
return response
# end alter_directory
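# --- Usage sketch (editor's illustration; not part of the generated API) ---
# A hedged standalone example of alter_directory() capping a KiFS directory at
# roughly 1 GB. The directory name is hypothetical; per the docstring, use
# '-1' for no upper limit.
import gpudb
example_db = gpudb.GPUdb( host = 'http://127.0.0.1:9191' )  # assumed connection settings
dir_rsp = example_db.alter_directory(
    directory_name = 'example_udf_files',
    directory_updates_map = { 'data_limit': '1073741824' } )
print( dir_rsp.directory_name )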
# begin alter_environment
def alter_environment( self, environment_name = None, action = None, value =
None, options = {} ):
"""Alters an existing environment which can be referenced by a
`user-defined function <../../../../concepts/udf/>`__ (UDF).
Parameters:
environment_name (str)
Name of the environment to be altered.
action (str)
Modification operation to be applied
Allowed values are:
* **install_package** --
Install a python package from PyPI, an external data source
or KiFS
* **install_requirements** --
Install packages from a requirements file
* **uninstall_package** --
Uninstall a python package.
* **uninstall_requirements** --
Uninstall packages from a requirements file
* **reset** --
Uninstalls all packages in the environment and resets it to
the original state at time of creation
* **rebuild** --
Recreates the environment and re-installs all packages,
upgrades the packages if necessary based on dependencies
value (str)
The value of the modification, depending on input parameter
*action*. For example, if input parameter *action* is
*install_package*, this would be the python package name.
If input parameter *action* is *install_requirements*, this
would be the path of a requirements file from which to install
packages.
If an external data source is specified in *datasource_name*,
this can be the path to a wheel file or source archive.
Alternatively, if installing from a file (wheel or source
archive), the value may be a reference to a file in `KiFS
<../../../../tools/kifs/>`__.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **datasource_name** --
Name of an existing external data source from which packages
specified in input parameter *value* can be loaded
Returns:
A dict with the following entries--
environment_name (str)
Value of input parameter *environment_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( environment_name, (basestring)), "alter_environment(): Argument 'environment_name' must be (one) of type(s) '(basestring)'; given %s" % type( environment_name ).__name__
assert isinstance( action, (basestring)), "alter_environment(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__
assert isinstance( value, (basestring)), "alter_environment(): Argument 'value' must be (one) of type(s) '(basestring)'; given %s" % type( value ).__name__
assert isinstance( options, (dict)), "alter_environment(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['environment_name'] = environment_name
obj['action'] = action
obj['value'] = value
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/environment', obj, convert_to_attr_dict = True )
return response
# end alter_environment
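# --- Usage sketch (editor's illustration; not part of the generated API) ---
# A hedged standalone example of alter_environment() installing a PyPI package
# into an existing UDF environment. The environment name is hypothetical.
import gpudb
example_db = gpudb.GPUdb( host = 'http://127.0.0.1:9191' )  # assumed connection settings
env_rsp = example_db.alter_environment(
    environment_name = 'example_udf_env',
    action = 'install_package',
    value = 'numpy' )
print( env_rsp.environment_name )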
# begin alter_graph
def alter_graph( self, graph_name = None, action = None, action_arg = None,
options = {} ):
assert isinstance( graph_name, (basestring)), "alter_graph(): Argument 'graph_name' must be (one) of type(s) '(basestring)'; given %s" % type( graph_name ).__name__
assert isinstance( action, (basestring)), "alter_graph(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__
assert isinstance( action_arg, (basestring)), "alter_graph(): Argument 'action_arg' must be (one) of type(s) '(basestring)'; given %s" % type( action_arg ).__name__
assert isinstance( options, (dict)), "alter_graph(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['graph_name'] = graph_name
obj['action'] = action
obj['action_arg'] = action_arg
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/graph', obj, convert_to_attr_dict = True )
return response
# end alter_graph
# begin alter_model
def alter_model( self, model_name = None, action = None, value = None, options =
{} ):
assert isinstance( model_name, (basestring)), "alter_model(): Argument 'model_name' must be (one) of type(s) '(basestring)'; given %s" % type( model_name ).__name__
assert isinstance( action, (basestring)), "alter_model(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__
assert isinstance( value, (basestring)), "alter_model(): Argument 'value' must be (one) of type(s) '(basestring)'; given %s" % type( value ).__name__
assert isinstance( options, (dict)), "alter_model(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['model_name'] = model_name
obj['action'] = action
obj['value'] = value
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/model', obj, convert_to_attr_dict = True )
return response
# end alter_model
# begin alter_resource_group
def alter_resource_group( self, name = None, tier_attributes = {}, ranking = '',
adjoining_resource_group = '', options = {} ):
"""Alters the properties of an exisiting resource group to facilitate
resource management.
Parameters:
name (str)
Name of the group to be altered. Must be an existing resource
group name or an empty string when used in conjunction with the
is_default_group option.
tier_attributes (dict of str to dicts of str to str)
Optional map containing tier names and their respective
attribute group limits. The only valid attribute limit that
can be set is max_memory (in bytes) for the VRAM & RAM tiers.
For instance, to set max VRAM capacity to 1GB and max RAM
capacity to 10GB, use: {'VRAM':{'max_memory':'1000000000'},
'RAM':{'max_memory':'10000000000'}}. The default value is an
empty dict ( {} ).
Allowed keys are:
* **max_memory** --
Maximum amount of memory usable in the given tier at one time
for this group.
ranking (str)
If the resource group ranking is to be updated, this indicates
the relative ranking among existing resource groups where this
resource group will be moved; leave blank if not changing the
ranking. When using *before* or *after*, specify which
resource group this one will be inserted before or after in
input parameter *adjoining_resource_group*.
Allowed values are:
* '' (empty string)
* first
* last
* before
* after
The default value is ''.
adjoining_resource_group (str)
If input parameter *ranking* is *before* or *after*, this field
indicates the resource group before or after which the current
group will be placed; otherwise, leave blank. The default
value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **max_cpu_concurrency** --
Maximum number of simultaneous threads that will be used to
execute a request for this group.
* **max_data** --
Maximum amount of cumulative ram usage regardless of tier
status for this group.
* **max_scheduling_priority** --
Maximum priority of a scheduled task for this group.
* **max_tier_priority** --
Maximum priority of a tiered object for this group.
* **is_default_group** --
If *true*, this request applies to the global default
resource group. It is an error for this field to be *true*
when the input parameter *name* field is also populated.
Allowed values are:
* true
* false
The default value is 'false'.
* **persist** --
If *true* and a system-level change was requested, the system
configuration will be written to disk upon successful
application of this request. This will commit the changes
from this request and any additional in-memory modifications.
Allowed values are:
* true
* false
The default value is 'true'.
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "alter_resource_group(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( tier_attributes, (dict)), "alter_resource_group(): Argument 'tier_attributes' must be (one) of type(s) '(dict)'; given %s" % type( tier_attributes ).__name__
assert isinstance( ranking, (basestring)), "alter_resource_group(): Argument 'ranking' must be (one) of type(s) '(basestring)'; given %s" % type( ranking ).__name__
assert isinstance( adjoining_resource_group, (basestring)), "alter_resource_group(): Argument 'adjoining_resource_group' must be (one) of type(s) '(basestring)'; given %s" % type( adjoining_resource_group ).__name__
assert isinstance( options, (dict)), "alter_resource_group(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['tier_attributes'] = self.__sanitize_dicts( tier_attributes )
obj['ranking'] = ranking
obj['adjoining_resource_group'] = adjoining_resource_group
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/resourcegroup', obj, convert_to_attr_dict = True )
return response
# end alter_resource_group
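# --- Usage sketch (editor's illustration; not part of the generated API) ---
# A hedged standalone example of alter_resource_group() limiting how much VRAM
# and RAM a group may occupy, mirroring the tier_attributes form shown in the
# docstring above. The group name is hypothetical.
import gpudb
example_db = gpudb.GPUdb( host = 'http://127.0.0.1:9191' )  # assumed connection settings
group_rsp = example_db.alter_resource_group(
    name = 'example_analyst_group',
    tier_attributes = { 'VRAM': { 'max_memory': '1000000000' },
                        'RAM':  { 'max_memory': '10000000000' } } )
print( group_rsp.name )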
# begin alter_role
def alter_role( self, name = None, action = None, value = None, options = {} ):
"""Alters a Role.
Parameters:
name (str)
Name of the role to be altered. Must be an existing role.
action (str)
Modification operation to be applied to the role.
Allowed values are:
* **set_resource_group** --
Sets the resource group for an internal role. The resource
group must exist; an empty string assigns the role
to the default resource group.
value (str)
The value of the modification, depending on input parameter
*action*.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "alter_role(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( action, (basestring)), "alter_role(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__
assert isinstance( value, (basestring)), "alter_role(): Argument 'value' must be (one) of type(s) '(basestring)'; given %s" % type( value ).__name__
assert isinstance( options, (dict)), "alter_role(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['action'] = action
obj['value'] = value
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/role', obj, convert_to_attr_dict = True )
return response
# end alter_role
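# --- Usage sketch (editor's illustration; not part of the generated API) ---
# A hedged standalone example of alter_role() assigning an existing role to a
# resource group. Both names are hypothetical.
import gpudb
example_db = gpudb.GPUdb( host = 'http://127.0.0.1:9191' )  # assumed connection settings
role_rsp = example_db.alter_role(
    name = 'example_analyst_role',
    action = 'set_resource_group',
    value = 'example_analyst_group' )
print( role_rsp.name )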
# begin alter_schema
def alter_schema( self, schema_name = None, action = None, value = None, options
= {} ):
"""Used to change the name of a SQL-style `schema
<../../../../concepts/schemas/>`__, specified in input parameter
*schema_name*.
Parameters:
schema_name (str)
Name of the schema to be altered.
action (str)
Modification operation to be applied
Allowed values are:
* **rename_schema** --
Renames a schema to input parameter *value*. Has the same
naming restrictions as `tables
<../../../../concepts/tables/>`__.
value (str)
The value of the modification, depending on input parameter
*action*. For now the only value of input parameter *action*
is *rename_schema*. In this case the value is the new name of
the schema.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
schema_name (str)
Value of input parameter *schema_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( schema_name, (basestring)), "alter_schema(): Argument 'schema_name' must be (one) of type(s) '(basestring)'; given %s" % type( schema_name ).__name__
assert isinstance( action, (basestring)), "alter_schema(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__
assert isinstance( value, (basestring)), "alter_schema(): Argument 'value' must be (one) of type(s) '(basestring)'; given %s" % type( value ).__name__
assert isinstance( options, (dict)), "alter_schema(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['schema_name'] = schema_name
obj['action'] = action
obj['value'] = value
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/schema', obj, convert_to_attr_dict = True )
return response
# end alter_schema
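# --- Usage sketch (editor's illustration; not part of the generated API) ---
# A hedged standalone example of alter_schema() renaming a schema; per the
# docstring, 'rename_schema' is the only documented action. Names are
# hypothetical.
import gpudb
example_db = gpudb.GPUdb( host = 'http://127.0.0.1:9191' )  # assumed connection settings
schema_rsp = example_db.alter_schema(
    schema_name = 'example_schema',
    action = 'rename_schema',
    value = 'example_schema_renamed' )
print( schema_rsp.schema_name )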
# begin alter_system_properties
def alter_system_properties( self, property_updates_map = None, options = {} ):
"""The :meth:`GPUdb.alter_system_properties` endpoint is primarily used to
simplify the testing of the system and is not expected to be used
during normal execution. Commands are given through the input
parameter *property_updates_map* whose keys are commands and values are
strings representing integer values (for example '8000') or boolean
values ('true' or 'false').
Parameters:
property_updates_map (dict of str to str)
Map containing the properties of the system to be updated.
Error if empty.
Allowed keys are:
* **sm_omp_threads** --
Set the number of OpenMP threads that will be used to service
filter & aggregation requests to the specified integer value.
* **kernel_omp_threads** --
Set the number of kernel OpenMP threads to the specified
integer value.
* **concurrent_kernel_execution** --
Enables concurrent kernel execution if the value is *true*
and disables it if the value is *false*.
Allowed values are:
* true
* false
* **subtask_concurrency_limit** --
Sets the maximum number of simultaneous threads allocated to
a given request, on each rank. Note that thread allocation
may also be limited by resource group limits and/or system
load.
* **chunk_size** --
Sets the number of records per chunk to be used for all new
tables.
* **evict_columns** --
Attempts to evict columns from memory to the persistent
store. Value string is a semicolon separated list of
entries, each entry being a table name optionally followed by
a comma and a comma separated list of column names to attempt
to evict. An empty value string will attempt to evict all
tables and columns.
* **execution_mode** --
Sets the execution_mode for kernel executions to the
specified string value. Possible values are host, device,
default (engine decides) or an integer value that indicates
max chunk size to exec on host
* **external_files_directory** --
Sets the root directory path where external table data files
are accessed from. Path must exist on the head node
* **flush_to_disk** --
Flushes any changes to any tables to the persistent store.
These changes include updates to the vector store, object
store, and text search store. Value string is ignored.
* **clear_cache** --
Clears cached results. Useful to allow repeated timing of
endpoints. Value string is the name of the table for which
to clear the cached results, or an empty string to clear the
cached results for all tables.
* **communicator_test** --
Invoke the communicator test and report timing results. Value
string is a semicolon separated list of [key]=[value]
expressions. Expressions are: num_transactions=[num] where
num is the number of request reply transactions to invoke per
test; message_size=[bytes] where bytes is the size in bytes
of the messages to send; check_values=[enabled] where if
enabled is true the value of the messages received are
verified.
* **network_speed** --
Invoke the network speed test and report timing results.
Value string is a semicolon-separated list of [key]=[value]
expressions. Valid expressions are: seconds=[time] where
time is the time in seconds to run the test;
data_size=[bytes] where bytes is the size in bytes of the
block to be transferred; threads=[number of threads];
to_ranks=[space-separated list of ranks] where the list of
ranks is the ranks that rank 0 will send data to and get data
from. If to_ranks is unspecified then all worker ranks are
used.
* **request_timeout** --
Number of minutes after which filtering (e.g.,
:meth:`GPUdb.filter`) and aggregating (e.g.,
:meth:`GPUdb.aggregate_group_by`) queries will timeout. The
default value is '20'.
* **max_get_records_size** --
The maximum number of records the database will serve for a
given data retrieval call. The default value is '20000'.
* **max_grbc_batch_size** --
<DEVELOPER>
* **enable_audit** --
Enable or disable auditing.
* **audit_headers** --
Enable or disable auditing of request headers.
* **audit_body** --
Enable or disable auditing of request bodies.
* **audit_data** --
Enable or disable auditing of request data.
* **audit_response** --
Enable or disable auditing of response information.
* **shadow_agg_size** --
Size of the shadow aggregate chunk cache in bytes. The
default value is '10000000'.
* **shadow_filter_size** --
Size of the shadow filter chunk cache in bytes. The default
value is '10000000'.
* **synchronous_compression** --
Compress vector on set_compression (instead of waiting for
background thread). The default value is 'false'.
* **enable_overlapped_equi_join** --
Enable overlapped-equi-join filter. The default value is
'true'.
* **kafka_batch_size** --
Maximum number of records to be ingested in a single batch.
The default value is '1000'.
* **kafka_poll_timeout** --
Maximum time (milliseconds) for each poll to get records from
kafka. The default value is '0'.
* **kafka_wait_time** --
Maximum time (seconds) to buffer records received from kafka
before ingestion. The default value is '30'.
* **egress_parquet_compression** --
Parquet file compression type.
Allowed values are:
* uncompressed
* snappy
* gzip
The default value is 'snappy'.
* **egress_single_file_max_size** --
Max file size (in MB) to allow saving to a single file. May
be overridden by target limitations. The default value is
'10000'.
* **max_concurrent_kernels** --
Sets the max_concurrent_kernels value of the conf.
* **tcs_per_tom** --
Sets the tcs_per_tom value of the conf.
* **tps_per_tom** --
Sets the tps_per_tom value of the conf.
* **ai_api_provider** --
AI API provider type
* **ai_api_url** --
AI API URL
* **ai_api_key** --
AI API key
* **ai_api_connection_timeout** --
AI API connection timeout in seconds
* **postgres_proxy_idle_connection_timeout** --
Idle connection timeout in seconds
* **postgres_proxy_keep_alive** --
Enable postgres proxy keep alive. The default value is
'false'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **evict_to_cold** --
If *true* and evict_columns is specified, the given objects
will be evicted to cold storage (if such a tier exists).
Allowed values are:
* true
* false
* **persist** --
If *true* the system configuration will be written to disk
upon successful application of this request. This will commit
the changes from this request and any additional in-memory
modifications.
Allowed values are:
* true
* false
The default value is 'true'.
Returns:
A dict with the following entries--
updated_properties_map (dict of str to str)
Map of values updated; for speed tests, a map of values
measured to the measurement
info (dict of str to str)
Additional information.
"""
assert isinstance( property_updates_map, (dict)), "alter_system_properties(): Argument 'property_updates_map' must be (one) of type(s) '(dict)'; given %s" % type( property_updates_map ).__name__
assert isinstance( options, (dict)), "alter_system_properties(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['property_updates_map'] = self.__sanitize_dicts( property_updates_map )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/system/properties', obj, convert_to_attr_dict = True )
return response
# end alter_system_properties
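# --- Usage sketch (editor's illustration; not part of the generated API) ---
# A hedged standalone example of alter_system_properties() flushing table
# changes to the persistent store and clearing cached results for one table.
# The table name is hypothetical; per the docstring, the value string for
# 'flush_to_disk' is ignored.
import gpudb
example_db = gpudb.GPUdb( host = 'http://127.0.0.1:9191' )  # assumed connection settings
sys_rsp = example_db.alter_system_properties(
    property_updates_map = { 'flush_to_disk': 'true',
                             'clear_cache': 'example.quarterly_sales' },
    options = {} )
print( sys_rsp.updated_properties_map )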
# begin alter_table
def alter_table( self, table_name = None, action = None, value = None, options =
{} ):
"""Apply various modifications to a table or view. The
available modifications include the following:
Manage a table's columns--a column can be added, removed, or have its
`type and properties <../../../../concepts/types/>`__ modified,
including whether it is
`dictionary encoded <../../../../concepts/dictionary_encoding/>`__ or
not.
External tables cannot be modified except for their refresh method.
Create or delete a `column
<../../../../concepts/indexes/#column-index>`__,
`chunk skip <../../../../concepts/indexes/#chunk-skip-index>`__, or
`geospatial <../../../../concepts/indexes/#geospatial-index>`__ index.
This can speed up
certain operations when using expressions containing equality or
relational
operators on indexed columns. This only applies to tables.
Create or delete a `foreign key
<../../../../concepts/tables/#foreign-key>`__
on a particular column.
Manage a
`range-partitioned
<../../../../concepts/tables/#partitioning-by-range>`__ or a
`manual list-partitioned
<../../../../concepts/tables/#partitioning-by-list-manual>`__
table's partitions.
Set (or reset) the `tier strategy
<../../../../rm/concepts/#tier-strategies>`__
of a table or view.
Refresh and manage the refresh mode of a
`materialized view <../../../../concepts/materialized_views/>`__ or an
`external table <../../../../concepts/external_tables/>`__.
Set the `time-to-live (TTL) <../../../../concepts/ttl/>`__. This can be
applied
to tables or views.
Set the global access mode (i.e. locking) for a table. This setting
trumps any
role-based access controls that may be in place; e.g., a user with
write access
to a table marked read-only will not be able to insert records into it.
The mode
can be set to read-only, write-only, read/write, and no access.
Parameters:
table_name (str)
Table on which the operation will be performed, in
[schema_name.]table_name format,
using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
Must be an existing table or view.
action (str)
Modification operation to be applied
Allowed values are:
* **allow_homogeneous_tables** --
No longer supported; action will be ignored.
* **create_index** --
Creates a `column (attribute) index
<../../../../concepts/indexes/#column-index>`__,
`chunk skip index
<../../../../concepts/indexes/#chunk-skip-index>`__, or
`geospatial index
<../../../../concepts/indexes/#geospatial-index>`__
(depending on the specified *index_type*), on the column name
specified in input parameter *value*.
If this column already has the specified index, an error will
be returned.
* **delete_index** --
Deletes a `column (attribute) index
<../../../../concepts/indexes/#column-index>`__,
`chunk skip index
<../../../../concepts/indexes/#chunk-skip-index>`__, or
`geospatial index
<../../../../concepts/indexes/#geospatial-index>`__
(depending on the specified *index_type*), on the column name
specified in input parameter *value*.
If this column does not have the specified index, an error
will be returned.
* **move_to_collection** --
[DEPRECATED--please use *move_to_schema* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Moves a table or view into a schema named
input parameter *value*. If the schema provided is
non-existent, it will be automatically created.
* **move_to_schema** --
Moves a table or view into a schema named input parameter
*value*.
If the schema provided is nonexistent, an error will be
thrown.
If input parameter *value* is empty, then the table or view
will be placed in the user's default schema.
* **protected** --
No longer used. Previously set whether the given input
parameter *table_name* should be protected or not. The input
parameter *value* would have been either 'true' or 'false'.
* **rename_table** --
Renames a table or view within its current schema to input
parameter *value*. Has the same naming restrictions as
`tables <../../../../concepts/tables/>`__.
* **ttl** --
Sets the `time-to-live <../../../../concepts/ttl/>`__ in
minutes of the table or view specified in input parameter
*table_name*.
* **add_column** --
Adds the column specified in input parameter *value* to the
table specified in input parameter *table_name*.
Use *column_type* and *column_properties* in input parameter
*options*
to set the column's type and properties, respectively.
* **change_column** --
Changes type and properties of the column specified in input
parameter *value*.
Use *column_type* and *column_properties* in input parameter
*options* to set
the column's type and properties, respectively. Note that
primary key and/or shard key columns cannot be changed.
All unchanging column properties must be listed for the
change to take place, e.g., to add dictionary encoding to
an existing 'char4' column, both 'char4' and 'dict' must be
specified in the input parameter *options* map.
* **set_column_compression** --
No longer supported; action will be ignored.
* **delete_column** --
Deletes the column specified in input parameter *value* from
the table specified in input parameter *table_name*.
* **create_foreign_key** --
Creates a `foreign key
<../../../../concepts/tables/#foreign-key>`__ specified in
input parameter *value* using the format '(source_column_name
[, ...]) references target_table_name(primary_key_column_name
[, ...]) [as foreign_key_name]'.
* **delete_foreign_key** --
Deletes a `foreign key
<../../../../concepts/tables/#foreign-key>`__. The input
parameter *value* should be the foreign_key_name specified
when creating the key or the complete string used to define
it.
* **add_partition** --
Adds the partition specified in input parameter *value*, to
either a `range-partitioned
<../../../../concepts/tables/#partitioning-by-range>`__ or
`manual list-partitioned
<../../../../concepts/tables/#partitioning-by-list-manual>`__
table.
* **remove_partition** --
Removes the partition specified in input parameter *value*
(and relocates all of its data to the default partition) from
either a `range-partitioned
<../../../../concepts/tables/#partitioning-by-range>`__ or
`manual list-partitioned
<../../../../concepts/tables/#partitioning-by-list-manual>`__
table.
* **delete_partition** --
Deletes the partition specified in input parameter *value*
(and all of its data) from either a `range-partitioned
<../../../../concepts/tables/#partitioning-by-range>`__ or
`manual list-partitioned
<../../../../concepts/tables/#partitioning-by-list-manual>`__
table.
* **set_global_access_mode** --
Sets the global access mode (i.e. locking) for the table
specified in input parameter *table_name*. Specify the access
mode in input parameter *value*. Valid modes are 'no_access',
'read_only', 'write_only' and 'read_write'.
* **refresh** --
For a `materialized view
<../../../../concepts/materialized_views/>`__, replays all
the table creation commands required to create the view. For
an `external table
<../../../../concepts/external_tables/>`__, reloads all data
in the table from its associated source files or `data source
<../../../../concepts/data_sources/>`__.
* **set_refresh_method** --
For a `materialized view
<../../../../concepts/materialized_views/>`__, sets the
method by which the view is refreshed to the method specified
in input parameter *value* - one of 'manual', 'periodic', or
'on_change'. For an `external table
<../../../../concepts/external_tables/>`__, sets the method
by which the table is refreshed to the method specified in
input parameter *value* - either 'manual' or 'on_start'.
* **set_refresh_start_time** --
Sets the time to start periodic refreshes of this
`materialized view
<../../../../concepts/materialized_views/>`__ to the datetime
string specified in input parameter *value* with format
'YYYY-MM-DD HH:MM:SS'. Subsequent refreshes occur at the
specified time + N * the refresh period.
* **set_refresh_stop_time** --
Sets the time to stop periodic refreshes of this
`materialized view
<../../../../concepts/materialized_views/>`__ to the datetime
string specified in input parameter *value* with format
'YYYY-MM-DD HH:MM:SS'.
* **set_refresh_period** --
Sets the time interval in seconds at which to refresh this
`materialized view
<../../../../concepts/materialized_views/>`__ to the value
specified in input parameter *value*. Also, sets the refresh
method to periodic if not already set.
* **set_refresh_span** --
Sets the future time-offset (in seconds) for the view refresh
to stop.
* **set_refresh_execute_as** --
Sets the user name to refresh this `materialized view
<../../../../concepts/materialized_views/>`__ to the value
specified in input parameter *value*.
* **remove_text_search_attributes** --
Removes `text search
<../../../../concepts/full_text_search/>`__ attribute from
all columns.
* **remove_shard_keys** --
Removes the shard key property from all columns, so that the
table will be considered randomly sharded. The data is not
moved. The input parameter *value* is ignored.
* **set_strategy_definition** --
Sets the `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns to the one specified in input parameter
*value*, replacing the existing tier strategy in its
entirety.
* **cancel_datasource_subscription** --
Permanently unsubscribe a data source that is loading
continuously as a stream. The data source can be Kafka / S3 /
Azure.
* **pause_datasource_subscription** --
Temporarily unsubscribe a data source that is loading
continuously as a stream. The data source can be Kafka / S3 /
Azure.
* **resume_datasource_subscription** --
Resubscribe to a paused data source subscription. The data
source can be Kafka / S3 / Azure.
* **change_owner** --
Change the owner resource group of the table.
value (str)
The value of the modification, depending on input parameter
*action*.
For example, if input parameter *action* is *add_column*, this
would be the column name;
while the column's definition would be covered by the
*column_type*,
*column_properties*, *column_default_value*,
and *add_column_expression* in input parameter *options*.
If input parameter *action* is *ttl*, it would be the number of
minutes for the new TTL.
If input parameter *action* is *refresh*, this field would be
blank.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **column_default_value** --
When adding a column, set a default value for existing
records. For nullable columns, the default value will be
null, regardless of data type.
* **column_properties** --
When adding or changing a column, set the column properties
(strings, separated by a comma: data, store_only,
text_search, char8, int8, etc.).
* **column_type** --
When adding or changing a column, set the column type
(strings, separated by a comma: int, double, string, null,
etc.).
* **compression_type** --
No longer supported; option will be ignored.
Allowed values are:
* none
* snappy
* lz4
* lz4hc
The default value is 'snappy'.
* **copy_values_from_column** --
[DEPRECATED--please use *add_column_expression* instead.]
* **rename_column** --
When changing a column, specify new column name.
* **validate_change_column** --
When changing a column, validate the change before applying
it (or not).
Allowed values are:
* **true** --
Validate all values. A value too large (or too long) for
the new type will prevent any change.
* **false** --
When a value is too large or long, it will be truncated.
The default value is 'true'.
* **update_last_access_time** --
Indicates whether the `time-to-live
<../../../../concepts/ttl/>`__ (TTL) expiration countdown
timer should be reset to the table's TTL.
Allowed values are:
* **true** --
Reset the expiration countdown timer to the table's
configured TTL.
* **false** --
Don't reset the timer; expiration countdown will continue
from where it is, as if the table had not been accessed.
The default value is 'true'.
* **add_column_expression** --
When adding a column, an optional expression to use for the
new column's values. Any valid expression may be used,
including one containing references to existing columns in
the same table.
* **strategy_definition** --
Optional parameter for specifying the `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns when input parameter *action* is
*set_strategy_definition*, replacing the existing tier
strategy in its entirety.
* **index_type** --
Type of index to create, when input parameter *action* is
*create_index*,
or to delete, when input parameter *action* is
*delete_index*.
Allowed values are:
* **column** --
Create or delete a `column (attribute) index
<../../../../concepts/indexes/#column-index>`__.
* **chunk_skip** --
Create or delete a `chunk skip index
<../../../../concepts/indexes/#chunk-skip-index>`__.
* **geospatial** --
Create or delete a geospatial index.
The default value is 'column'.
Returns:
A dict with the following entries--
table_name (str)
Table on which the operation was performed.
action (str)
Modification operation that was performed.
value (str)
The value of the modification that was performed.
type_id (str)
return the type_id (when changing a table, a new type may be
created)
type_definition (str)
return the type_definition (when changing a table, a new type
may be created)
properties (dict of str to lists of str)
return the type properties (when changing a table, a new type
may be created)
label (str)
return the type label (when changing a table, a new type may
be created)
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "alter_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( action, (basestring)), "alter_table(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__
assert isinstance( value, (basestring)), "alter_table(): Argument 'value' must be (one) of type(s) '(basestring)'; given %s" % type( value ).__name__
assert isinstance( options, (dict)), "alter_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['action'] = action
obj['value'] = value
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/table', obj, convert_to_attr_dict = True )
return response
# end alter_table
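# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is an already-connected GPUdb instance and that
# 'example.customer' is a hypothetical existing table.  Adds an integer
# column named 'loyalty_points' with a default of 0 for existing records.
alter_resp = db.alter_table(
    table_name = 'example.customer',
    action     = 'add_column',
    value      = 'loyalty_points',            # value holds the new column name
    options    = {
        'column_type'          : 'int',       # type of the new column
        'column_properties'    : 'data',      # comma-separated property list
        'column_default_value' : '0'          # value backfilled into existing rows
    }
)
# Changing the table structure may create a new type; its ID is returned.
print( alter_resp['type_id'] )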
# begin alter_table_columns
def alter_table_columns( self, table_name = None, column_alterations = None,
options = None ):
"""Apply various modifications to columns in a table, view. The available
modifications include the following:
Create or delete an `index
<../../../../concepts/indexes/#column-index>`__ on a
particular column. This can speed up certain operations when using
expressions
containing equality or relational operators on indexed columns. This
only
applies to tables.
Manage a table's columns--a column can be added, removed, or have its
`type and properties <../../../../concepts/types/>`__ modified,
including whether it is
`dictionary encoded <../../../../concepts/dictionary_encoding/>`__ or
not.
Parameters:
table_name (str)
Table on which the operation will be performed. Must be an
existing table or view, in [schema_name.]table_name format,
using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
column_alterations (list of dicts of str to str)
List of alter table add/delete/change column requests - all for
the same table. Each request is a map that includes
'column_name', 'action' and the options specific for the
action. Note that the options are the same as those used in alter
table requests, but are specified in the same map as the column
name and the action. For example:
[{'column_name':'col_1','action':'change_column','rename_column':'col_2'},{'column_name':'col_1','action':'add_column',
'type':'int','default_value':'1'}] The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters.
Returns:
A dict with the following entries--
table_name (str)
Table on which the operation was performed.
type_id (str)
return the type_id (when changing a table, a new type may be
created)
type_definition (str)
return the type_definition (when changing a table, a new type
may be created)
properties (dict of str to lists of str)
return the type properties (when changing a table, a new type
may be created)
label (str)
return the type label (when changing a table, a new type may
be created)
column_alterations (list of dicts of str to str)
List of alter table add/delete/change column requests - all for
the same table. Each request is a map that includes
'column_name', 'action' and the options specific for the
action. Note that the options are the same as those used in alter
table requests, but are specified in the same map as the column
name and the action. For example:
[{'column_name':'col_1','action':'change_column','rename_column':'col_2'},{'column_name':'col_1','action':'add_column',
'type':'int','default_value':'1'}]
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "alter_table_columns(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
column_alterations = column_alterations if isinstance( column_alterations, list ) else ( [] if (column_alterations is None) else [ column_alterations ] )
assert isinstance( options, (dict)), "alter_table_columns(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['column_alterations'] = column_alterations
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/table/columns', obj, convert_to_attr_dict = True )
return response
# end alter_table_columns
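# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance and 'example.customer' is a
# hypothetical table.  Each map bundles the column name, the action, and the
# action-specific options, mirroring the docstring example above.
column_changes = [
    { 'column_name': 'col_1', 'action': 'change_column', 'rename_column': 'col_2' },
    { 'column_name': 'col_3', 'action': 'add_column', 'type': 'int', 'default_value': '1' }
]
alter_cols_resp = db.alter_table_columns(
    table_name         = 'example.customer',
    column_alterations = column_changes,
    options            = {}
)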
# begin alter_table_metadata
# end alter_table_metadata
# begin alter_table_monitor
def alter_table_monitor( self, topic_id = None, monitor_updates_map = None,
options = None ):
"""Alters a table monitor previously created with
:meth:`GPUdb.create_table_monitor`.
Parameters:
topic_id (str)
The topic ID returned by :meth:`GPUdb.create_table_monitor`.
monitor_updates_map (dict of str to str)
Map containing the properties of the table monitor to be
updated. Error if empty.
Allowed keys are:
* **schema_name** --
Updates the schema name. If *schema_name*
doesn't exist, an error will be thrown. If *schema_name* is
empty, then the user's
default schema will be used.
options (dict of str to str)
Optional parameters.
Returns:
A dict with the following entries--
topic_id (str)
Value of input parameter *topic_id*.
info (dict of str to str)
Additional information.
"""
assert isinstance( topic_id, (basestring)), "alter_table_monitor(): Argument 'topic_id' must be (one) of type(s) '(basestring)'; given %s" % type( topic_id ).__name__
assert isinstance( monitor_updates_map, (dict)), "alter_table_monitor(): Argument 'monitor_updates_map' must be (one) of type(s) '(dict)'; given %s" % type( monitor_updates_map ).__name__
assert isinstance( options, (dict)), "alter_table_monitor(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['topic_id'] = topic_id
obj['monitor_updates_map'] = self.__sanitize_dicts( monitor_updates_map )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/tablemonitor', obj, convert_to_attr_dict = True )
return response
# end alter_table_monitor
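# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance and `monitor_topic_id` holds a
# topic ID previously returned by create_table_monitor().  Moves the monitor
# to a hypothetical 'example' schema.
monitor_resp = db.alter_table_monitor(
    topic_id            = monitor_topic_id,
    monitor_updates_map = { 'schema_name': 'example' },
    options             = {}
)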
# begin alter_tier
def alter_tier( self, name = None, options = {} ):
"""Alters properties of an exisiting
`tier <../../../../rm/concepts/#storage-tiers>`__ to facilitate
`resource management <../../../../rm/concepts/>`__.
To disable
`watermark-based eviction
<../../../../rm/concepts/#watermark-based-eviction>`__,
set both *high_watermark* and
*low_watermark* to 100.
Parameters:
name (str)
Name of the tier to be altered. Must be an existing tier group
name.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **capacity** --
Maximum size in bytes this tier may hold at once.
* **high_watermark** --
Threshold of usage of this tier's resource that, once
exceeded, will trigger watermark-based eviction from this
tier.
* **low_watermark** --
Threshold of resource usage that, once fallen below after
crossing the *high_watermark*, will cease watermark-based
eviction from this tier.
* **wait_timeout** --
Timeout in seconds for reading from or writing to this
resource. Applies to cold storage tiers only.
* **persist** --
If *true* the system configuration will be written to disk
upon successful application of this request. This will commit
the changes from this request and any additional in-memory
modifications.
Allowed values are:
* true
* false
The default value is 'true'.
* **rank** --
Apply the requested change only to a specific rank.
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "alter_tier(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( options, (dict)), "alter_tier(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/tier', obj, convert_to_attr_dict = True )
return response
# end alter_tier
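# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance and that a tier named 'RAM'
# exists (tier names are deployment-specific).  Resizes the tier and adjusts
# its eviction watermarks; all option values are passed as strings.
tier_resp = db.alter_tier(
    name    = 'RAM',
    options = {
        'capacity'       : str( 64 * 1024 ** 3 ),   # 64 GiB
        'high_watermark' : '90',                    # start eviction above 90% usage
        'low_watermark'  : '70'                     # stop eviction below 70% usage
    }
)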
# begin alter_user
def alter_user( self, name = None, action = None, value = None, options = {} ):
"""Alters a user.
Parameters:
name (str)
Name of the user to be altered. Must be an existing user.
action (str)
Modification operation to be applied to the user.
Allowed values are:
* **set_password** --
Sets the password of the user. The user must be an internal
user.
* **set_resource_group** --
Sets the resource group for an internal user. The resource
group must exist, otherwise, an empty string assigns the user
to the default resource group.
* **set_default_schema** --
Sets the default_schema for an internal user. An empty string
means the user will have no default schema.
value (str)
The value of the modification, depending on input parameter
*action*.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "alter_user(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( action, (basestring)), "alter_user(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__
assert isinstance( value, (basestring)), "alter_user(): Argument 'value' must be (one) of type(s) '(basestring)'; given %s" % type( value ).__name__
assert isinstance( options, (dict)), "alter_user(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['action'] = action
obj['value'] = value
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/user', obj, convert_to_attr_dict = True )
return response
# end alter_user
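# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance, 'analyst_01' is a hypothetical
# internal user, and 'analyst_rg' is a hypothetical existing resource group.
user_resp = db.alter_user(
    name   = 'analyst_01',
    action = 'set_resource_group',
    value  = 'analyst_rg'
)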
# begin alter_video
def alter_video( self, path = None, options = {} ):
"""Alters a video.
Parameters:
path (str)
Fully-qualified `KiFS <../../../../tools/kifs/>`__ path to the
video to be altered.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the video.
Returns:
A dict with the following entries--
path (str)
Fully-qualified KiFS path to the video file.
info (dict of str to str)
Additional information.
"""
assert isinstance( path, (basestring)), "alter_video(): Argument 'path' must be (one) of type(s) '(basestring)'; given %s" % type( path ).__name__
assert isinstance( options, (dict)), "alter_video(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['path'] = path
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/alter/video', obj, convert_to_attr_dict = True )
return response
# end alter_video
# begin append_records
def append_records( self, table_name = None, source_table_name = None, field_map
= None, options = {} ):
"""Append (or insert) all records from a source table
(specified by input parameter *source_table_name*) to a particular
target table
(specified by input parameter *table_name*). The field map
(specified by input parameter *field_map*) holds the user specified map
of target table
column names with their mapped source column names.
Parameters:
table_name (str)
The table name for the records to be appended, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table.
source_table_name (str)
The source table name to get records from, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table name.
field_map (dict of str to str)
Contains the mapping of column names from the target table
(specified by input parameter *table_name*) as the keys, and
corresponding column names or expressions (e.g., 'col_name+1')
from the source table (specified by input parameter
*source_table_name*). Must be existing column names in source
table and target table, and their types must be matched. For
details on using expressions, see `Expressions
<../../../../concepts/expressions/>`__.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **offset** --
A positive integer indicating the number of initial results
to skip from input parameter *source_table_name*. Default is
0. The minimum allowed value is 0. The maximum allowed value
is MAX_INT. The default value is '0'.
* **limit** --
A positive integer indicating the maximum number of results
to be returned from input parameter *source_table_name*. Or
END_OF_SET (-9999) to indicate that the max number of results
should be returned. The default value is '-9999'.
* **expression** --
Optional filter expression to apply to the input parameter
*source_table_name*. The default value is ''.
* **order_by** --
Comma-separated list of the columns to be sorted by from
source table (specified by input parameter
*source_table_name*), e.g., 'timestamp asc, x desc'. The
*order_by* columns do not have to be present in input
parameter *field_map*. The default value is ''.
* **update_on_existing_pk** --
Specifies the record collision policy for inserting source
table
records (specified by input parameter *source_table_name*)
into a target table
(specified by input parameter *table_name*) with a `primary
key <../../../../concepts/tables/#primary-keys>`__. If
set to *true*, any existing table record with
primary key values that match those of a source table record
being inserted will be replaced by that
new record (the new data will be "upserted"). If set to
*false*, any existing table record with primary
key values that match those of a source table record being
inserted will remain unchanged, while the
source record will be rejected and an error handled as
determined by
*ignore_existing_pk*. If the specified table does not have a
primary key,
then this option has no effect.
Allowed values are:
* **true** --
Upsert new records when primary keys match existing records
* **false** --
Reject new records when primary keys match existing records
The default value is 'false'.
* **ignore_existing_pk** --
Specifies the record collision error-suppression policy for
inserting source table records (specified by input parameter
*source_table_name*) into a target table
(specified by input parameter *table_name*) with a `primary
key <../../../../concepts/tables/#primary-keys>`__, only
used when not in upsert mode (upsert mode is disabled when
*update_on_existing_pk* is
*false*). If set to
*true*, any source table record being inserted that
is rejected for having primary key values that match those of
an existing target table record will
be ignored with no error generated. If *false*,
the rejection of any source table record for having primary
key values matching an existing target
table record will result in an error being raised. If the
specified table does not have a primary
key or if upsert mode is in effect (*update_on_existing_pk*
is
*true*), then this option has no effect.
Allowed values are:
* **true** --
Ignore source table records whose primary key values
collide with those of target table records
* **false** --
Raise an error for any source table record whose primary
key values collide with those of a target table record
The default value is 'false'.
* **truncate_strings** --
If set to *true*, it allows inserting longer strings into
smaller charN string columns by truncating the longer strings
to fit.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
table_name (str)
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
"""
assert isinstance( table_name, (basestring)), "append_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( source_table_name, (basestring)), "append_records(): Argument 'source_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( source_table_name ).__name__
assert isinstance( field_map, (dict)), "append_records(): Argument 'field_map' must be (one) of type(s) '(dict)'; given %s" % type( field_map ).__name__
assert isinstance( options, (dict)), "append_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['source_table_name'] = source_table_name
obj['field_map'] = self.__sanitize_dicts( field_map )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/append/records', obj, convert_to_attr_dict = True )
return response
# end append_records
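# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance and that the hypothetical tables
# 'example.order_totals' (target) and 'example.orders' (source) exist.  The
# field map keys are target columns; values are source columns or expressions.
append_resp = db.append_records(
    table_name        = 'example.order_totals',
    source_table_name = 'example.orders',
    field_map         = {
        'id'    : 'id',                    # plain column-to-column mapping
        'total' : 'price * quantity'       # expression evaluated per source record
    },
    options           = {
        'expression'            : 'quantity > 0',   # only append matching source rows
        'update_on_existing_pk' : 'true'            # upsert on primary-key collisions
    }
)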
# begin clear_statistics
def clear_statistics( self, table_name = '', column_name = '', options = {} ):
"""Clears statistics (cardinality, mean value, etc.) for a column in a
specified table.
Parameters:
table_name (str)
Name of a table, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table. The default value is ''.
column_name (str)
Name of the column in input parameter *table_name* for which to
clear statistics. The column must be from an existing table. An
empty string clears statistics for all columns in the table.
The default value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
column_name (str)
Value of input parameter *column_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "clear_statistics(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( column_name, (basestring)), "clear_statistics(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( options, (dict)), "clear_statistics(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['column_name'] = column_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/clear/statistics', obj, convert_to_attr_dict = True )
return response
# end clear_statistics
# begin clear_table
def clear_table( self, table_name = '', authorization = '', options = {} ):
"""Clears (drops) one or all tables in the database cluster. The
operation is synchronous, meaning that the table will be cleared before
the
function returns. The response payload returns the status of the
operation along
with the name of the table that was cleared.
Parameters:
table_name (str)
Name of the table to be cleared, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table. Empty string clears all available tables,
though this behavior is prevented by default via the gpudb.conf
parameter 'disable_clear_all'. The default value is ''.
authorization (str)
No longer used. User can pass an empty string. The default
value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_not_exists** --
If *true* and if the table specified in input parameter
*table_name* does not exist no error is returned. If *false*
and if the table specified in input parameter *table_name*
does not exist then an error is returned.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name* for a given table, or
'ALL CLEARED' in case of clearing all tables.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "clear_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( authorization, (basestring)), "clear_table(): Argument 'authorization' must be (one) of type(s) '(basestring)'; given %s" % type( authorization ).__name__
assert isinstance( options, (dict)), "clear_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['authorization'] = authorization
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/clear/table', obj, convert_to_attr_dict = True )
return response
# end clear_table
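# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance.  Drops a hypothetical staging
# table without raising an error if it has already been cleared.
clear_resp = db.clear_table(
    table_name = 'example.staging_orders',
    options    = { 'no_error_if_not_exists': 'true' }
)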
# begin clear_table_monitor
def clear_table_monitor( self, topic_id = None, options = {} ):
"""Deactivates a table monitor previously created with
:meth:`GPUdb.create_table_monitor`.
Parameters:
topic_id (str)
The topic ID returned by :meth:`GPUdb.create_table_monitor`.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **keep_autogenerated_sink** --
If *true*, the auto-generated `datasink
<../../../../concepts/data_sinks/>`__ associated with this
monitor, if there is one, will be retained for further use.
If *false*, then the auto-generated sink will be dropped if
there are no other monitors referencing it.
Allowed values are:
* true
* false
The default value is 'false'.
* **clear_all_references** --
If *true*, all references that share the same input parameter
*topic_id* will be cleared.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
topic_id (str)
Value of input parameter *topic_id*.
info (dict of str to str)
Additional information.
"""
assert isinstance( topic_id, (basestring)), "clear_table_monitor(): Argument 'topic_id' must be (one) of type(s) '(basestring)'; given %s" % type( topic_id ).__name__
assert isinstance( options, (dict)), "clear_table_monitor(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['topic_id'] = topic_id
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/clear/tablemonitor', obj, convert_to_attr_dict = True )
return response
# end clear_table_monitor
# begin clear_trigger
def clear_trigger( self, trigger_id = None, options = {} ):
"""Clears or cancels the trigger identified by the specified handle. The
output returns the handle of the trigger cleared as well as indicating
success or failure of the trigger deactivation.
Parameters:
trigger_id (str)
ID for the trigger to be deactivated.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
trigger_id (str)
Value of input parameter *trigger_id*.
info (dict of str to str)
Additional information.
"""
assert isinstance( trigger_id, (basestring)), "clear_trigger(): Argument 'trigger_id' must be (one) of type(s) '(basestring)'; given %s" % type( trigger_id ).__name__
assert isinstance( options, (dict)), "clear_trigger(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['trigger_id'] = trigger_id
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/clear/trigger', obj, convert_to_attr_dict = True )
return response
# end clear_trigger
# begin collect_statistics
def collect_statistics( self, table_name = None, column_names = None, options =
{} ):
"""Collect statistics for a column(s) in a specified table.
Parameters:
table_name (str)
Name of a table, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table.
column_names (list of str)
List of one or more column names in input parameter
*table_name* for which to collect statistics (cardinality, mean
value, etc.). The user can provide a single element (which
will be automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
column_names (list of str)
Value of input parameter *column_names*.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "collect_statistics(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
assert isinstance( options, (dict)), "collect_statistics(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['column_names'] = column_names
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/collect/statistics', obj, convert_to_attr_dict = True )
return response
# end collect_statistics
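# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance and 'example.orders' is a
# hypothetical table.  Collects statistics for two of its columns.
stats_resp = db.collect_statistics(
    table_name   = 'example.orders',
    column_names = [ 'price', 'quantity' ],
    options      = {}
)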
# begin create_container_registry
def create_container_registry( self, registry_name = None, uri = None,
credential = None, options = {} ):
assert isinstance( registry_name, (basestring)), "create_container_registry(): Argument 'registry_name' must be (one) of type(s) '(basestring)'; given %s" % type( registry_name ).__name__
assert isinstance( uri, (basestring)), "create_container_registry(): Argument 'uri' must be (one) of type(s) '(basestring)'; given %s" % type( uri ).__name__
assert isinstance( credential, (basestring)), "create_container_registry(): Argument 'credential' must be (one) of type(s) '(basestring)'; given %s" % type( credential ).__name__
assert isinstance( options, (dict)), "create_container_registry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['registry_name'] = registry_name
obj['uri'] = uri
obj['credential'] = credential
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/container/registry', obj, convert_to_attr_dict = True )
return response
# end create_container_registry
# begin create_credential
def create_credential( self, credential_name = None, type = None, identity =
None, secret = None, options = {} ):
"""Create a new `credential <../../../../concepts/credentials/>`__.
Parameters:
credential_name (str)
Name of the credential to be created. Must contain only
letters, digits, and underscores, and cannot begin with a
digit. Must not match an existing credential name.
type (str)
Type of the credential to be created.
Allowed values are:
* aws_access_key
* aws_iam_role
* azure_ad
* azure_oauth
* azure_sas
* azure_storage_key
* docker
* gcs_service_account_id
* gcs_service_account_keys
* hdfs
* jdbc
* kafka
* confluent
identity (str)
User of the credential to be created.
secret (str)
Password of the credential to be created.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
credential_name (str)
Value of input parameter *credential_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( credential_name, (basestring)), "create_credential(): Argument 'credential_name' must be (one) of type(s) '(basestring)'; given %s" % type( credential_name ).__name__
assert isinstance( type, (basestring)), "create_credential(): Argument 'type' must be (one) of type(s) '(basestring)'; given %s" % type( type ).__name__
assert isinstance( identity, (basestring)), "create_credential(): Argument 'identity' must be (one) of type(s) '(basestring)'; given %s" % type( identity ).__name__
assert isinstance( secret, (basestring)), "create_credential(): Argument 'secret' must be (one) of type(s) '(basestring)'; given %s" % type( secret ).__name__
assert isinstance( options, (dict)), "create_credential(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['credential_name'] = credential_name
obj['type'] = type
obj['identity'] = identity
obj['secret'] = secret
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/credential', obj, convert_to_attr_dict = True )
return response
# end create_credential
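# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance.  Creates a credential holding a
# placeholder AWS access key; real key material would normally come from a
# secure source rather than from literals in code.
cred_resp = db.create_credential(
    credential_name = 'orders_s3_cred',
    type            = 'aws_access_key',
    identity        = 'EXAMPLE_ACCESS_KEY_ID',   # placeholder identity
    secret          = 'example-secret-key',      # placeholder secret
    options         = {}
)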
# begin create_datasink
def create_datasink( self, name = None, destination = None, options = {} ):
"""Creates a `data sink <../../../../concepts/data_sinks/>`__, which
contains the
destination information for a data sink that is external to the
database.
Parameters:
name (str)
Name of the data sink to be created.
destination (str)
Destination for the output data in format
'storage_provider_type://path[:port]'.
Supported storage provider types are 'azure', 'gcs', 'hdfs',
'http', 'https', 'jdbc', 'kafka' and 's3'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **connection_timeout** --
Timeout in seconds for connecting to this data sink
* **wait_timeout** --
Timeout in seconds for waiting for a response from this data
sink
* **credential** --
Name of the `credential
<../../../../concepts/credentials/>`__ object to be used in
this data sink
* **s3_bucket_name** --
Name of the Amazon S3 bucket to use as the data sink
* **s3_region** --
Name of the Amazon S3 region where the given bucket is
located
* **s3_verify_ssl** --
Set to false for testing purposes or when necessary to bypass
TLS errors (e.g. self-signed certificates). This value is
true by default.
Allowed values are:
* true
* false
The default value is 'true'.
* **s3_use_virtual_addressing** --
When true (default), the request URI should be specified in
virtual-hosted-style format, where the bucket name is part of
the domain name in the URL.
Otherwise, set to false to use path-style URIs for requests.
Allowed values are:
* true
* false
The default value is 'true'.
* **s3_aws_role_arn** --
Amazon IAM Role ARN which has required S3 permissions that
can be assumed for the given S3 IAM user
* **s3_encryption_customer_algorithm** --
Customer encryption algorithm used for encrypting data
* **s3_encryption_customer_key** --
Customer encryption key to encrypt or decrypt data
* **s3_encryption_type** --
Server side encryption type
* **s3_kms_key_id** --
KMS key
* **hdfs_kerberos_keytab** --
Kerberos keytab file location for the given HDFS user. This
may be a KIFS file.
* **hdfs_delegation_token** --
Delegation token for the given HDFS user
* **hdfs_use_kerberos** --
Use kerberos authentication for the given HDFS cluster.
Allowed values are:
* true
* false
The default value is 'false'.
* **azure_storage_account_name** --
Name of the Azure storage account to use as the data sink,
this is valid only if tenant_id is specified
* **azure_container_name** --
Name of the Azure storage container to use as the data sink
* **azure_tenant_id** --
Active Directory tenant ID (or directory ID)
* **azure_sas_token** --
Shared access signature token for Azure storage account to
use as the data sink
* **azure_oauth_token** --
OAuth token to access given storage container
* **gcs_bucket_name** --
Name of the Google Cloud Storage bucket to use as the data
sink
* **gcs_project_id** --
Name of the Google Cloud project to use as the data sink
* **gcs_service_account_keys** --
Google Cloud service account keys to use for authenticating
the data sink
* **jdbc_driver_jar_path** --
JDBC driver jar file location
* **jdbc_driver_class_name** --
Name of the JDBC driver class
* **kafka_topic_name** --
Name of the Kafka topic to publish to if input parameter
*destination* is a Kafka broker
* **max_batch_size** --
Maximum number of records per notification message. The
default value is '1'.
* **max_message_size** --
Maximum size in bytes of each notification message. The
default value is '1000000'.
* **json_format** --
The desired format of JSON-encoded notification messages.
If *nested*, records are returned as an array. Otherwise,
only a single record per message is returned.
Allowed values are:
* flat
* nested
The default value is 'flat'.
* **use_managed_credentials** --
When no credentials are supplied, we use anonymous access by
default. If this is set, we will use cloud provider user
settings.
Allowed values are:
* true
* false
The default value is 'false'.
* **use_https** --
Use https to connect to datasink if true, otherwise use http.
Allowed values are:
* true
* false
The default value is 'true'.
* **skip_validation** --
Bypass validation of connection to this data sink.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "create_datasink(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( destination, (basestring)), "create_datasink(): Argument 'destination' must be (one) of type(s) '(basestring)'; given %s" % type( destination ).__name__
assert isinstance( options, (dict)), "create_datasink(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['destination'] = destination
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/datasink', obj, convert_to_attr_dict = True )
return response
# end create_datasink
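# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance and that a Kafka broker is
# reachable at the hypothetical address below.  Creates a sink that publishes
# notification messages to the 'order_events' topic in batches of up to 100.
sink_resp = db.create_datasink(
    name        = 'orders_kafka_sink',
    destination = 'kafka://kafka-broker.example.com:9092',
    options     = {
        'kafka_topic_name' : 'order_events',
        'max_batch_size'   : '100',
        'json_format'      : 'nested'
    }
)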
# begin create_datasource
def create_datasource( self, name = None, location = None, user_name = None,
password = None, options = {} ):
"""Creates a `data source <../../../../concepts/data_sources/>`__, which
contains the
location and connection information for a data store that is external
to the database.
Parameters:
name (str)
Name of the data source to be created.
location (str)
Location of the remote storage in
'storage_provider_type://[storage_path[:storage_port]]' format.
Supported storage provider types are
'azure','gcs','hdfs','jdbc','kafka', 'confluent' and 's3'.
user_name (str)
Name of the remote system user; may be an empty string
password (str)
Password for the remote system user; may be an empty string
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **skip_validation** --
Bypass validation of connection to remote source.
Allowed values are:
* true
* false
The default value is 'false'.
* **connection_timeout** --
Timeout in seconds for connecting to this storage provider
* **wait_timeout** --
Timeout in seconds for reading from this storage provider
* **credential** --
Name of the `credential <../../../../concepts/credentials>`__
object to be used in data source
* **s3_bucket_name** --
Name of the Amazon S3 bucket to use as the data source
* **s3_region** --
Name of the Amazon S3 region where the given bucket is
located
* **s3_verify_ssl** --
Set to false for testing purposes or when necessary to bypass
TLS errors (e.g. self-signed certificates). This value is
true by default.
Allowed values are:
* true
* false
The default value is 'true'.
* **s3_use_virtual_addressing** --
Whether to use virtual addressing when referencing the Amazon
S3 source.
Allowed values are:
* **true** --
The request URI should be specified in
virtual-hosted-style format, where the bucket name is part
of the domain name in the URL.
* **false** --
Use path-style URI for requests.
The default value is 'true'.
* **s3_aws_role_arn** --
Amazon IAM Role ARN which has required S3 permissions that
can be assumed for the given S3 IAM user
* **s3_encryption_customer_algorithm** --
Customer encryption algorithm used for encrypting data
* **s3_encryption_customer_key** --
Customer encryption key to encrypt or decrypt data
* **hdfs_kerberos_keytab** --
Kerberos keytab file location for the given HDFS user. This
may be a KIFS file.
* **hdfs_delegation_token** --
Delegation token for the given HDFS user
* **hdfs_use_kerberos** --
Use kerberos authentication for the given HDFS cluster.
Allowed values are:
* true
* false
The default value is 'false'.
* **azure_storage_account_name** --
Name of the Azure storage account to use as the data source,
this is valid only if tenant_id is specified
* **azure_container_name** --
Name of the Azure storage container to use as the data source
* **azure_tenant_id** --
Active Directory tenant ID (or directory ID)
* **azure_sas_token** --
Shared access signature token for Azure storage account to
use as the data source
* **azure_oauth_token** --
OAuth token to access given storage container
* **gcs_bucket_name** --
Name of the Google Cloud Storage bucket to use as the data
source
* **gcs_project_id** --
Name of the Google Cloud project to use as the data source
* **gcs_service_account_keys** --
Google Cloud service account keys to use for authenticating
the data source
* **is_stream** --
To load from Azure/GCS/S3 as a stream continuously.
Allowed values are:
* true
* false
The default value is 'false'.
* **kafka_topic_name** --
Name of the Kafka topic to use as the data source
* **jdbc_driver_jar_path** --
JDBC driver jar file location. This may be a KIFS file.
* **jdbc_driver_class_name** --
Name of the JDBC driver class
* **anonymous** --
Use anonymous connection to storage provider--DEPRECATED:
this is now the default. Specify use_managed_credentials for
non-anonymous connection.
Allowed values are:
* true
* false
The default value is 'true'.
* **use_managed_credentials** --
When no credentials are supplied, we use anonymous access by
default. If this is set, we will use cloud provider user
settings.
Allowed values are:
* true
* false
The default value is 'false'.
* **use_https** --
Use https to connect to datasource if true, otherwise use
http.
Allowed values are:
* true
* false
The default value is 'true'.
* **schema_registry_location** --
Location of Confluent Schema Registry in
'[storage_path[:storage_port]]' format.
* **schema_registry_credential** --
Confluent Schema Registry `credential
<../../../../concepts/credentials>`__ object name.
* **schema_registry_port** --
Confluent Schema Registry port (optional).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "create_datasource(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( location, (basestring)), "create_datasource(): Argument 'location' must be (one) of type(s) '(basestring)'; given %s" % type( location ).__name__
assert isinstance( user_name, (basestring)), "create_datasource(): Argument 'user_name' must be (one) of type(s) '(basestring)'; given %s" % type( user_name ).__name__
assert isinstance( password, (basestring)), "create_datasource(): Argument 'password' must be (one) of type(s) '(basestring)'; given %s" % type( password ).__name__
assert isinstance( options, (dict)), "create_datasource(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['location'] = location
obj['user_name'] = user_name
obj['password'] = password
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/datasource', obj, convert_to_attr_dict = True )
return response
# end create_datasource
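# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance and that the credential created
# above ('orders_s3_cred') exists.  The bucket and region names are
# hypothetical, and the exact location string accepted for each provider
# should be checked against the data source documentation.
source_resp = db.create_datasource(
    name      = 'orders_s3_source',
    location  = 's3',                      # Amazon S3; bucket/region given via options
    user_name = '',                        # not used when a credential is supplied
    password  = '',
    options   = {
        's3_bucket_name' : 'example-orders-bucket',
        's3_region'      : 'us-east-1',
        'credential'     : 'orders_s3_cred'
    }
)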
# begin create_delta_table
def create_delta_table( self, delta_table_name = None, table_name = None,
options = {} ):
assert isinstance( delta_table_name, (basestring)), "create_delta_table(): Argument 'delta_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( delta_table_name ).__name__
assert isinstance( table_name, (basestring)), "create_delta_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( options, (dict)), "create_delta_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['delta_table_name'] = delta_table_name
obj['table_name'] = table_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/deltatable', obj, convert_to_attr_dict = True )
return response
# end create_delta_table
# begin create_directory
def create_directory( self, directory_name = None, options = {} ):
"""Creates a new directory in `KiFS <../../../../tools/kifs/>`__. The new
directory serves as a location in which the user can upload files using
:meth:`GPUdb.upload_files`.
Parameters:
directory_name (str)
Name of the directory in KiFS to be created.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_home_directory** --
When set, a home directory is created for the user name
provided in the value. The input parameter *directory_name*
must be an empty string in this case. The user must exist.
* **data_limit** --
The maximum capacity, in bytes, to apply to the created
directory. Set to -1 to indicate no upper limit. If empty,
the system default limit is applied.
* **no_error_if_exists** --
If *true*, does not return an error if the directory already
exists.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
directory_name (str)
Value of input parameter *directory_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( directory_name, (basestring)), "create_directory(): Argument 'directory_name' must be (one) of type(s) '(basestring)'; given %s" % type( directory_name ).__name__
assert isinstance( options, (dict)), "create_directory(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['directory_name'] = directory_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/directory', obj, convert_to_attr_dict = True )
return response
# end create_directory
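# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance.  Creates a hypothetical KiFS
# directory for uploaded artifacts, tolerating a pre-existing directory.
dir_resp = db.create_directory(
    directory_name = 'model_artifacts',
    options        = { 'no_error_if_exists': 'true' }
)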
# begin create_environment
def create_environment( self, environment_name = None, options = {} ):
"""Creates a new environment which can be used by `user-defined functions
<../../../../concepts/udf/>`__ (UDF).
Parameters:
environment_name (str)
Name of the environment to be created.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
environment_name (str)
Value of input parameter *environment_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( environment_name, (basestring)), "create_environment(): Argument 'environment_name' must be (one) of type(s) '(basestring)'; given %s" % type( environment_name ).__name__
assert isinstance( options, (dict)), "create_environment(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['environment_name'] = environment_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/environment', obj, convert_to_attr_dict = True )
return response
# end create_environment
# begin create_graph
def create_graph( self, graph_name = None, directed_graph = True, nodes = None,
edges = None, weights = None, restrictions = None, options
= {} ):
"""Creates a new graph network using given nodes, edges, weights, and
restrictions.
IMPORTANT: It's highly recommended that you review the
`Network Graphs & Solvers
<../../../../graph_solver/network_graph_solver/>`__
concepts documentation, the
`Graph REST Tutorial <../../../../guides/graph_rest_guide/>`__,
and/or some `graph examples <../../../../guide-tags/graph/>`__ before
using this endpoint.
Parameters:
graph_name (str)
Name of the graph resource to generate.
directed_graph (bool)
If set to *true*, the graph will be directed. If set to
*false*, the graph will not be directed. Consult `Directed
Graphs
<../../../../graph_solver/network_graph_solver/#directed-graphs>`__
for more details.
Allowed values are:
* true
* false
The default value is True.
nodes (list of str)
Nodes represent fundamental topological units of a graph.
Nodes must be specified using
`identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as
`combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS NODE_ID', expressions, e.g.,
'ST_MAKEPOINT(column1, column2) AS NODE_WKTPOINT', or constant
values, e.g.,
'{9, 10, 11} AS NODE_ID'.
If using constant values in an identifier combination, the
number of values
specified must match across the combination. The user can
provide a single element (which will be automatically promoted
to a list internally) or a list.
edges (list of str)
Edges represent the required fundamental topological unit of
a graph that typically connect nodes. Edges must be specified
using
`identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as
`combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS EDGE_ID', expressions, e.g.,
'SUBSTR(column, 1, 6) AS EDGE_NODE1_NAME', or constant values,
e.g.,
"{'family', 'coworker'} AS EDGE_LABEL".
If using constant values in an identifier combination, the
number of values
specified must match across the combination. The user can
provide a single element (which will be automatically promoted
to a list internally) or a list.
weights (list of str)
Weights represent a method of informing the graph solver of
the cost of including a given edge in a solution. Weights must
be specified
using
`identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as
`combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS WEIGHTS_EDGE_ID', expressions, e.g.,
'ST_LENGTH(wkt) AS WEIGHTS_VALUESPECIFIED', or constant values,
e.g.,
'{4, 15} AS WEIGHTS_VALUESPECIFIED'.
If using constant values in an identifier combination, the
number of values specified
must match across the combination. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
restrictions (list of str)
Restrictions represent a method of informing the graph
solver which edges and/or nodes should be ignored for the
solution. Restrictions
must be specified using
`identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as
`combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS RESTRICTIONS_EDGE_ID', expressions, e.g.,
'column/2 AS RESTRICTIONS_VALUECOMPARED', or constant values,
e.g.,
'{0, 0, 0, 1} AS RESTRICTIONS_ONOFFCOMPARED'.
If using constant values in an identifier combination, the
number of values
specified must match across the combination. The user can
provide a single element (which will be automatically promoted
to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **merge_tolerance** --
If node geospatial positions are input (e.g., WKTPOINT, X,
Y), determines the minimum separation allowed between unique
nodes. If nodes are within the tolerance of each other, they
will be merged as a single node. The default value is
'1.0E-5'.
* **recreate** --
If set to *true* and the graph (using input parameter
*graph_name*) already exists, the graph is deleted and
recreated.
Allowed values are:
* true
* false
The default value is 'false'.
* **save_persist** --
If set to *true*, the graph will be saved in the persist
directory (see the `config reference
<../../../../config/#config-main-persistence>`__ for more
information). If set to *false*, the graph will be removed
when the graph server is shutdown.
Allowed values are:
* true
* false
The default value is 'false'.
* **add_table_monitor** --
Adds a table monitor to every table used in the creation of
the graph; this table monitor will trigger the graph to
update dynamically upon inserts to the source table(s). Note
that upon database restart, if *save_persist* is also set to
*true*, the graph will be fully reconstructed and the table
monitors will be reattached. For more details on table
monitors, see :meth:`GPUdb.create_table_monitor`.
Allowed values are:
* true
* false
The default value is 'false'.
* **graph_table** --
If specified, the created graph is also created as a table
with the given name, in [schema_name.]table_name format,
using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. The
table will have the following identifier columns: 'EDGE_ID',
'EDGE_NODE1_ID', 'EDGE_NODE2_ID'. If left blank, no table is
created. The default value is ''.
* **add_turns** --
Adds dummy 'pillowed' edges around intersection nodes where
there are more than three edges so that additional weight
penalties can be imposed by the solve endpoints (this increases
the total number of edges).
Allowed values are:
* true
* false
The default value is 'false'.
* **server_id** --
Indicates which graph server(s) to send the request to.
Default is to send to the server with the most available
memory.
* **use_rtree** --
Use a range tree structure to accelerate and improve the
accuracy of snapping, especially to edges.
Allowed values are:
* true
* false
The default value is 'true'.
* **label_delimiter** --
If provided, the label string will be split according to this
delimiter and each sub-string will be applied as a separate
label onto the specified edge. The default value is ''.
* **allow_multiple_edges** --
Multigraph choice; if set to true, multiple edges with the same
node pairs are allowed; otherwise, new edges with node pairs
matching existing edges will not be inserted.
Allowed values are:
* true
* false
The default value is 'true'.
Returns:
A dict with the following entries--
result (bool)
Indicates a successful creation on all servers.
num_nodes (long)
Total number of nodes created.
num_edges (long)
Total number of edges created.
edges_ids (list of longs)
[Deprecated] Edges given as pairs of node indices. Only
populated if export_create_results internal option is set to
true.
info (dict of str to str)
Additional information.
"""
assert isinstance( graph_name, (basestring)), "create_graph(): Argument 'graph_name' must be (one) of type(s) '(basestring)'; given %s" % type( graph_name ).__name__
assert isinstance( directed_graph, (bool)), "create_graph(): Argument 'directed_graph' must be (one) of type(s) '(bool)'; given %s" % type( directed_graph ).__name__
nodes = nodes if isinstance( nodes, list ) else ( [] if (nodes is None) else [ nodes ] )
edges = edges if isinstance( edges, list ) else ( [] if (edges is None) else [ edges ] )
weights = weights if isinstance( weights, list ) else ( [] if (weights is None) else [ weights ] )
restrictions = restrictions if isinstance( restrictions, list ) else ( [] if (restrictions is None) else [ restrictions ] )
assert isinstance( options, (dict)), "create_graph(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['graph_name'] = graph_name
obj['directed_graph'] = directed_graph
obj['nodes'] = nodes
obj['edges'] = edges
obj['weights'] = weights
obj['restrictions'] = restrictions
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/graph', obj, convert_to_attr_dict = True )
return response
# end create_graph
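# --- Illustrative usage sketch (not part of the generated API) ----------
# Assumes `db` is a connected GPUdb instance and that hypothetical tables
# 'example.nodes' and 'example.edges' exist.  The identifier combinations
# shown are illustrative only; consult the Network Graphs & Solvers
# documentation for the combinations valid in a given release.
graph_resp = db.create_graph(
    graph_name     = 'transport_graph',
    directed_graph = True,
    nodes          = [ 'example.nodes.id AS NODE_ID' ],
    edges          = [ 'example.edges.node1 AS EDGE_NODE1_ID',
                       'example.edges.node2 AS EDGE_NODE2_ID' ],
    weights        = [ 'example.edges.cost AS WEIGHTS_VALUESPECIFIED' ],
    restrictions   = [],
    options        = { 'recreate': 'true', 'save_persist': 'true' }
)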
# begin create_job
def create_job( self, endpoint = None, request_encoding = 'binary', data = None,
data_str = None, options = {} ):
"""Create a job which will run asynchronously. The response returns a job
ID, which can be used to query the status and result of the job. The
status and the result of the job upon completion can be requested by
:meth:`GPUdb.get_job`.
Parameters:
endpoint (str)
Indicates which endpoint to execute, e.g. '/alter/table'.
request_encoding (str)
The encoding of the request payload for the job.
Allowed values are:
* binary
* json
* snappy
The default value is 'binary'.
data (bytes)
Binary-encoded payload for the job to be run asynchronously.
The payload must contain the relevant input parameters for the
endpoint indicated in input parameter *endpoint*. Please see
the documentation for the appropriate endpoint to see what
values must (or can) be specified. If this parameter is used,
then input parameter *request_encoding* must be *binary* or
*snappy*.
data_str (str)
JSON-encoded payload for the job to be run asynchronously. The
payload must contain the relevant input parameters for the
endpoint indicated in input parameter *endpoint*. Please see
the documentation for the appropriate endpoint to see what
values must (or can) be specified. If this parameter is used,
then input parameter *request_encoding* must be *json*.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **job_tag** --
Tag to use for the submitted job. The same tag can be used on a
backup cluster to retrieve the response for the job. Tags may
use letters, numbers, '_', and '-'.
Returns:
A dict with the following entries--
job_id (long)
An identifier for the job created by this call.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **job_tag** --
The job tag specified by the user or, if unspecified, a unique
identifier generated internally for the job across clusters.
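Example (an illustrative sketch only; ``db`` is assumed to be a
connected GPUdb instance, the table name is hypothetical, and the
payload structure shown is an assumption modeled on the
:meth:`GPUdb.alter_table` parameters)::

    import json
    payload = json.dumps( { 'table_name' : 'example.trips',
                            'action'     : 'ttl',
                            'value'      : '20',
                            'options'    : {} } )
    response = db.create_job(
        endpoint = '/alter/table',
        request_encoding = 'json',
        data = b'',
        data_str = payload,
        options = { 'job_tag' : 'ttl_update_1' } )
    # Retrieve the status/result later with db.get_job( response['job_id'] )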
"""
assert isinstance( endpoint, (basestring)), "create_job(): Argument 'endpoint' must be (one) of type(s) '(basestring)'; given %s" % type( endpoint ).__name__
assert isinstance( request_encoding, (basestring)), "create_job(): Argument 'request_encoding' must be (one) of type(s) '(basestring)'; given %s" % type( request_encoding ).__name__
assert isinstance( data, (basestring, bytes)), "create_job(): Argument 'data' must be (one) of type(s) '(basestring, bytes)'; given %s" % type( data ).__name__
assert isinstance( data_str, (basestring)), "create_job(): Argument 'data_str' must be (one) of type(s) '(basestring)'; given %s" % type( data_str ).__name__
assert isinstance( options, (dict)), "create_job(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['endpoint'] = endpoint
obj['request_encoding'] = request_encoding
obj['data'] = data
obj['data_str'] = data_str
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/job', obj, convert_to_attr_dict = True )
return response
# end create_job
# begin create_join_table
def create_join_table( self, join_table_name = None, table_names = None,
column_names = None, expressions = [], options = {}
):
"""Creates a table that is the result of a SQL JOIN.
For join details and examples see: `Joins
<../../../../concepts/joins/>`__. For limitations, see `Join
Limitations and Cautions
<../../../../concepts/joins/#limitations-cautions>`__.
Parameters:
join_table_name (str)
Name of the join table to be created, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
table_names (list of str)
The list of table names composing the join, each in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
Corresponds to a SQL statement FROM clause. The user can
provide a single element (which will be automatically promoted
to a list internally) or a list.
column_names (list of str)
List of member table columns or column expressions to be
included in the join. Columns can be prefixed with
'table_id.column_name', where 'table_id' is the table name or
alias. Columns can be aliased via the syntax 'column_name as
alias'. Wild cards '*' can be used to include all columns
across member tables or 'table_id.*' for all of a single
table's columns. Columns and column expressions composing the
join must be uniquely named or aliased--therefore, the '*' wild
card cannot be used if column names aren't unique across all
tables. The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
expressions (list of str)
An optional list of expressions to combine and filter the
joined tables. Corresponds to a SQL statement WHERE clause.
For details see: `expressions
<../../../../concepts/expressions/>`__. The default value is
an empty list ( [] ). The user can provide a single element
(which will be automatically promoted to a list internally) or
a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*join_table_name*. This is always allowed even if the caller
does not have permission to create tables. The generated name
is returned in *qualified_join_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
join as part of input parameter *join_table_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the join. If the schema
is non-existent, it will be automatically created. The
default value is ''.
* **max_query_dimensions** --
No longer used.
* **optimize_lookups** --
Use more memory to speed up the joining of tables.
Allowed values are:
* true
* false
The default value is 'false'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the join
table specified in input parameter *join_table_name*.
* **view_id** --
ID of the view this join is part of. The default value is ''.
* **no_count** --
Return a count of 0 for the join table for logging and for
:meth:`GPUdb.show_table`; optimization needed for large
overlapped equi-join stencils. The default value is 'false'.
* **chunk_size** --
Maximum number of records per joined-chunk for this table.
Defaults to the gpudb.conf file chunk size.
Returns:
A dict with the following entries--
join_table_name (str)
Value of input parameter *join_table_name*.
count (long)
The number of records in the join table filtered by the given
select expression.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_join_table_name** --
The fully qualified name of the join table (i.e. including
the schema)
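Example (an illustrative sketch only; ``db`` is assumed to be a
connected GPUdb instance, and the table and column names are
hypothetical)::

    response = db.create_join_table(
        join_table_name = 'example.order_join',
        table_names = [ 'example.orders as o',
                        'example.customers as c' ],
        column_names = [ 'o.order_id',
                         'o.total as order_total',
                         'c.name as customer_name' ],
        expressions = [ 'o.customer_id = c.id' ],
        options = {} )
    print( response['count'] )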
"""
assert isinstance( join_table_name, (basestring)), "create_join_table(): Argument 'join_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( join_table_name ).__name__
table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
expressions = expressions if isinstance( expressions, list ) else ( [] if (expressions is None) else [ expressions ] )
assert isinstance( options, (dict)), "create_join_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['join_table_name'] = join_table_name
obj['table_names'] = table_names
obj['column_names'] = column_names
obj['expressions'] = expressions
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/jointable', obj, convert_to_attr_dict = True )
return response
# end create_join_table
# begin create_materialized_view
def create_materialized_view( self, table_name = None, options = {} ):
"""Initiates the process of creating a materialized view, reserving the
view's name to prevent other views or tables from being created with
that name.
For materialized view details and examples, see
`Materialized Views <../../../../concepts/materialized_views/>`__.
The response contains output parameter *view_id*, which is used to tag
each subsequent
operation (projection, union, aggregation, filter, or join) that will
compose
the view.
Parameters:
table_name (str)
Name of the table to be created that is the top-level table of
the materialized view, in [schema_name.]table_name format,
using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
materialized view as part of input parameter *table_name* and
use :meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema which is to contain the newly
created view. If the schema provided is non-existent, it will
be automatically created.
* **execute_as** --
User name to use to run the refresh job.
* **persist** --
If *true*, then the materialized view specified in input
parameter *table_name* will be persisted and will not expire
unless a *ttl* is specified. If *false*, then the
materialized view will be an in-memory table and will expire
unless a *ttl* is specified otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **refresh_span** --
Sets the future time offset (in seconds) at which the periodic
refresh stops.
* **refresh_stop_time** --
When *refresh_method* is *periodic*, specifies the time at
which a periodic refresh is stopped. Value is a datetime
string with format 'YYYY-MM-DD HH:MM:SS'.
* **refresh_method** --
Method by which the view can be refreshed when the data in the
underlying member tables has changed.
Allowed values are:
* **manual** --
Refresh only occurs when manually requested by calling
:meth:`GPUdb.alter_table` with an 'action' of 'refresh'
* **on_query** --
Refresh any time the view is queried.
* **on_change** --
If possible, incrementally refresh (refresh just those
records added) whenever an insert, update, delete or
refresh of input table is done. A full refresh is done if
an incremental refresh is not possible.
* **periodic** --
Refresh table periodically at rate specified by
*refresh_period*
The default value is 'manual'.
* **refresh_period** --
When *refresh_method* is *periodic*, specifies the period in
seconds at which the refresh occurs.
* **refresh_start_time** --
When *refresh_method* is *periodic*, specifies the first time
at which a refresh is to be done. Value is a datetime string
with format 'YYYY-MM-DD HH:MM:SS'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in input parameter *table_name*.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
view_id (str)
Value of view_id.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_table_name** --
The fully qualified name of the result table (i.e. including
the schema)
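Example (an illustrative sketch only; ``db`` is assumed to be a
connected GPUdb instance, and the table name is hypothetical)::

    response = db.create_materialized_view(
        table_name = 'example.daily_totals',
        options = { 'refresh_method' : 'periodic',
                    'refresh_period' : '3600',
                    'persist' : 'true' } )
    view_id = response['view_id']
    # Pass view_id (e.g. via the 'view_id' option of
    # create_projection) to the operations composing the view.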
"""
assert isinstance( table_name, (basestring)), "create_materialized_view(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( options, (dict)), "create_materialized_view(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/materializedview', obj, convert_to_attr_dict = True )
return response
# end create_materialized_view
# begin create_proc
def create_proc( self, proc_name = None, execution_mode = 'distributed', files =
{}, command = '', args = [], options = {} ):
"""Creates an instance (proc) of the
`user-defined functions <../../../../concepts/udf/>`__ (UDF) specified
by the
given command, options, and files, and makes it available for
execution.
Parameters:
proc_name (str)
Name of the proc to be created. Must not be the name of a
currently existing proc.
execution_mode (str)
The execution mode of the proc.
Allowed values are:
* **distributed** --
Input table data will be divided into data
segments that are distributed across all nodes in the
cluster, and the proc
command will be invoked once per data segment in parallel.
Output table data
from each invocation will be saved to the same node as the
corresponding input
data.
* **nondistributed** --
The proc command will be invoked only once per
execution, and will not have direct access to any tables
named as input or
output table parameters in the call to
:meth:`GPUdb.execute_proc`. It will,
however, be able to access the database using native API
calls.
The default value is 'distributed'.
files (dict of str to str)
A map of the files that make up the proc. The keys of the
map are file names, and the values are the binary contents of
the files. The
file names may include subdirectory names (e.g. 'subdir/file')
but must not
resolve to a directory above the root for the proc.
Files may be loaded from existing files in KiFS. Those file
names should be
prefixed with the uri kifs:// and the values in the map should
be empty. The default value is an empty dict ( {} ).
command (str)
The command (excluding arguments) that will be invoked when
the proc is executed. It will be invoked from the directory
containing the proc
input parameter *files* and may be any command that can be
resolved from that directory.
It need not refer to a file actually in that directory; for
example, it could be
'java' if the proc is a Java application; however, any
necessary external
programs must be preinstalled on every database node. If the
command refers to a
file in that directory, it must be preceded with './' as per
Linux convention.
If not specified, and exactly one file is provided in input
parameter *files*, that file
will be invoked. The default value is ''.
args (list of str)
An array of command-line arguments that will be passed to input
parameter *command* when the proc is executed. The default
value is an empty list ( [] ). The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **max_concurrency_per_node** --
The maximum number of concurrent instances of the proc that
will be executed per node. 0 allows unlimited concurrency.
The default value is '0'.
* **set_environment** --
A python environment to use when executing the proc. Must be
an existing environment, else an error will be returned. The
default value is ''.
Returns:
A dict with the following entries--
proc_name (str)
Value of input parameter *proc_name*.
info (dict of str to str)
Additional information.
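Example (an illustrative sketch only; ``db`` is assumed to be a
connected GPUdb instance, and the proc and file names are
hypothetical)::

    with open( 'my_udf.py', 'rb' ) as f:
        udf_bytes = f.read()
    response = db.create_proc(
        proc_name = 'my_udf',
        execution_mode = 'distributed',
        files = { 'my_udf.py' : udf_bytes },
        command = 'python',
        args = [ 'my_udf.py' ],
        options = {} )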
"""
assert isinstance( proc_name, (basestring)), "create_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__
assert isinstance( execution_mode, (basestring)), "create_proc(): Argument 'execution_mode' must be (one) of type(s) '(basestring)'; given %s" % type( execution_mode ).__name__
assert isinstance( files, (dict)), "create_proc(): Argument 'files' must be (one) of type(s) '(dict)'; given %s" % type( files ).__name__
assert isinstance( command, (basestring)), "create_proc(): Argument 'command' must be (one) of type(s) '(basestring)'; given %s" % type( command ).__name__
args = args if isinstance( args, list ) else ( [] if (args is None) else [ args ] )
assert isinstance( options, (dict)), "create_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['proc_name'] = proc_name
obj['execution_mode'] = execution_mode
obj['files'] = self.__sanitize_dicts( files )
obj['command'] = command
obj['args'] = args
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/proc', obj, convert_to_attr_dict = True )
return response
# end create_proc
# begin create_projection
def create_projection( self, table_name = None, projection_name = None,
column_names = None, options = {} ):
"""Creates a new `projection <../../../../concepts/projections/>`__ of
an existing table. A projection represents a subset of the columns
(potentially
including derived columns) of a table.
For projection details and examples, see
`Projections <../../../../concepts/projections/>`__. For limitations,
see
`Projection Limitations and Cautions
<../../../../concepts/projections/#limitations-and-cautions>`__.
`Window functions <../../../../concepts/window/>`__, which can perform
operations like moving averages, are available through this endpoint as
well as
:meth:`GPUdb.get_records_by_column`.
A projection can be created with a different
`shard key <../../../../concepts/tables/#shard-keys>`__ than the source
table.
By specifying *shard_key*, the projection will be sharded
according to the specified columns, regardless of how the source table
is
sharded. The source table can even be unsharded or replicated.
If input parameter *table_name* is empty, selection is performed
against a single-row
virtual table. This can be useful in executing temporal
(`NOW() <../../../../concepts/expressions/#date-time-functions>`__),
identity
(`USER()
<../../../../concepts/expressions/#user-security-functions>`__), or
constant-based functions
(`GEODIST(-77.11, 38.88, -71.06, 42.36)
<../../../../concepts/expressions/#scalar-functions>`__).
Parameters:
table_name (str)
Name of the existing table on which the projection is to be
applied, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. An
empty table name creates a projection from a single-row virtual
table, where columns specified should be constants or constant
expressions.
projection_name (str)
Name of the projection to be created, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
column_names (list of str)
List of columns from input parameter *table_name* to be
included in the projection. Can include derived columns. Columns
can be aliased via the syntax 'column_name as alias'.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*projection_name*. If *persist* is *false* (or unspecified),
then this is always allowed even if the caller does not have
permission to create tables. The generated name is returned
in *qualified_projection_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
projection as part of input parameter *projection_name* and
use :meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the projection. If the
schema is non-existent, it will be automatically created.
The default value is ''.
* **expression** --
An optional filter `expression
<../../../../concepts/expressions/>`__ to be applied to the
source table prior to the projection. The default value is
''.
* **is_replicated** --
If *true* then the projection will be replicated even if the
source table is not.
Allowed values are:
* true
* false
The default value is 'false'.
* **offset** --
The number of initial results to skip (this can be useful for
paging through the results). The default value is '0'.
* **limit** --
The number of records to keep. The default value is '-9999'.
* **order_by** --
Comma-separated list of the columns to be sorted by; e.g.
'timestamp asc, x desc'. The columns specified must be
present in input parameter *column_names*. If any alias is
given for any column name, the alias must be used, rather
than the original column name. The default value is ''.
* **chunk_size** --
Indicates the number of records per chunk to be used for this
projection.
* **create_indexes** --
Comma-separated list of columns on which to create indexes on
the projection. The columns specified must be present in
input parameter *column_names*. If any alias is given for
any column name, the alias must be used, rather than the
original column name.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the
projection specified in input parameter *projection_name*.
* **shard_key** --
Comma-separated list of the columns to be sharded on; e.g.
'column1, column2'. The columns specified must be present in
input parameter *column_names*. If any alias is given for
any column name, the alias must be used, rather than the
original column name. The default value is ''.
* **persist** --
If *true*, then the projection specified in input parameter
*projection_name* will be persisted and will not expire
unless a *ttl* is specified. If *false*, then the
projection will be an in-memory table and will expire unless
a *ttl* is specified otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **preserve_dict_encoding** --
If *true*, then columns that were dict encoded in the source
table will be dict encoded in the projection.
Allowed values are:
* true
* false
The default value is 'true'.
* **retain_partitions** --
Determines whether the created projection will retain the
partitioning scheme from the source table.
Allowed values are:
* true
* false
The default value is 'false'.
* **partition_type** --
`Partitioning <../../../../concepts/tables/#partitioning>`__
scheme to use.
Allowed values are:
* **RANGE** --
Use `range partitioning
<../../../../concepts/tables/#partitioning-by-range>`__.
* **INTERVAL** --
Use `interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__.
* **LIST** --
Use `list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__.
* **HASH** --
Use `hash partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__.
* **SERIES** --
Use `series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__.
* **partition_keys** --
Comma-separated list of partition keys, which are the columns
or column expressions by which records will be assigned to
partitions defined by *partition_definitions*.
* **partition_definitions** --
Comma-separated list of partition definitions, whose format
depends on the choice of *partition_type*. See `range
partitioning
<../../../../concepts/tables/#partitioning-by-range>`__,
`interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__,
`list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__, `hash
partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__, or
`series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__ for
example formats.
* **is_automatic_partition** --
If *true*, a new partition will be created for values which
don't fall into an existing partition. Currently only
supported for `list partitions
<../../../../concepts/tables/#partitioning-by-list>`__.
Allowed values are:
* true
* false
The default value is 'false'.
* **view_id** --
ID of view of which this projection is a member. The default
value is ''.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
Returns:
A dict with the following entries--
projection_name (str)
Value of input parameter *projection_name*.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **count** --
Number of records in the final table
* **qualified_projection_name** --
The fully qualified name of the projection (i.e. including
the schema).
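Example (an illustrative sketch only; ``db`` is assumed to be a
connected GPUdb instance, and the table and column names are
hypothetical)::

    response = db.create_projection(
        table_name = 'example.trips',
        projection_name = 'example.trip_fares',
        column_names = [ 'vendor_id', 'fare_amount as fare' ],
        options = { 'expression' : 'fare_amount > 0',
                    'order_by' : 'fare desc',
                    'persist' : 'true' } )
    print( response['info']['qualified_projection_name'] )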
"""
assert isinstance( table_name, (basestring)), "create_projection(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( projection_name, (basestring)), "create_projection(): Argument 'projection_name' must be (one) of type(s) '(basestring)'; given %s" % type( projection_name ).__name__
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
assert isinstance( options, (dict)), "create_projection(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['projection_name'] = projection_name
obj['column_names'] = column_names
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/projection', obj, convert_to_attr_dict = True )
return response
# end create_projection
# begin create_resource_group
def create_resource_group( self, name = None, tier_attributes = {}, ranking =
None, adjoining_resource_group = '', options = {}
):
"""Creates a new resource group to facilitate resource management.
Parameters:
name (str)
Name of the group to be created. Must contain only letters,
digits, and underscores, and cannot begin with a digit. Must
not match an existing resource group name.
tier_attributes (dict of str to dicts of str to str)
Optional map containing tier names and their respective
attribute group limits. The only valid attribute limit that
can be set is max_memory (in bytes) for the VRAM & RAM tiers.
For instance, to set max VRAM capacity to 1GB and max RAM
capacity to 10GB, use: {'VRAM':{'max_memory':'1000000000'},
'RAM':{'max_memory':'10000000000'}}. The default value is an
empty dict ( {} ).
Allowed keys are:
* **max_memory** --
Maximum amount of memory usable in the given tier at one time
for this group.
ranking (str)
Indicates the relative ranking among existing resource groups
where this new resource group will be placed. When using
*before* or *after*, specify which resource group this one will
be inserted before or after in input parameter
*adjoining_resource_group*.
Allowed values are:
* first
* last
* before
* after
adjoining_resource_group (str)
If input parameter *ranking* is *before* or *after*, this field
indicates the resource group before or after which the current
group will be placed; otherwise, leave blank. The default
value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **max_cpu_concurrency** --
Maximum number of simultaneous threads that will be used to
execute a request for this group.
* **max_data** --
Maximum amount of cumulative ram usage regardless of tier
status for this group.
* **max_scheduling_priority** --
Maximum priority of a scheduled task for this group.
* **max_tier_priority** --
Maximum priority of a tiered object for this group.
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
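Example (an illustrative sketch only; ``db`` is assumed to be a
connected GPUdb instance, and the group name and limits are
hypothetical)::

    response = db.create_resource_group(
        name = 'analyst_group',
        tier_attributes = { 'RAM' : { 'max_memory' : '10000000000' } },
        ranking = 'first',
        adjoining_resource_group = '',
        options = { 'max_cpu_concurrency' : '4' } )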
"""
assert isinstance( name, (basestring)), "create_resource_group(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( tier_attributes, (dict)), "create_resource_group(): Argument 'tier_attributes' must be (one) of type(s) '(dict)'; given %s" % type( tier_attributes ).__name__
assert isinstance( ranking, (basestring)), "create_resource_group(): Argument 'ranking' must be (one) of type(s) '(basestring)'; given %s" % type( ranking ).__name__
assert isinstance( adjoining_resource_group, (basestring)), "create_resource_group(): Argument 'adjoining_resource_group' must be (one) of type(s) '(basestring)'; given %s" % type( adjoining_resource_group ).__name__
assert isinstance( options, (dict)), "create_resource_group(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['tier_attributes'] = self.__sanitize_dicts( tier_attributes )
obj['ranking'] = ranking
obj['adjoining_resource_group'] = adjoining_resource_group
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/resourcegroup', obj, convert_to_attr_dict = True )
return response
# end create_resource_group
# begin create_role
def create_role( self, name = None, options = {} ):
"""Creates a new role.
.. note::
This method should be used for on-premise deployments only.
Parameters:
name (str)
Name of the role to be created. Must contain only lowercase
letters, digits, and underscores, and cannot begin with a
digit. Must not be the same name as an existing user or role.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **resource_group** --
Name of an existing resource group to associate with this
role.
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
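Example (an illustrative sketch only; ``db`` is assumed to be a
connected GPUdb instance, and the role and resource group names
are hypothetical)::

    response = db.create_role(
        name = 'analyst_role',
        options = { 'resource_group' : 'analyst_group' } )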
"""
assert isinstance( name, (basestring)), "create_role(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( options, (dict)), "create_role(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/role', obj, convert_to_attr_dict = True )
return response
# end create_role
# begin create_schema
def create_schema( self, schema_name = None, options = {} ):
"""Creates a SQL-style `schema <../../../../concepts/schemas/>`__. Schemas
are containers for tables and views. Multiple tables and views can be
defined with the same name in different schemas.
Parameters:
schema_name (str)
Name of the schema to be created. Has the same naming
restrictions as `tables <../../../../concepts/tables/>`__.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_exists** --
If *true*, prevents an error from occurring if the schema
already exists.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
schema_name (str)
Value of input parameter *schema_name*.
info (dict of str to str)
Additional information.
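Example (an illustrative sketch only; ``db`` is assumed to be a
connected GPUdb instance, and the schema name is hypothetical)::

    response = db.create_schema(
        schema_name = 'example',
        options = { 'no_error_if_exists' : 'true' } )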
"""
assert isinstance( schema_name, (basestring)), "create_schema(): Argument 'schema_name' must be (one) of type(s) '(basestring)'; given %s" % type( schema_name ).__name__
assert isinstance( options, (dict)), "create_schema(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['schema_name'] = schema_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/schema', obj, convert_to_attr_dict = True )
return response
# end create_schema
# begin create_state_table
def create_state_table( self, table_name = None, input_table_name = None,
init_table_name = None, options = {} ):
assert isinstance( table_name, (basestring)), "create_state_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( input_table_name, (basestring)), "create_state_table(): Argument 'input_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( input_table_name ).__name__
assert isinstance( init_table_name, (basestring)), "create_state_table(): Argument 'init_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( init_table_name ).__name__
assert isinstance( options, (dict)), "create_state_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['input_table_name'] = input_table_name
obj['init_table_name'] = init_table_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/statetable', obj, convert_to_attr_dict = True )
return response
# end create_state_table
# begin create_table
def create_table( self, table_name = None, type_id = None, options = {} ):
"""Creates a new table. If a new table is being created,
the type of the table is given by input parameter *type_id*, which must
be the ID of
a currently registered type (i.e. one created via
:meth:`GPUdb.create_type`).
A table may optionally be designated to use a
`replicated <../../../../concepts/tables/#replication>`__ distribution
scheme,
or be assigned: `foreign keys
<../../../../concepts/tables/#foreign-keys>`__ to
other tables, a `partitioning
<../../../../concepts/tables/#partitioning>`__
scheme, and/or a `tier strategy
<../../../../rm/concepts/#tier-strategies>`__.
Parameters:
table_name (str)
Name of the table to be created, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Error
for requests with an existing table of the same name and type ID
may be suppressed by using the *no_error_if_exists* option.
type_id (str)
ID of a currently registered type. All objects added to the
newly created table will be of this type.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_exists** --
If *true*, prevents an error from occurring if the table
already exists and is of the given type. If a table with the
same ID but a different type exists, it is still an error.
Allowed values are:
* true
* false
The default value is 'false'.
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*table_name*. If *is_result_table* is *true*, then this is
always allowed even if the caller does not have permission to
create tables. The generated name is returned in
*qualified_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema as part of
input parameter *table_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema which is to contain the newly
created table. If the schema is non-existent, it will be
automatically created.
* **is_collection** --
[DEPRECATED--please use :meth:`GPUdb.create_schema` to create
a schema instead] Indicates whether to create a schema
instead of a table.
Allowed values are:
* true
* false
The default value is 'false'.
* **disallow_homogeneous_tables** --
No longer supported; value will be ignored.
Allowed values are:
* true
* false
The default value is 'false'.
* **is_replicated** --
Affects the `distribution scheme
<../../../../concepts/tables/#distribution>`__ for the
table's data. If *true* and the given type has no explicit
`shard key <../../../../concepts/tables/#shard-key>`__
defined, the table will be `replicated
<../../../../concepts/tables/#replication>`__. If *false*,
the table will be `sharded
<../../../../concepts/tables/#sharding>`__ according to the
shard key specified in the given input parameter *type_id*,
or `randomly sharded
<../../../../concepts/tables/#random-sharding>`__, if no
shard key is specified. Note that a type containing a shard
key cannot be used to create a replicated table.
Allowed values are:
* true
* false
The default value is 'false'.
* **foreign_keys** --
Semicolon-separated list of `foreign keys
<../../../../concepts/tables/#foreign-keys>`__, of the format
'(source_column_name [, ...]) references
target_table_name(primary_key_column_name [, ...]) [as
foreign_key_name]'.
* **foreign_shard_key** --
Foreign shard key of the format 'source_column references
shard_by_column from target_table(primary_key_column)'.
* **partition_type** --
`Partitioning <../../../../concepts/tables/#partitioning>`__
scheme to use.
Allowed values are:
* **RANGE** --
Use `range partitioning
<../../../../concepts/tables/#partitioning-by-range>`__.
* **INTERVAL** --
Use `interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__.
* **LIST** --
Use `list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__.
* **HASH** --
Use `hash partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__.
* **SERIES** --
Use `series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__.
* **partition_keys** --
Comma-separated list of partition keys, which are the columns
or column expressions by which records will be assigned to
partitions defined by *partition_definitions*.
* **partition_definitions** --
Comma-separated list of partition definitions, whose format
depends on the choice of *partition_type*. See `range
partitioning
<../../../../concepts/tables/#partitioning-by-range>`__,
`interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__,
`list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__, `hash
partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__, or
`series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__ for
example formats.
* **is_automatic_partition** --
If *true*, a new partition will be created for values which
don't fall into an existing partition. Currently only
supported for `list partitions
<../../../../concepts/tables/#partitioning-by-list>`__.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in input parameter *table_name*.
* **chunk_size** --
Indicates the number of records per chunk to be used for this
table.
* **is_result_table** --
Indicates whether the table is a `memory-only table
<../../../../concepts/tables_memory_only/>`__. A result table
cannot contain columns with store_only or text_search
`data-handling <../../../../concepts/types/#data-handling>`__
or that are `non-charN strings
<../../../../concepts/types/#primitive-types>`__, and it will
not be retained if the server is restarted.
Allowed values are:
* true
* false
The default value is 'false'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
type_id (str)
Value of input parameter *type_id*.
is_collection (bool)
[DEPRECATED--this will always return false] Indicates if the
created entity is a schema.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_table_name** --
The fully qualified name of the new table (i.e. including the
schema)
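Example (an illustrative sketch only; ``db`` is assumed to be a
connected GPUdb instance, the table name is hypothetical, and the
:meth:`GPUdb.create_type` call reflects an assumed signature)::

    import json
    type_def = json.dumps( {
        'type' : 'record', 'name' : 'point',
        'fields' : [ { 'name' : 'x', 'type' : 'double' },
                     { 'name' : 'y', 'type' : 'double' } ] } )
    type_id = db.create_type( type_definition = type_def,
                              label = 'point' )['type_id']
    response = db.create_table(
        table_name = 'example.points',
        type_id = type_id,
        options = { 'no_error_if_exists' : 'true' } )
    print( response['info']['qualified_table_name'] )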
"""
assert isinstance( table_name, (basestring)), "create_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( type_id, (basestring)), "create_table(): Argument 'type_id' must be (one) of type(s) '(basestring)'; given %s" % type( type_id ).__name__
assert isinstance( options, (dict)), "create_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['type_id'] = type_id
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/table', obj, convert_to_attr_dict = True )
return response
# end create_table
# begin create_table_external
def create_table_external( self, table_name = None, filepaths = None,
modify_columns = {}, create_table_options = {},
options = {} ):
"""Creates a new `external table
<../../../../concepts/external_tables/>`__, which is a
local database object whose source data is located externally to the
database. The source data can
be located either in `KiFS <../../../../tools/kifs/>`__; on the
cluster, accessible to the database; or
remotely, accessible via a pre-defined external `data source
<../../../../concepts/data_sources/>`__.
The external table can have its structure defined explicitly, via input
parameter *create_table_options*,
which contains many of the options from :meth:`GPUdb.create_table`; or
defined implicitly, inferred
from the source data.
Parameters:
table_name (str)
Name of the table to be created, in [schema_name.]table_name
format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting
`table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
filepaths (list of str)
A list of file paths from which data will be sourced;
For paths in `KiFS <../../../../tools/kifs/>`__, use the uri
prefix of kifs:// followed by the path to
a file or directory. File matching by prefix is supported, e.g.
kifs://dir/file would match dir/file_1
and dir/file_2. When prefix matching is used, the path must
start with a full, valid KiFS directory name.
If an external data source is specified in *datasource_name*,
these file
paths must resolve to accessible files at that data source
location. Prefix matching is supported.
If the data source is hdfs, prefixes must be aligned with
directories, i.e. partial file names will
not match.
If no data source is specified, the files are assumed to be
local to the database and must all be
accessible to the gpudb user, residing on the path (or relative
to the path) specified by the
external files directory in the Kinetica
`configuration file
<../../../../config/#config-main-external-files>`__. Wildcards
(*) can be used to
specify a group of files. Prefix matching is supported; the
prefixes must be aligned with
directories.
If the first path ends in .tsv, the text delimiter will be
defaulted to a tab character.
If the first path ends in .psv, the text delimiter will be
defaulted to a pipe character (|). The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
modify_columns (dict of str to dicts of str to str)
Not implemented yet. The default value is an empty dict ( {}
).
create_table_options (dict of str to str)
Options from :meth:`GPUdb.create_table`, allowing the structure
of the table to
be defined independently of the data source. The default value
is an empty dict ( {} ).
Allowed keys are:
* **type_id** --
ID of a currently registered `type
<../../../../concepts/types/>`__.
* **no_error_if_exists** --
If *true*,
prevents an error from occurring if the table already exists
and is of the given type. If a table with
the same name but a different type exists, it is still an
error.
Allowed values are:
* true
* false
The default value is 'false'.
* **is_replicated** --
Affects the `distribution scheme
<../../../../concepts/tables/#distribution>`__
for the table's data. If *true* and the
given table has no explicit `shard key
<../../../../concepts/tables/#shard-key>`__ defined, the
table will be `replicated
<../../../../concepts/tables/#replication>`__. If
*false*, the table will be
`sharded <../../../../concepts/tables/#sharding>`__ according
to the shard key specified in the
given *type_id*, or
`randomly sharded
<../../../../concepts/tables/#random-sharding>`__, if no
shard key is specified.
Note that a type containing a shard key cannot be used to
create a replicated table.
Allowed values are:
* true
* false
The default value is 'false'.
* **foreign_keys** --
Semicolon-separated list of
`foreign keys <../../../../concepts/tables/#foreign-keys>`__,
of the format
'(source_column_name [, ...]) references
target_table_name(primary_key_column_name [, ...]) [as
foreign_key_name]'.
* **foreign_shard_key** --
Foreign shard key of the format
'source_column references shard_by_column from
target_table(primary_key_column)'.
* **partition_type** --
`Partitioning <../../../../concepts/tables/#partitioning>`__
scheme to use.
Allowed values are:
* **RANGE** --
Use `range partitioning
<../../../../concepts/tables/#partitioning-by-range>`__.
* **INTERVAL** --
Use `interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__.
* **LIST** --
Use `list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__.
* **HASH** --
Use `hash partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__.
* **SERIES** --
Use `series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__.
* **partition_keys** --
Comma-separated list of partition keys, which are the columns
or
column expressions by which records will be assigned to
partitions defined by
*partition_definitions*.
* **partition_definitions** --
Comma-separated list of partition definitions, whose format
depends
on the choice of *partition_type*. See
`range partitioning
<../../../../concepts/tables/#partitioning-by-range>`__,
`interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__,
`list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__,
`hash partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__, or
`series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__ for
example formats.
* **is_automatic_partition** --
If *true*,
a new partition will be created for values which don't fall
into an existing partition. Currently,
only supported for `list partitions
<../../../../concepts/tables/#partitioning-by-list>`__.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in input parameter *table_name*.
* **chunk_size** --
Indicates the number of records per chunk to be used for this
table.
* **is_result_table** --
Indicates whether the table is a
`memory-only table
<../../../../concepts/tables_memory_only/>`__. A result table
cannot contain
columns with store_only or text_search
`data-handling <../../../../concepts/types/#data-handling>`__
or that are
`non-charN strings
<../../../../concepts/types/#primitive-types>`__, and it will
not be retained if
the server is restarted.
Allowed values are:
* true
* false
The default value is 'false'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__
for the table and its columns.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **bad_record_table_name** --
Name of a table to which records that were rejected are
written.
The bad-record-table has the following columns: line_number
(long), line_rejected (string),
error_message (string). When *error_handling* is
*abort*, the bad-record-table is not populated.
* **bad_record_table_limit** --
A positive integer indicating the maximum number of records
that can be
written to the bad-record-table. The default value is
'10000'.
* **bad_record_table_limit_per_input** --
For subscriptions, a positive integer indicating the maximum
number
of records that can be written to the bad-record-table per
file/payload. The default value is
*bad_record_table_limit*, and the total size of the table per
rank is limited to *bad_record_table_limit*.
* **batch_size** --
Number of records to insert per batch when inserting data.
The default value is '50000'.
* **column_formats** --
For each target column specified, applies the
column-property-bound
format to the source data loaded into that column. Each
column format will contain a mapping of one
or more of its column properties to an appropriate format for
each property. Currently supported
column properties include date, time, & datetime. The
parameter value must be formatted as a JSON
string of maps of column names to maps of column properties
to their corresponding column formats,
e.g.,
'{ "order_date" : { "date" : "%Y.%m.%d" }, "order_time" : {
"time" : "%H:%M:%S" } }'.
See *default_column_formats* for valid format syntax.
* **columns_to_load** --
Specifies a comma-delimited list of columns from the source
data to
load. If more than one file is being loaded, this list
applies to all files.
Column numbers can be specified discretely or as a range.
For example, a value of '5,7,1..3' will
insert values from the fifth column in the source data into
the first column in the target table,
from the seventh column in the source data into the second
column in the target table, and from the
first through third columns in the source data into the third
through fifth columns in the target
table.
If the source data contains a header, column names matching
the file header names may be provided
instead of column numbers. If the target table doesn't
exist, the table will be created with the
columns in this order. If the target table does exist with
columns in a different order than the
source data, this list can be used to match the order of the
target table. For example, a value of
'C, B, A' will create a three column table with column C,
followed by column B, followed by column
A; or will insert those fields in that order into a table
created with columns in that order. If
the target table exists, the column names must match the
source data field names for a name-mapping
to be successful.
Mutually exclusive with *columns_to_skip*.
* **columns_to_skip** --
Specifies a comma-delimited list of columns from the source
data to
skip. Mutually exclusive with *columns_to_load*.
* **compression_type** --
Source data compression type.
Allowed values are:
* **none** --
No compression.
* **auto** --
Auto detect compression type
* **gzip** --
gzip file compression.
* **bzip2** --
bzip2 file compression.
The default value is 'auto'.
* **datasource_name** --
Name of an existing external data source from which data
file(s) specified in input parameter *filepaths* will be
loaded
* **default_column_formats** --
Specifies the default format to be applied to source data
loaded
into columns with the corresponding column property.
Currently supported column properties include
date, time, & datetime. This default column-property-bound
format can be overridden by specifying a
column property & format for a given target column in
*column_formats*. For
each specified annotation, the format will apply to all
columns with that annotation unless a custom
*column_formats* for that annotation is specified.
The parameter value must be formatted as a JSON string that
is a map of column properties to their
respective column formats, e.g., '{ "date" : "%Y.%m.%d",
"time" : "%H:%M:%S" }'. Column
formats are specified as a string of control characters and
plain text. The supported control
characters are 'Y', 'm', 'd', 'H', 'M', and 'S', which
follow the Linux 'strptime()'
specification, as well as 's', which specifies seconds and
fractional seconds (though the fractional
component will be truncated past milliseconds).
Formats for the 'date' annotation must include the 'Y', 'm',
and 'd' control characters. Formats for
the 'time' annotation must include the 'H', 'M', and either
'S' or 's' (but not both) control
characters. Formats for the 'datetime' annotation meet both
the 'date' and 'time' control character
requirements. For example, '{"datetime" : "%m/%d/%Y %H:%M:%S"
}' would be used to interpret text
as "05/04/2000 12:12:11"
* **error_handling** --
Specifies how errors should be handled upon insertion.
Allowed values are:
* **permissive** --
Records with missing columns are populated with nulls if
possible; otherwise, the malformed records are skipped.
* **ignore_bad_records** --
Malformed records are skipped.
* **abort** --
Stops current insertion and aborts entire operation when an
error is encountered. Primary key collisions are
considered abortable errors in this mode.
The default value is 'abort'.
* **external_table_type** --
Specifies whether the external table holds a local copy of
the external data.
Allowed values are:
* **materialized** --
Loads a copy of the external data into the database,
refreshed on demand
* **logical** --
External data will not be loaded into the database; the
data will be retrieved from the source upon servicing each
query against the external table
The default value is 'materialized'.
* **file_type** --
Specifies the type of the file(s) whose records will be
inserted.
Allowed values are:
* **avro** --
Avro file format
* **delimited_text** --
Delimited text file format; e.g., CSV, TSV, PSV, etc.
* **gdb** --
Esri/GDB file format
* **json** --
Json file format
* **parquet** --
Apache Parquet file format
* **shapefile** --
ShapeFile file format
The default value is 'delimited_text'.
* **gdal_configuration_options** --
Comma-separated list of GDAL configuration options for the
specific request, in key=value format.
* **ignore_existing_pk** --
Specifies the record collision error-suppression policy for
inserting into a table with a `primary key
<../../../../concepts/tables/#primary-keys>`__, only used
when
not in upsert mode (upsert mode is disabled when
*update_on_existing_pk* is
*false*). If set to
*true*, any record being inserted that is rejected
for having primary key values that match those of an existing
table record will be ignored with no
error generated. If *false*, the rejection of any
record for having primary key values matching an existing
record will result in an error being
reported, as determined by *error_handling*. If the
specified table does not
have a primary key or if upsert mode is in effect
(*update_on_existing_pk* is
*true*), then this option has no effect.
Allowed values are:
* **true** --
Ignore new records whose primary key values collide with
those of existing records
* **false** --
Treat as errors any new records whose primary key values
collide with those of existing records
The default value is 'false'.
* **ingestion_mode** --
Whether to do a full load, dry run, or perform a type
inference on the source data.
Allowed values are:
* **full** --
Run a type inference on the source data (if needed) and
ingest
* **dry_run** --
Does not load data, but walks through the source data and
determines the number of valid records, taking into account
the current mode of *error_handling*.
* **type_inference_only** --
Infer the type of the source data and return, without
ingesting any data. The inferred type is returned in the
response.
The default value is 'full'.
* **jdbc_fetch_size** --
The JDBC fetch size, which determines how many rows to fetch
per round trip. The default value is '50000'.
* **kafka_consumers_per_rank** --
Number of Kafka consumer threads per rank (valid range 1-6).
The default value is '1'.
* **kafka_group_id** --
The group id to be used when consuming data from a Kafka
topic (valid only for Kafka datasource subscriptions).
* **kafka_offset_reset_policy** --
Policy to determine whether the Kafka data consumption starts
either at earliest offset or latest offset.
Allowed values are:
* earliest
* latest
The default value is 'earliest'.
* **kafka_optimistic_ingest** --
Enable optimistic ingestion where Kafka topic offsets and
table data are committed independently to achieve
parallelism.
Allowed values are:
* true
* false
The default value is 'false'.
* **kafka_subscription_cancel_after** --
Sets the Kafka subscription lifespan (in minutes). Expired
subscriptions will be cancelled automatically.
* **kafka_type_inference_fetch_timeout** --
Maximum time to collect Kafka messages before running type
inference on the collected set.
* **layer** --
Comma-separated list of geo file layer name(s).
* **loading_mode** --
Scheme for distributing the extraction and loading of data
from the source data file(s). This option applies only when
loading files that are local to the database.
Allowed values are:
* **head** --
The head node loads all data. All files must be available
to the head node.
* **distributed_shared** --
The head node coordinates loading data by worker
processes across all nodes from shared files available to
all workers.
NOTE:
Instead of existing on a shared source, the files can be
duplicated on a source local to each host
to improve performance, though the files must appear as the
same data set from the perspective of
all hosts performing the load.
* **distributed_local** --
A single worker process on each node loads all files
that are available to it. This option works best when each
worker loads files from its own file
system, to maximize performance. In order to avoid data
duplication, either each worker performing
the load needs to have visibility to a set of files unique
to it (no file is visible to more than
one node) or the target table needs to have a primary key
(which will allow the worker to
automatically deduplicate data).
NOTE:
If the target table doesn't exist, the table structure will
be determined by the head node. If the
head node has no files local to it, it will be unable to
determine the structure and the request
will fail.
If the head node is configured to have no worker processes,
no data strictly accessible to the head
node will be loaded.
The default value is 'head'.
* **local_time_offset** --
Apply an offset to Avro local timestamp columns.
* **max_records_to_load** --
Limit the number of records to load in this request: if this
number is larger than *batch_size*, then the number of records
loaded will be limited to the next whole multiple of *batch_size*
(per working thread).
* **num_tasks_per_rank** --
Number of tasks for reading files per rank. The default is the
system configuration parameter
external_file_reader_num_tasks.
* **poll_interval** --
Number of
seconds between attempts to load external files into the
table. If zero, polling will be continuous
as long as data is found. If no data is found, the interval
will steadily increase to a maximum of
60 seconds. The default value is '0'.
* **primary_keys** --
Comma separated list of column names to set as primary keys,
when not specified in the type.
* **refresh_method** --
Method by which the table can be refreshed from its source
data.
Allowed values are:
* **manual** --
Refresh only occurs when manually requested by invoking the
refresh action of :meth:`GPUdb.alter_table` on this table.
* **on_start** --
Refresh table on database startup and when manually
requested by invoking the refresh action of
:meth:`GPUdb.alter_table` on this table.
The default value is 'manual'.
* **schema_registry_schema_name** --
Name of the Avro schema in the schema registry to use when
reading Avro records.
* **shard_keys** --
Comma separated list of column names to set as shard keys,
when not specified in the type.
* **skip_lines** --
Skip this number of lines from the beginning of the file.
* **subscribe** --
Continuously poll the data source to check for new data and
load it into the table.
Allowed values are:
* true
* false
The default value is 'false'.
* **table_insert_mode** --
Insertion scheme to use when inserting records from multiple
shapefiles.
Allowed values are:
* **single** --
Insert all records into a single table.
* **table_per_file** --
Insert records from each file into a new table
corresponding to that file.
The default value is 'single'.
* **text_comment_string** --
Specifies the character string that should be interpreted as
a comment line
prefix in the source data. All lines in the data starting
with the provided string are ignored.
For *delimited_text* *file_type* only. The default value is
'#'.
* **text_delimiter** --
Specifies the character delimiting field values in the source
data
and field names in the header (if present).
For *delimited_text* *file_type* only. The default value is
','.
* **text_escape_character** --
Specifies the character that is used to escape other
characters in
the source data.
An 'a', 'b', 'f', 'n', 'r', 't', or 'v' preceded by an escape
character will be interpreted as the
ASCII bell, backspace, form feed, line feed, carriage return,
horizontal tab, & vertical tab,
respectively. For example, the escape character followed by
an 'n' will be interpreted as a newline
within a field value.
The escape character can also be used to escape the quoting
character, and will be treated as an
escape character whether it is within a quoted field value or
not.
For *delimited_text* *file_type* only.
* **text_has_header** --
Indicates whether the source data contains a header row.
For *delimited_text* *file_type* only.
Allowed values are:
* true
* false
The default value is 'true'.
* **text_header_property_delimiter** --
Specifies the delimiter for
`column properties
<../../../../concepts/types/#column-properties>`__ in the
header row (if
present). Cannot be set to same value as *text_delimiter*.
For *delimited_text* *file_type* only. The default value is
'|'.
* **text_null_string** --
Specifies the character string that should be interpreted as
a null
value in the source data.
For *delimited_text* *file_type* only. The default value is
'\\N'.
* **text_quote_character** --
Specifies the character that should be interpreted as a field
value
quoting character in the source data. The character must
appear at beginning and end of field value
to take effect. Delimiters within quoted fields are treated
as literals and not delimiters. Within
a quoted field, two consecutive quote characters will be
interpreted as a single literal quote
character, effectively escaping it. To not have a quote
character, specify an empty string.
For *delimited_text* *file_type* only. The default value is
'"'.
* **text_search_columns** --
Add 'text_search' property to internally inferred string
columns.
Comma-separated list of column names or '*' for all columns.
To add the 'text_search' property only to
string columns greater than or equal to a minimum size, also
set the
*text_search_min_column_length* option.
* **text_search_min_column_length** --
Set the minimum column size for strings to apply the
'text_search' property to. Used only when
*text_search_columns* has a value.
* **truncate_strings** --
If set to *true*, truncate string values that are longer than
the column's type size.
Allowed values are:
* true
* false
The default value is 'false'.
* **truncate_table** --
If set to *true*, truncates the table specified by input
parameter *table_name* prior to loading the file(s).
Allowed values are:
* true
* false
The default value is 'false'.
* **type_inference_mode** --
Optimize type inferencing for either speed or accuracy.
Allowed values are:
* **accuracy** --
Scans data to get exactly-typed & sized columns for all
data scanned.
* **speed** --
Scans data and picks the widest possible column types so
that 'all' values will fit with minimum data scanned.
The default value is 'speed'.
* **remote_query** --
Remote SQL query from which data will be sourced
* **remote_query_filter_column** --
Name of column to be used for splitting *remote_query* into
multiple sub-queries using the data distribution of given
column
* **remote_query_increasing_column** --
Column on subscribed remote query result that will increase
for new records (e.g., TIMESTAMP).
* **remote_query_partition_column** --
Alias name for *remote_query_filter_column*.
* **update_on_existing_pk** --
Specifies the record collision policy for inserting into a
table
with a `primary key
<../../../../concepts/tables/#primary-keys>`__. If set to
*true*, any existing table record with primary
key values that match those of a record being inserted will
be replaced by that new record (the new
data will be 'upserted'). If set to *false*,
any existing table record with primary key values that match
those of a record being inserted will
remain unchanged, while the new record will be rejected and
the error handled as determined by
*ignore_existing_pk* & *error_handling*. If the
specified table does not have a primary key, then this option
has no effect.
Allowed values are:
* **true** --
Upsert new records when primary keys match existing records
* **false** --
Reject new records when primary keys match existing records
The default value is 'false'.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
type_id (str)
ID of the currently registered table structure `type
<../../../../concepts/types/>`__ for this external table
type_definition (str)
A JSON string describing the columns of the created external
table
type_label (str)
The user-defined description associated with the table's
structure
type_properties (dict of str to lists of str)
A mapping of each external table column name to an array of
column properties associated with that column
count_inserted (long)
Number of records inserted into the external table.
count_skipped (long)
Number of records skipped, when not running in *abort* error
handling mode.
count_updated (long)
[Not yet implemented] Number of records updated within the
external table.
info (dict of str to str)
Additional information.
files (list of str)
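Example (an illustrative sketch only; the connection URL, table name,
file path, and option values below are assumptions, not part of the
generated documentation)::
import gpudb
db = gpudb.GPUdb( host = 'http://localhost:9191' )
response = db.create_table_external(
    table_name = 'example.ext_products',
    filepaths = [ 'data/products.csv' ],
    options = { 'text_has_header': 'true',
                'text_delimiter': ',' } )
print( response.count_inserted )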
"""
assert isinstance( table_name, (basestring)), "create_table_external(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
filepaths = filepaths if isinstance( filepaths, list ) else ( [] if (filepaths is None) else [ filepaths ] )
assert isinstance( modify_columns, (dict)), "create_table_external(): Argument 'modify_columns' must be (one) of type(s) '(dict)'; given %s" % type( modify_columns ).__name__
assert isinstance( create_table_options, (dict)), "create_table_external(): Argument 'create_table_options' must be (one) of type(s) '(dict)'; given %s" % type( create_table_options ).__name__
assert isinstance( options, (dict)), "create_table_external(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['filepaths'] = filepaths
obj['modify_columns'] = self.__sanitize_dicts( modify_columns )
obj['create_table_options'] = self.__sanitize_dicts( create_table_options )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/table/external', obj, convert_to_attr_dict = True )
return response
# end create_table_external
# begin create_table_monitor
def create_table_monitor( self, table_name = None, options = {} ):
"""Creates a monitor that watches for a single table modification event
type (insert, update, or delete) on a particular table (identified by
input parameter *table_name*) and forwards event notifications to
subscribers via ZMQ.
After this call completes, subscribe to the returned output parameter
*topic_id* on the
ZMQ table monitor port (default 9002). Each time an operation of the
given type
on the table completes, a multipart message is published for that
topic; the
first part contains only the topic ID, and each subsequent part
contains one
binary-encoded Avro object that corresponds to the event and can be
decoded
using output parameter *type_schema*. The monitor will continue to run
(regardless of
whether or not there are any subscribers) until deactivated with
:meth:`GPUdb.clear_table_monitor`.
For more information on table monitors, see
`Table Monitors <../../../../concepts/table_monitors/>`__.
Parameters:
table_name (str)
Name of the table to monitor, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **event** --
Type of modification event on the target table to be
monitored by this table monitor.
Allowed values are:
* **insert** --
Get notifications of new record insertions. The new row
images are forwarded to the subscribers.
* **update** --
Get notifications of update operations. The modified row
count information is forwarded to the subscribers.
* **delete** --
Get notifications of delete operations. The deleted row
count information is forwarded to the subscribers.
The default value is 'insert'.
* **monitor_id** --
ID to use for this monitor instead of a randomly generated
one
* **datasink_name** --
Name of an existing `data sink
<../../../../concepts/data_sinks/>`__ to send change data
notifications to
* **destination** --
Destination for the output data in format
'destination_type://path[:port]'. Supported destination types
are 'http', 'https' and 'kafka'.
* **kafka_topic_name** --
Name of the Kafka topic to publish to if *destination* in
input parameter *options* is specified and is a Kafka broker
* **increasing_column** --
Column on subscribed table that will increase for new records
(e.g., TIMESTAMP).
* **expression** --
Filter expression to limit records for notification
* **refresh_method** --
Method controlling when the table monitor reports changes to
the input parameter *table_name*.
Allowed values are:
* **on_change** --
Report changes as they occur.
* **periodic** --
Report changes periodically at rate specified by
*refresh_period*.
The default value is 'on_change'.
* **refresh_period** --
When *refresh_method* is *periodic*, specifies the period in
seconds at which changes are reported.
* **refresh_start_time** --
When *refresh_method* is *periodic*, specifies the first time
at which changes are reported. Value is a datetime string
with format 'YYYY-MM-DD HH:MM:SS'.
Returns:
A dict with the following entries--
topic_id (str)
The ZMQ topic ID to subscribe to for table events.
table_name (str)
Value of input parameter *table_name*.
type_schema (str)
JSON Avro schema of the table, for use in decoding published
records.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **insert_topic_id** --
The topic id for 'insert' *event* in input parameter
*options*
* **update_topic_id** --
The topic id for 'update' *event* in input parameter
*options*
* **delete_topic_id** --
The topic id for 'delete' *event* in input parameter
*options*
* **insert_type_schema** --
The JSON Avro schema of the table in output parameter
*table_name*
* **update_type_schema** --
The JSON Avro schema for 'update' events
* **delete_type_schema** --
The JSON Avro schema for 'delete' events
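Example (an illustrative sketch; assumes ``db`` is an established
:class:`GPUdb` connection and that the table name is hypothetical)::
response = db.create_table_monitor(
    table_name = 'example.my_table',
    options = { 'event': 'insert' } )
# Subscribe to response.topic_id on the ZMQ table monitor port (default 9002)
print( response.topic_id )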
"""
assert isinstance( table_name, (basestring)), "create_table_monitor(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( options, (dict)), "create_table_monitor(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/tablemonitor', obj, convert_to_attr_dict = True )
return response
# end create_table_monitor
# begin create_trigger_by_area
def create_trigger_by_area( self, request_id = None, table_names = None,
x_column_name = None, x_vector = None,
y_column_name = None, y_vector = None, options =
{} ):
"""Sets up an area trigger mechanism for two column_names for one or
more tables. (This function is essentially the two-dimensional version
of
:meth:`GPUdb.create_trigger_by_range`.) Once the trigger has been
activated, any
record added to the listed table(s) via :meth:`GPUdb.insert_records`
with the
chosen columns' values falling within the specified region will trip
the
trigger. All such records will be queued at the trigger port (by
default '9001'
but able to be retrieved via :meth:`GPUdb.show_system_status`) for any
listening
client to collect. Active triggers can be cancelled by using the
:meth:`GPUdb.clear_trigger` endpoint or by clearing all relevant
tables.
The output returns the trigger handle as well as indicating success or
failure
of the trigger activation.
Parameters:
request_id (str)
User-created ID for the trigger. The ID can be alphanumeric,
contain symbols, and must contain at least one character.
table_names (list of str)
Names of the tables on which the trigger will be activated and
maintained, each in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. The
user can provide a single element (which will be automatically
promoted to a list internally) or a list.
x_column_name (str)
Name of a numeric column on which the trigger is activated.
Usually 'x' for geospatial data points.
x_vector (list of floats)
The respective coordinate values for the region on which the
trigger is activated. This usually translates to the
x-coordinates of a geospatial region. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
y_column_name (str)
Name of a second numeric column on which the trigger is
activated. Usually 'y' for geospatial data points.
y_vector (list of floats)
The respective coordinate values for the region on which the
trigger is activated. This usually translates to the
y-coordinates of a geospatial region. Must be the same length
as input parameter *x_vector*. The user can provide a single element (which will
be automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
trigger_id (str)
Value of input parameter *request_id*.
info (dict of str to str)
Additional information.
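Example (an illustrative sketch; ``db`` is assumed to be an established
:class:`GPUdb` connection and the table/column names are hypothetical)::
response = db.create_trigger_by_area(
    request_id = 'area_trigger_1',
    table_names = [ 'example.points' ],
    x_column_name = 'x',
    x_vector = [ 0.0, 10.0, 10.0, 0.0 ],
    y_column_name = 'y',
    y_vector = [ 0.0, 0.0, 10.0, 10.0 ] )
print( response.trigger_id )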
"""
assert isinstance( request_id, (basestring)), "create_trigger_by_area(): Argument 'request_id' must be (one) of type(s) '(basestring)'; given %s" % type( request_id ).__name__
table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
assert isinstance( x_column_name, (basestring)), "create_trigger_by_area(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
x_vector = x_vector if isinstance( x_vector, list ) else ( [] if (x_vector is None) else [ x_vector ] )
assert isinstance( y_column_name, (basestring)), "create_trigger_by_area(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
y_vector = y_vector if isinstance( y_vector, list ) else ( [] if (y_vector is None) else [ y_vector ] )
assert isinstance( options, (dict)), "create_trigger_by_area(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['request_id'] = request_id
obj['table_names'] = table_names
obj['x_column_name'] = x_column_name
obj['x_vector'] = x_vector
obj['y_column_name'] = y_column_name
obj['y_vector'] = y_vector
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/trigger/byarea', obj, convert_to_attr_dict = True )
return response
# end create_trigger_by_area
# begin create_trigger_by_range
def create_trigger_by_range( self, request_id = None, table_names = None,
column_name = None, min = None, max = None,
options = {} ):
"""Sets up a simple range trigger for a column_name for one or more
tables. Once the trigger has been activated, any record added to the
listed
table(s) via :meth:`GPUdb.insert_records` with the chosen
column's value
falling within the specified range will trip the trigger. All such
records will
be queued at the trigger port (by default '9001' but able to be
retrieved via
:meth:`GPUdb.show_system_status`) for any listening client to collect.
Active
triggers can be cancelled by using the :meth:`GPUdb.clear_trigger`
endpoint or by
clearing all relevant tables.
The output returns the trigger handle as well as indicating success or
failure
of the trigger activation.
Parameters:
request_id (str)
User-created ID for the trigger. The ID can be alphanumeric,
contain symbols, and must contain at least one character.
table_names (list of str)
Tables on which the trigger will be active, each in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. The
user can provide a single element (which will be automatically
promoted to a list internally) or a list.
column_name (str)
Name of a numeric column on which the trigger is
activated.
min (float)
The lower bound (inclusive) for the trigger range.
max (float)
The upper bound (inclusive) for the trigger range.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
trigger_id (str)
Value of input parameter *request_id*.
info (dict of str to str)
Additional information.
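Example (an illustrative sketch; ``db`` is assumed to be an established
:class:`GPUdb` connection and the table/column names are hypothetical)::
response = db.create_trigger_by_range(
    request_id = 'range_trigger_1',
    table_names = [ 'example.points' ],
    column_name = 'x',
    min = 0,
    max = 100 )
print( response.trigger_id )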
"""
assert isinstance( request_id, (basestring)), "create_trigger_by_range(): Argument 'request_id' must be (one) of type(s) '(basestring)'; given %s" % type( request_id ).__name__
table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
assert isinstance( column_name, (basestring)), "create_trigger_by_range(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( min, (int, long, float)), "create_trigger_by_range(): Argument 'min' must be (one) of type(s) '(int, long, float)'; given %s" % type( min ).__name__
assert isinstance( max, (int, long, float)), "create_trigger_by_range(): Argument 'max' must be (one) of type(s) '(int, long, float)'; given %s" % type( max ).__name__
assert isinstance( options, (dict)), "create_trigger_by_range(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['request_id'] = request_id
obj['table_names'] = table_names
obj['column_name'] = column_name
obj['min'] = min
obj['max'] = max
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/trigger/byrange', obj, convert_to_attr_dict = True )
return response
# end create_trigger_by_range
# begin create_type
def create_type( self, type_definition = None, label = None, properties = {},
options = {} ):
"""Creates a new type describing the layout of a table. The type
definition is a JSON string describing the fields (i.e. columns) of the
type. Each field consists of a name and a data type. Supported data
types are: double, float, int, long, string, and bytes. In addition,
one or more properties can be specified for each column which customize
the memory usage and query availability of that column. Note that some
properties are mutually exclusive--i.e. they cannot be specified for
any given column simultaneously. One example of a mutually exclusive
property pair is *data* and *store_only*.
A single `primary key <../../../../concepts/tables/#primary-keys>`__
and/or single `shard key <../../../../concepts/tables/#shard-keys>`__
can be set across one or more columns. If a primary key is specified,
then a uniqueness constraint is enforced, in that only a single object
can exist with a given primary key column value (or set of values for
the key columns, if using a composite primary key). When
:meth:`inserting <GPUdb.insert_records>` data into a table with a
primary key, depending on the parameters in the request, incoming
objects with primary key values that match existing objects will either
overwrite (i.e. update) the existing object or will be skipped and not
added into the set.
Example of a type definition with some of the parameters::
{"type":"record",
"name":"point",
"fields":[{"name":"msg_id","type":"string"},
{"name":"x","type":"double"},
{"name":"y","type":"double"},
{"name":"TIMESTAMP","type":"double"},
{"name":"source","type":"string"},
{"name":"group_id","type":"string"},
{"name":"OBJECT_ID","type":"string"}]
}
Properties::
{"group_id":["store_only"],
"msg_id":["store_only","text_search"]
}
Parameters:
type_definition (str)
a JSON string describing the columns of the type to be
registered.
label (str)
A user-defined description string which can be used to
differentiate between tables and types with otherwise identical
schemas.
properties (dict of str to lists of str)
Each key-value pair specifies the properties to use for a given
column where the key is the column name. All keys used must be
relevant column names for the given table. Specifying any
property overrides the default properties for that column
(which is based on the column's data type).
Allowed values are:
* **data** --
Default property for all numeric and string type columns;
makes the column available for GPU queries.
* **text_search** --
Valid only for select 'string' columns. Enables full text
search--see `Full Text Search
<../../../../concepts/full_text_search/>`__ for details and
applicable string column types. Can be set independently of
*data* and *store_only*.
* **store_only** --
Persist the column value but do not make it available to
queries (e.g. :meth:`GPUdb.filter`)--i.e., it is mutually
exclusive with the *data* property. Any 'bytes' type column
must have a *store_only* property. This property reduces
system memory usage.
* **disk_optimized** --
Works in conjunction with the *data* property for string
columns. This property reduces system disk usage by disabling
reverse string lookups. Queries like :meth:`GPUdb.filter`,
:meth:`GPUdb.filter_by_list`, and
:meth:`GPUdb.filter_by_value` work as usual but
:meth:`GPUdb.aggregate_unique` and
:meth:`GPUdb.aggregate_group_by` are not allowed on columns
with this property.
* **timestamp** --
Valid only for 'long' columns. Indicates that this field
represents a timestamp and will be provided in milliseconds
since the Unix epoch: 00:00:00 Jan 1 1970. Dates represented
by a timestamp must fall between the year 1000 and the year
2900.
* **ulong** --
Valid only for 'string' columns. It represents an unsigned
long integer data type. The string can only be interpreted as
an unsigned long data type with minimum value of zero, and
maximum value of 18446744073709551615.
* **uuid** --
Valid only for 'string' columns. It represents a UUID data
type. Internally, it is stored as a 128-bit integer.
* **decimal** --
Valid only for 'string' columns. It represents a SQL type
NUMERIC(19, 4) data type. There can be up to 15 digits
before the decimal point and up to four digits in the
fractional part. The value can be positive or negative
(indicated by a minus sign at the beginning). This property
is mutually exclusive with the *text_search* property.
* **date** --
Valid only for 'string' columns. Indicates that this field
represents a date and will be provided in the format
'YYYY-MM-DD'. The allowable range is 1000-01-01 through
2900-01-01. This property is mutually exclusive with the
*text_search* property.
* **time** --
Valid only for 'string' columns. Indicates that this field
represents a time-of-day and will be provided in the format
'HH:MM:SS.mmm'. The allowable range is 00:00:00.000 through
23:59:59.999. This property is mutually exclusive with the
*text_search* property.
* **datetime** --
Valid only for 'string' columns. Indicates that this field
represents a datetime and will be provided in the format
'YYYY-MM-DD HH:MM:SS.mmm'. The allowable range is 1000-01-01
00:00:00.000 through 2900-01-01 23:59:59.999. This property
is mutually exclusive with the *text_search* property.
* **char1** --
This property provides optimized memory, disk and query
performance for string columns. Strings with this property
must be no longer than 1 character.
* **char2** --
This property provides optimized memory, disk and query
performance for string columns. Strings with this property
must be no longer than 2 characters.
* **char4** --
This property provides optimized memory, disk and query
performance for string columns. Strings with this property
must be no longer than 4 characters.
* **char8** --
This property provides optimized memory, disk and query
performance for string columns. Strings with this property
must be no longer than 8 characters.
* **char16** --
This property provides optimized memory, disk and query
performance for string columns. Strings with this property
must be no longer than 16 characters.
* **char32** --
This property provides optimized memory, disk and query
performance for string columns. Strings with this property
must be no longer than 32 characters.
* **char64** --
This property provides optimized memory, disk and query
performance for string columns. Strings with this property
must be no longer than 64 characters.
* **char128** --
This property provides optimized memory, disk and query
performance for string columns. Strings with this property
must be no longer than 128 characters.
* **char256** --
This property provides optimized memory, disk and query
performance for string columns. Strings with this property
must be no longer than 256 characters.
* **boolean** --
This property provides optimized memory and query performance
for int columns. Ints with this property must be between 0
and 1 (inclusive)
* **int8** --
This property provides optimized memory and query performance
for int columns. Ints with this property must be between -128
and +127 (inclusive)
* **int16** --
This property provides optimized memory and query performance
for int columns. Ints with this property must be between
-32768 and +32767 (inclusive)
* **ipv4** --
This property provides optimized memory, disk and query
performance for string columns representing IPv4 addresses
(i.e. 192.168.1.1). Strings with this property must be of the
form: A.B.C.D where A, B, C and D are in the range of 0-255.
* **wkt** --
Valid only for 'string' and 'bytes' columns. Indicates that
this field contains geospatial geometry objects in Well-Known
Text (WKT) or Well-Known Binary (WKB) format.
* **primary_key** --
This property indicates that this column will be part of (or
the entire) `primary key
<../../../../concepts/tables/#primary-keys>`__.
* **shard_key** --
This property indicates that this column will be part of (or
the entire) `shard key
<../../../../concepts/tables/#shard-keys>`__.
* **nullable** --
This property indicates that this column is nullable.
However, setting this property is insufficient for making the
column nullable. The user must declare the type of the
column as a union between its regular type and 'null' in the
avro schema for the record type in input parameter
*type_definition*. For example, if a column is of type
integer and is nullable, then the entry for the column in the
avro schema must be: ['int', 'null'].
The C++, C#, Java, and Python APIs have built-in convenience
for bypassing setting the avro schema by hand. For those
languages, one can use this property as usual and not have to
worry about the avro schema for the record.
* **dict** --
This property indicates that this column should be
`dictionary encoded
<../../../../concepts/dictionary_encoding/>`__. It can only
be used in conjunction with restricted string (charN), int,
long or date columns. Dictionary encoding is best for columns
where the cardinality (the number of unique values) is
expected to be low. This property can save a large amount of
memory.
* **init_with_now** --
For 'date', 'time', 'datetime', or 'timestamp' column types,
replace empty strings and invalid timestamps with 'NOW()'
upon insert.
* **init_with_uuid** --
For 'uuid' type, replace empty strings and invalid UUID
values with randomly-generated UUIDs upon insert.
The default value is an empty dict ( {} ).
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
type_id (str)
An identifier representing the created type. This type_id can
be used in subsequent calls to :meth:`create a table
<GPUdb.create_table>`
type_definition (str)
Value of input parameter *type_definition*.
label (str)
Value of input parameter *label*.
properties (dict of str to lists of str)
Value of input parameter *properties*.
info (dict of str to str)
Additional information.
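Example (an illustrative sketch; ``db`` is assumed to be an established
:class:`GPUdb` connection and the label is arbitrary)::
type_def = '{"type":"record","name":"point","fields":[{"name":"msg_id","type":"string"},{"name":"x","type":"double"},{"name":"y","type":"double"}]}'
response = db.create_type(
    type_definition = type_def,
    label = 'point_type',
    properties = { 'msg_id': [ 'store_only', 'text_search' ] } )
print( response.type_id )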
"""
assert isinstance( type_definition, (basestring)), "create_type(): Argument 'type_definition' must be (one) of type(s) '(basestring)'; given %s" % type( type_definition ).__name__
assert isinstance( label, (basestring)), "create_type(): Argument 'label' must be (one) of type(s) '(basestring)'; given %s" % type( label ).__name__
assert isinstance( properties, (dict)), "create_type(): Argument 'properties' must be (one) of type(s) '(dict)'; given %s" % type( properties ).__name__
assert isinstance( options, (dict)), "create_type(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['type_definition'] = type_definition
obj['label'] = label
obj['properties'] = self.__sanitize_dicts( properties )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/type', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
# Create a record type for this type and save it
record_type = RecordType.from_type_schema( response.label, response.type_definition, response.properties )
self.save_known_type( response.type_id, record_type)
return response
# end create_type
# begin create_union
def create_union( self, table_name = None, table_names = None,
input_column_names = None, output_column_names = None,
options = {} ):
"""Merges data from one or more tables with comparable data types into a
new table.
The following merges are supported:
UNION (DISTINCT/ALL) - For data set union details and examples, see
`Union <../../../../concepts/unions/>`__. For limitations, see `Union
Limitations and Cautions
<../../../../concepts/unions/#limitations-and-cautions>`__.
INTERSECT (DISTINCT/ALL) - For data set intersection details and
examples, see `Intersect <../../../../concepts/intersect/>`__. For
limitations, see `Intersect Limitations
<../../../../concepts/intersect/#limitations>`__.
EXCEPT (DISTINCT/ALL) - For data set subtraction details and examples,
see `Except <../../../../concepts/except/>`__. For limitations, see
`Except Limitations <../../../../concepts/except/#limitations>`__.
MERGE VIEWS - For a given set of `filtered views
<../../../../concepts/filtered_views/>`__ on a single table, creates a
single filtered view containing all of the unique records across all of
the given filtered data sets.
Non-charN 'string' and 'bytes' column types cannot be merged, nor can
columns marked as `store-only
<../../../../concepts/types/#data-handling>`__.
Parameters:
table_name (str)
Name of the table to be created, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
table_names (list of str)
The list of table names to merge, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
contain the names of one or more existing tables. The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
input_column_names (list of lists of str)
The list of columns from each of the corresponding input
tables. The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
output_column_names (list of str)
The list of names of the columns to be stored in the output
table. The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*table_name*. If *persist* is *false* (or unspecified), then
this is always allowed even if the caller does not have
permission to create tables. The generated name is returned
in *qualified_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
projection as part of input parameter *table_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of the schema for the output table. If
the schema provided is non-existent, it will be automatically
created. The default value is ''.
* **mode** --
If *merge_views*, then this operation will merge the provided
views. All input parameter *table_names* must be views from
the same underlying base table.
Allowed values are:
* **union_all** --
Retains all rows from the specified tables.
* **union** --
Retains all unique rows from the specified tables (synonym
for *union_distinct*).
* **union_distinct** --
Retains all unique rows from the specified tables.
* **except** --
Retains all unique rows from the first table that do not
appear in the second table (only works on 2 tables).
* **except_all** --
Retains all rows (including duplicates) from the first table
that do not appear in the second table (only works on 2
tables).
* **intersect** --
Retains all unique rows that appear in both of the
specified tables (only works on 2 tables).
* **intersect_all** --
Retains all rows (including duplicates) that appear in both
of the specified tables (only works on 2 tables).
* **merge_views** --
Merge two or more views (or views of views) of the same
base data set into a new view. If this mode is selected
input parameter *input_column_names* AND input parameter
*output_column_names* must be empty. The resulting view
would match the results of a SQL OR operation, e.g., if
filter 1 creates a view using the expression 'x = 20' and
filter 2 creates a view using the expression 'x <= 10',
then the merge views operation creates a new view using the
expression 'x = 20 OR x <= 10'.
The default value is 'union_all'.
* **chunk_size** --
Indicates the number of records per chunk to be used for this
output table.
* **create_indexes** --
Comma-separated list of columns on which to create indexes on
the output table. The columns specified must be present in
input parameter *output_column_names*.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the output
table specified in input parameter *table_name*.
* **persist** --
If *true*, then the output table specified in input parameter
*table_name* will be persisted and will not expire unless a
*ttl* is specified. If *false*, then the output table will
be an in-memory table and will expire unless a *ttl* is
specified otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **view_id** --
ID of view of which this output table is a member. The
default value is ''.
* **force_replicated** --
If *true*, then the output table specified in input parameter
*table_name* will be replicated even if the source tables are
not.
Allowed values are:
* true
* false
The default value is 'false'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **count** --
Number of records in the final table
* **qualified_table_name** --
The fully qualified name of the result table (i.e. including
the schema)
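Example (an illustrative sketch; ``db`` is assumed to be an established
:class:`GPUdb` connection and the table/column names are hypothetical)::
response = db.create_union(
    table_name = 'example.merged',
    table_names = [ 'example.t1', 'example.t2' ],
    input_column_names = [ [ 'id', 'value' ], [ 'id', 'value' ] ],
    output_column_names = [ 'id', 'value' ],
    options = { 'mode': 'union_all' } )
print( response.table_name )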
"""
assert isinstance( table_name, (basestring)), "create_union(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
input_column_names = input_column_names if isinstance( input_column_names, list ) else ( [] if (input_column_names is None) else [ input_column_names ] )
output_column_names = output_column_names if isinstance( output_column_names, list ) else ( [] if (output_column_names is None) else [ output_column_names ] )
assert isinstance( options, (dict)), "create_union(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['table_names'] = table_names
obj['input_column_names'] = input_column_names
obj['output_column_names'] = output_column_names
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/union', obj, convert_to_attr_dict = True )
return response
# end create_union
# begin create_user_external
def create_user_external( self, name = None, options = {} ):
"""Creates a new external user (a user whose credentials are managed by an
external LDAP).
.. note::
This method should be used for on-premise deployments only.
Parameters:
name (str)
Name of the user to be created. Must exactly match the user's
name in the external LDAP, prefixed with a @. Must not be the
same name as an existing user.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **resource_group** --
Name of an existing resource group to associate with this
user
* **default_schema** --
Default schema to associate with this user
* **create_home_directory** --
When *true*, a home directory in KiFS is created for this
user.
Allowed values are:
* true
* false
The default value is 'true'.
* **directory_data_limit** --
The maximum capacity to apply to the created directory if
*create_home_directory* is *true*. Set to -1 to indicate no
upper limit. If empty, the system default limit is applied.
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
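Example (an illustrative sketch; ``db`` is assumed to be an established
:class:`GPUdb` connection and the user name is hypothetical)::
response = db.create_user_external(
    name = '@ldap_analyst',
    options = { 'default_schema': 'example' } )
print( response.name )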
"""
assert isinstance( name, (basestring)), "create_user_external(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( options, (dict)), "create_user_external(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/user/external', obj, convert_to_attr_dict = True )
return response
# end create_user_external
# begin create_user_internal
def create_user_internal( self, name = None, password = None, options = {} ):
"""Creates a new internal user (a user whose credentials are managed by
the database system).
Parameters:
name (str)
Name of the user to be created. Must contain only lowercase
letters, digits, and underscores, and cannot begin with a
digit. Must not be the same name as an existing user or role.
password (str)
Initial password of the user to be created. May be an empty
string for no password.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **resource_group** --
Name of an existing resource group to associate with this
user
* **default_schema** --
Default schema to associate with this user
* **create_home_directory** --
When *true*, a home directory in KiFS is created for this
user.
Allowed values are:
* true
* false
The default value is 'true'.
* **directory_data_limit** --
The maximum capacity to apply to the created directory if
*create_home_directory* is *true*. Set to -1 to indicate no
upper limit. If empty, the system default limit is applied.
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
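Example (an illustrative sketch; ``db`` is assumed to be an established
:class:`GPUdb` connection; the user name and password are placeholders)::
response = db.create_user_internal(
    name = 'analyst_01',
    password = 'ChangeMe!',
    options = { 'create_home_directory': 'true' } )
print( response.name )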
"""
assert isinstance( name, (basestring)), "create_user_internal(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( password, (basestring)), "create_user_internal(): Argument 'password' must be (one) of type(s) '(basestring)'; given %s" % type( password ).__name__
assert isinstance( options, (dict)), "create_user_internal(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['password'] = password
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/user/internal', obj, convert_to_attr_dict = True )
return response
# end create_user_internal
# begin create_video
def create_video( self, attribute = None, begin = None, duration_seconds = None,
end = None, frames_per_second = None, style = None, path =
None, style_parameters = None, options = {} ):
"""Creates a job to generate a sequence of raster images that visualize
data over a specified time.
Parameters:
attribute (str)
The animated attribute to map to the video's frames. Must be
present in the LAYERS specified for the visualization. This is
often a time-related field but may be any numeric type.
begin (str)
The start point for the video. Accepts an expression evaluable
over the input parameter *attribute*.
duration_seconds (float)
Seconds of video to produce
end (str)
The end point for the video. Accepts an expression evaluable
over the input parameter *attribute*.
frames_per_second (float)
The presentation frame rate of the encoded video in frames per
second.
style (str)
The name of the visualize mode; should correspond to the schema
used for the input parameter *style_parameters* field.
Allowed values are:
* chart
* raster
* classbreak
* contour
* heatmap
* labels
path (str)
Fully-qualified `KiFS <../../../../tools/kifs/>`__ path. Write
access is required. A file must not exist at that path, unless
*replace_if_exists* is *true*.
style_parameters (str)
A string containing the JSON-encoded visualize request. Must
correspond to the visualize mode specified in the input
parameter *style* field.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the video.
* **window** --
Specified using the data-type corresponding to the input
parameter *attribute*. For a window of size W, a video frame
rendered for time t will visualize data in the interval
[t-W,t]. The minimum window size is the interval between
successive frames. The minimum value is the default. If a
value less than the minimum value is specified, it is
replaced with the minimum window size. Larger values will
make changes throughout the video appear more smooth while
smaller values will capture fast variations in the data.
* **no_error_if_exists** --
If *true*, does not return an error if the video already
exists. Ignored if *replace_if_exists* is *true*.
Allowed values are:
* false
* true
The default value is 'false'.
* **replace_if_exists** --
If *true*, deletes any existing video with the same path
before creating a new video.
Allowed values are:
* false
* true
The default value is 'false'.
Returns:
A dict with the following entries--
job_id (long)
An identifier for the created job.
path (str)
Fully qualified KiFS path to the video file.
info (dict of str to str)
Additional information.
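Example (an illustrative sketch; ``db`` is assumed to be an established
:class:`GPUdb` connection; the KiFS path, attribute, and option values
are placeholders, and the style-specific visualize-request JSON is
omitted)::
style_params_json = '{}'  # JSON-encoded visualize request for the chosen style (omitted here)
response = db.create_video(
    attribute = 'TIMESTAMP',
    begin = 'MIN(TIMESTAMP)',
    duration_seconds = 30,
    end = 'MAX(TIMESTAMP)',
    frames_per_second = 24,
    style = 'raster',
    path = 'kifs://videos/example_video',
    style_parameters = style_params_json,
    options = { 'replace_if_exists': 'true' } )
print( response.job_id )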
"""
assert isinstance( attribute, (basestring)), "create_video(): Argument 'attribute' must be (one) of type(s) '(basestring)'; given %s" % type( attribute ).__name__
assert isinstance( begin, (basestring)), "create_video(): Argument 'begin' must be (one) of type(s) '(basestring)'; given %s" % type( begin ).__name__
assert isinstance( duration_seconds, (int, long, float)), "create_video(): Argument 'duration_seconds' must be (one) of type(s) '(int, long, float)'; given %s" % type( duration_seconds ).__name__
assert isinstance( end, (basestring)), "create_video(): Argument 'end' must be (one) of type(s) '(basestring)'; given %s" % type( end ).__name__
assert isinstance( frames_per_second, (int, long, float)), "create_video(): Argument 'frames_per_second' must be (one) of type(s) '(int, long, float)'; given %s" % type( frames_per_second ).__name__
assert isinstance( style, (basestring)), "create_video(): Argument 'style' must be (one) of type(s) '(basestring)'; given %s" % type( style ).__name__
assert isinstance( path, (basestring)), "create_video(): Argument 'path' must be (one) of type(s) '(basestring)'; given %s" % type( path ).__name__
assert isinstance( style_parameters, (basestring)), "create_video(): Argument 'style_parameters' must be (one) of type(s) '(basestring)'; given %s" % type( style_parameters ).__name__
assert isinstance( options, (dict)), "create_video(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['attribute'] = attribute
obj['begin'] = begin
obj['duration_seconds'] = duration_seconds
obj['end'] = end
obj['frames_per_second'] = frames_per_second
obj['style'] = style
obj['path'] = path
obj['style_parameters'] = style_parameters
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/create/video', obj, convert_to_attr_dict = True )
return response
# end create_video
# begin delete_directory
def delete_directory( self, directory_name = None, options = {} ):
"""Deletes a directory from `KiFS <../../../../tools/kifs/>`__.
Parameters:
directory_name (str)
Name of the directory in KiFS to be deleted. The directory must
contain no files, unless *recursive* is *true*
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **recursive** --
If *true*, deletes the directory and all files residing in
it. If *false*, the directory must be empty for deletion.
Allowed values are:
* true
* false
The default value is 'false'.
* **no_error_if_not_exists** --
If *true*, no error is returned if specified directory does
not exist.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
directory_name (str)
Value of input parameter *directory_name*.
info (dict of str to str)
Additional information.
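Example (an illustrative sketch; ``db`` is assumed to be an established
:class:`GPUdb` connection and the directory name is hypothetical)::
response = db.delete_directory(
    directory_name = 'staging_area',
    options = { 'recursive': 'true',
                'no_error_if_not_exists': 'true' } )
print( response.directory_name )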
"""
assert isinstance( directory_name, (basestring)), "delete_directory(): Argument 'directory_name' must be (one) of type(s) '(basestring)'; given %s" % type( directory_name ).__name__
assert isinstance( options, (dict)), "delete_directory(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['directory_name'] = directory_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/delete/directory', obj, convert_to_attr_dict = True )
return response
# end delete_directory
# begin delete_files
def delete_files( self, file_names = None, options = {} ):
"""Deletes one or more files from `KiFS <../../../../tools/kifs/>`__.
Parameters:
file_names (list of str)
An array of names of files to be deleted. File paths may
contain wildcard characters after the KiFS directory delimiter.
Accepted wildcard characters are asterisk (*) to represent any
string of zero or more characters, and question mark (?) to
indicate a single character. The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_not_exists** --
If *true*, no error is returned if a specified file does not
exist.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
file_names (list of str)
Names of the files deleted from KiFS
info (dict of str to str)
Additional information.
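Example (an illustrative sketch; ``db`` is assumed to be an established
:class:`GPUdb` connection and the file pattern is hypothetical)::
response = db.delete_files(
    file_names = [ 'staging_area/old_batch_*.csv' ],
    options = { 'no_error_if_not_exists': 'true' } )
print( response.file_names )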
"""
file_names = file_names if isinstance( file_names, list ) else ( [] if (file_names is None) else [ file_names ] )
assert isinstance( options, (dict)), "delete_files(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['file_names'] = file_names
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/delete/files', obj, convert_to_attr_dict = True )
return response
# end delete_files
# begin delete_graph
def delete_graph( self, graph_name = None, options = {} ):
"""Deletes an existing graph from the graph server and/or persist.
Parameters:
graph_name (str)
Name of the graph to be deleted.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **delete_persist** --
If set to *true*, the graph is removed from the server and
persist. If set to *false*, the graph is removed from the
server but is left in persist. The graph can be reloaded from
persist if it is recreated with the same 'graph_name'.
Allowed values are:
* true
* false
The default value is 'true'.
* **server_id** --
Indicates which graph server(s) to send the request to.
Default is to send the request to all the servers.
Returns:
A dict with the following entries--
result (bool)
Indicates a successful deletion.
info (dict of str to str)
Additional information.
"""
assert isinstance( graph_name, (basestring)), "delete_graph(): Argument 'graph_name' must be (one) of type(s) '(basestring)'; given %s" % type( graph_name ).__name__
assert isinstance( options, (dict)), "delete_graph(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['graph_name'] = graph_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/delete/graph', obj, convert_to_attr_dict = True )
return response
# end delete_graph
# begin delete_proc
def delete_proc( self, proc_name = None, options = {} ):
"""Deletes a proc. Any currently running instances of the proc will be
killed.
Parameters:
proc_name (str)
Name of the proc to be deleted. Must be the name of a currently
existing proc.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
proc_name (str)
Value of input parameter *proc_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( proc_name, (basestring)), "delete_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__
assert isinstance( options, (dict)), "delete_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['proc_name'] = proc_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/delete/proc', obj, convert_to_attr_dict = True )
return response
# end delete_proc
# begin delete_records
def delete_records( self, table_name = None, expressions = None, options = {} ):
"""Deletes record(s) matching the provided criteria from the given table.
The record selection criteria can either be one or more input
parameter *expressions* (matching multiple records), a single record
identified by *record_id* options, or all records when using
*delete_all_records*. Note that the three selection criteria are
mutually exclusive. This operation cannot be run on a view. The
operation is synchronous meaning that a response will not be available
until the request is completely processed and all the matching records
are deleted.
Parameters:
table_name (str)
Name of the table from which to delete records, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
contain the name of an existing table; not applicable to views.
expressions (list of str)
A list of the actual predicates, one for each select; format
should follow the guidelines provided `here
<../../../../concepts/expressions/>`__. Specifying one or more
input parameter *expressions* is mutually exclusive to
specifying *record_id* in the input parameter *options*. The
user can provide a single element (which will be automatically
promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **global_expression** --
An optional global expression to reduce the search space of
the input parameter *expressions*. The default value is ''.
* **record_id** --
A record ID identifying a single record, obtained at the time
of :meth:`insertion of the record <GPUdb.insert_records>` or
by calling :meth:`GPUdb.get_records_from_collection` with the
*return_record_ids* option. This option cannot be used to
delete records from `replicated
<../../../../concepts/tables/#replication>`__ tables.
* **delete_all_records** --
If set to *true*, all records in the table will be deleted.
If set to *false*, then the option is effectively ignored.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
count_deleted (long)
Total number of records deleted across all expressions.
counts_deleted (list of longs)
Total number of records deleted per expression.
info (dict of str to str)
Additional information.
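Example (an illustrative sketch; ``db`` is assumed to be an established
:class:`GPUdb` connection and the table/expression are hypothetical)::
response = db.delete_records(
    table_name = 'example.my_table',
    expressions = [ 'x < 0' ] )
print( response.count_deleted )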
"""
assert isinstance( table_name, (basestring)), "delete_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
expressions = expressions if isinstance( expressions, list ) else ( [] if (expressions is None) else [ expressions ] )
assert isinstance( options, (dict)), "delete_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['expressions'] = expressions
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/delete/records', obj, convert_to_attr_dict = True )
return response
# end delete_records
# begin delete_resource_group
def delete_resource_group( self, name = None, options = {} ):
"""Deletes a resource group.
Parameters:
name (str)
Name of the resource group to be deleted.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **cascade_delete** --
If *true*, delete any existing entities owned by this group.
Otherwise this request will return an error if any such
entities exist.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
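Example (an illustrative sketch; the connection URL and group name are
assumptions)::

    from gpudb import GPUdb

    db = GPUdb( host = 'http://localhost:9191' )
    # Drop the group along with any entities it still owns
    resp = db.delete_resource_group( name = 'analyst_group',
                                     options = { 'cascade_delete': 'true' } )
    print( resp.name )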
"""
assert isinstance( name, (basestring)), "delete_resource_group(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( options, (dict)), "delete_resource_group(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/delete/resourcegroup', obj, convert_to_attr_dict = True )
return response
# end delete_resource_group
# begin delete_role
def delete_role( self, name = None, options = {} ):
"""Deletes an existing role.
.. note::
This method should be used for on-premise deployments only.
Parameters:
name (str)
Name of the role to be deleted. Must be an existing role.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "delete_role(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( options, (dict)), "delete_role(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/delete/role', obj, convert_to_attr_dict = True )
return response
# end delete_role
# begin delete_user
def delete_user( self, name = None, options = {} ):
"""Deletes an existing user.
.. note::
This method should be used for on-premise deployments only.
Parameters:
name (str)
Name of the user to be deleted. Must be an existing user.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "delete_user(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( options, (dict)), "delete_user(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/delete/user', obj, convert_to_attr_dict = True )
return response
# end delete_user
# begin download_files
def download_files( self, file_names = None, read_offsets = None, read_lengths =
None, options = {} ):
"""Downloads one or more files from `KiFS <../../../../tools/kifs/>`__.
Parameters:
file_names (list of str)
An array of the file names to download from KiFS. File paths
may contain wildcard characters after the KiFS directory
delimiter.
Accepted wildcard characters are asterisk (*) to represent any
string of zero or more characters, and question mark (?) to
indicate a single character. The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
read_offsets (list of longs)
An array of starting byte offsets from which to read each
respective file in input parameter *file_names*. Must either be
empty or the same length
as input parameter *file_names*. If empty, files are downloaded
in their entirety. If not
empty, input parameter *read_lengths* must also not be empty.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
read_lengths (list of longs)
Array of number of bytes to read from each respective file
in input parameter *file_names*. Must either be empty or the
same length as
input parameter *file_names*. If empty, files are downloaded in
their entirety. If not
empty, input parameter *read_offsets* must also not be empty.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **file_encoding** --
Encoding to be applied to the output file data. When using
JSON serialization it is recommended to specify this as
*base64*.
Allowed values are:
* **base64** --
Apply base64 encoding to the output file data.
* **none** --
Do not apply any encoding to the output file data.
The default value is 'none'.
Returns:
A dict with the following entries--
file_names (list of str)
Names of the files downloaded from KiFS
file_data (list of str)
Data for the respective downloaded files listed in output
parameter *file_names*
info (dict of str to str)
Additional information.
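Example (an illustrative sketch; the connection URL and KiFS file paths
are assumptions)::

    from gpudb import GPUdb

    db = GPUdb( host = 'http://localhost:9191' )
    # Download two whole files; empty offset/length lists mean full files
    resp = db.download_files( file_names = [ 'my_dir/a.csv', 'my_dir/b.csv' ],
                              read_offsets = [],
                              read_lengths = [],
                              options = { 'file_encoding': 'none' } )
    for fname, fdata in zip( resp.file_names, resp.file_data ):
        print( fname, len( fdata ) )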
"""
file_names = file_names if isinstance( file_names, list ) else ( [] if (file_names is None) else [ file_names ] )
read_offsets = read_offsets if isinstance( read_offsets, list ) else ( [] if (read_offsets is None) else [ read_offsets ] )
read_lengths = read_lengths if isinstance( read_lengths, list ) else ( [] if (read_lengths is None) else [ read_lengths ] )
assert isinstance( options, (dict)), "download_files(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['file_names'] = file_names
obj['read_offsets'] = read_offsets
obj['read_lengths'] = read_lengths
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/download/files', obj, convert_to_attr_dict = True )
return response
# end download_files
# begin drop_container_registry
def drop_container_registry( self, registry_name = None, options = {} ):
assert isinstance( registry_name, (basestring)), "drop_container_registry(): Argument 'registry_name' must be (one) of type(s) '(basestring)'; given %s" % type( registry_name ).__name__
assert isinstance( options, (dict)), "drop_container_registry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['registry_name'] = registry_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/drop/container/registry', obj, convert_to_attr_dict = True )
return response
# end drop_container_registry
# begin drop_credential
def drop_credential( self, credential_name = None, options = {} ):
"""Drop an existing `credential <../../../../concepts/credentials/>`__.
Parameters:
credential_name (str)
Name of the credential to be dropped. Must be an existing
credential.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
credential_name (str)
Value of input parameter *credential_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( credential_name, (basestring)), "drop_credential(): Argument 'credential_name' must be (one) of type(s) '(basestring)'; given %s" % type( credential_name ).__name__
assert isinstance( options, (dict)), "drop_credential(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['credential_name'] = credential_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/drop/credential', obj, convert_to_attr_dict = True )
return response
# end drop_credential
# begin drop_datasink
def drop_datasink( self, name = None, options = {} ):
"""Drops an existing `data sink <../../../../concepts/data_sinks/>`__.
By default, if any `table monitors
<../../../../concepts/table_monitors>`__ use this
sink as a destination, the request will be blocked unless option
*clear_table_monitors* is
*true*.
Parameters:
name (str)
Name of the data sink to be dropped. Must be an existing data
sink.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **clear_table_monitors** --
If *true*, any `table monitors
<../../../../concepts/table_monitors/>`__ that use this data
sink will be cleared.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
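Example (an illustrative sketch; the connection URL and sink name are
assumptions)::

    from gpudb import GPUdb

    db = GPUdb( host = 'http://localhost:9191' )
    # Drop the sink even if table monitors still reference it
    resp = db.drop_datasink( name = 'kafka_sink',
                             options = { 'clear_table_monitors': 'true' } )
    print( resp.name )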
"""
assert isinstance( name, (basestring)), "drop_datasink(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( options, (dict)), "drop_datasink(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/drop/datasink', obj, convert_to_attr_dict = True )
return response
# end drop_datasink
# begin drop_datasource
def drop_datasource( self, name = None, options = {} ):
"""Drops an existing `data source <../../../../concepts/data_sources/>`__.
Any external
tables that depend on the data source must be dropped before it can be
dropped.
Parameters:
name (str)
Name of the data source to be dropped. Must be an existing data
source.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "drop_datasource(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( options, (dict)), "drop_datasource(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/drop/datasource', obj, convert_to_attr_dict = True )
return response
# end drop_datasource
# begin drop_environment
def drop_environment( self, environment_name = None, options = {} ):
"""Drop an existing `user-defined function <../../../../concepts/udf/>`__
(UDF) environment.
Parameters:
environment_name (str)
Name of the environment to be dropped. Must be an existing
environment.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_not_exists** --
If *true* and if the environment specified in input parameter
*environment_name* does not exist, no error is returned. If
*false* and if the environment specified in input parameter
*environment_name* does not exist, then an error is returned.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
environment_name (str)
Value of input parameter *environment_name*.
info (dict of str to str)
Additional information.
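Example (an illustrative sketch; the connection URL and environment name
are assumptions)::

    from gpudb import GPUdb

    db = GPUdb( host = 'http://localhost:9191' )
    # No error is raised if the environment has already been dropped
    resp = db.drop_environment( environment_name = 'py_udf_env',
                                options = { 'no_error_if_not_exists': 'true' } )
    print( resp.environment_name )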
"""
assert isinstance( environment_name, (basestring)), "drop_environment(): Argument 'environment_name' must be (one) of type(s) '(basestring)'; given %s" % type( environment_name ).__name__
assert isinstance( options, (dict)), "drop_environment(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['environment_name'] = environment_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/drop/environment', obj, convert_to_attr_dict = True )
return response
# end drop_environment
# begin drop_model
def drop_model( self, model_name = None, options = {} ):
assert isinstance( model_name, (basestring)), "drop_model(): Argument 'model_name' must be (one) of type(s) '(basestring)'; given %s" % type( model_name ).__name__
assert isinstance( options, (dict)), "drop_model(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['model_name'] = model_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/drop/model', obj, convert_to_attr_dict = True )
return response
# end drop_model
# begin drop_schema
def drop_schema( self, schema_name = None, options = {} ):
"""Drops an existing SQL-style `schema <../../../../concepts/schemas/>`__,
specified in input parameter *schema_name*.
Parameters:
schema_name (str)
Name of the schema to be dropped. Must be an existing schema.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_not_exists** --
If *true* and if the schema specified in input parameter
*schema_name* does not exist, no error is returned. If
*false* and if the schema specified in input parameter
*schema_name* does not exist, then an error is returned.
Allowed values are:
* true
* false
The default value is 'false'.
* **cascade** --
If *true*, all tables within the schema will be dropped. If
*false*, the schema will be dropped only if empty.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
schema_name (str)
Value of input parameter *schema_name*.
info (dict of str to str)
Additional information.
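Example (an illustrative sketch; the connection URL and schema name are
assumptions)::

    from gpudb import GPUdb

    db = GPUdb( host = 'http://localhost:9191' )
    # Drop the schema along with any tables it still contains
    resp = db.drop_schema( schema_name = 'staging',
                           options = { 'cascade': 'true',
                                       'no_error_if_not_exists': 'true' } )
    print( resp.schema_name )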
"""
assert isinstance( schema_name, (basestring)), "drop_schema(): Argument 'schema_name' must be (one) of type(s) '(basestring)'; given %s" % type( schema_name ).__name__
assert isinstance( options, (dict)), "drop_schema(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['schema_name'] = schema_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/drop/schema', obj, convert_to_attr_dict = True )
return response
# end drop_schema
# begin evaluate_model
def evaluate_model( self, model_name = None, replicas = None, deployment_mode =
None, source_table = None, destination_table = None,
options = {} ):
assert isinstance( model_name, (basestring)), "evaluate_model(): Argument 'model_name' must be (one) of type(s) '(basestring)'; given %s" % type( model_name ).__name__
assert isinstance( replicas, (int, long, float)), "evaluate_model(): Argument 'replicas' must be (one) of type(s) '(int, long, float)'; given %s" % type( replicas ).__name__
assert isinstance( deployment_mode, (basestring)), "evaluate_model(): Argument 'deployment_mode' must be (one) of type(s) '(basestring)'; given %s" % type( deployment_mode ).__name__
assert isinstance( source_table, (basestring)), "evaluate_model(): Argument 'source_table' must be (one) of type(s) '(basestring)'; given %s" % type( source_table ).__name__
assert isinstance( destination_table, (basestring)), "evaluate_model(): Argument 'destination_table' must be (one) of type(s) '(basestring)'; given %s" % type( destination_table ).__name__
assert isinstance( options, (dict)), "evaluate_model(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['model_name'] = model_name
obj['replicas'] = replicas
obj['deployment_mode'] = deployment_mode
obj['source_table'] = source_table
obj['destination_table'] = destination_table
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/evaluate/model', obj, convert_to_attr_dict = True )
return response
# end evaluate_model
# begin execute_proc
def execute_proc( self, proc_name = None, params = {}, bin_params = {},
input_table_names = [], input_column_names = {},
output_table_names = [], options = {} ):
"""Executes a proc. This endpoint is asynchronous and does not wait for
the proc to complete before returning.
If the proc being executed is distributed, input parameter
*input_table_names* &
input parameter *input_column_names* may be passed to the proc to use
for reading data,
and input parameter *output_table_names* may be passed to the proc to
use for writing
data.
If the proc being executed is non-distributed, these table parameters
will be
ignored.
Parameters:
proc_name (str)
Name of the proc to execute. Must be the name of a currently
existing proc.
params (dict of str to str)
A map containing named parameters to pass to the proc. Each
key/value pair specifies the name of a parameter and its value.
The default value is an empty dict ( {} ).
bin_params (dict of str to str)
A map containing named binary parameters to pass to the proc.
Each key/value pair specifies the name of a parameter and its
value. The default value is an empty dict ( {} ).
input_table_names (list of str)
Names of the tables containing data to be passed to the
proc. Each name specified must be the name of a currently
existing table, in
[schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
If no table names are specified, no data will be passed to the
proc. This
parameter is ignored if the proc has a non-distributed
execution mode. The default value is an empty list ( [] ).
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
input_column_names (dict of str to lists of str)
Map of table names from input parameter *input_table_names* to
lists
of names of columns from those tables that will be passed to
the proc. Each
column name specified must be the name of an existing column in
the
corresponding table. If a table name from input parameter
*input_table_names* is not
included, all columns from that table will be passed to the
proc. This
parameter is ignored if the proc has a non-distributed
execution mode. The default value is an empty dict ( {} ).
output_table_names (list of str)
Names of the tables to which output data from the proc will
be written, each in [schema_name.]table_name format, using
standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__
and meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
If a specified table does not exist, it will automatically be
created with the
same schema as the corresponding table (by order) from
input parameter *input_table_names*, excluding any primary and
shard keys. If a specified
table is a non-persistent result table, it must not have
primary or shard keys.
If no table names are specified, no output data can be returned
from the proc.
This parameter is ignored if the proc has a non-distributed
execution mode. The default value is an empty list ( [] ).
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **cache_input** --
A comma-delimited list of table names from input parameter
*input_table_names* from which input data will be cached for
use in subsequent calls to :meth:`GPUdb.execute_proc` with
the *use_cached_input* option. Cached input data will be
retained until the proc status is cleared with the
:meth:`clear_complete <GPUdb.show_proc_status>` option of
:meth:`GPUdb.show_proc_status` and all proc instances using
the cached data have completed. The default value is ''.
* **use_cached_input** --
A comma-delimited list of run IDs (as returned from prior
calls to :meth:`GPUdb.execute_proc`) of running or completed
proc instances from which input data cached using the
*cache_input* option will be used. Cached input data will not
be used for any tables specified in input parameter
*input_table_names*, but data from all other tables cached
for the specified run IDs will be passed to the proc. If the
same table was cached for multiple specified run IDs, the
cached data from the first run ID specified in the list that
includes that table will be used. The default value is ''.
* **run_tag** --
A string that, if not empty, can be used in subsequent calls
to :meth:`GPUdb.show_proc_status` or :meth:`GPUdb.kill_proc`
to identify the proc instance. The default value is ''.
* **max_output_lines** --
The maximum number of lines of output from stdout and stderr
to return via :meth:`GPUdb.show_proc_status`. If the number
of lines output exceeds the maximum, earlier lines are
discarded. The default value is '100'.
* **execute_at_startup** --
If *true*, an instance of the proc will run when the database
is started instead of running immediately. The output
parameter *run_id* can be retrieved using
:meth:`GPUdb.show_proc` and used in
:meth:`GPUdb.show_proc_status`.
Allowed values are:
* true
* false
The default value is 'false'.
* **execute_at_startup_as** --
Sets the alternate user name to execute this proc instance as
when *execute_at_startup* is *true*. The default value is
''.
Returns:
A dict with the following entries--
run_id (str)
The run ID of the running proc instance. This may be passed to
:meth:`GPUdb.show_proc_status` to obtain status information, or
:meth:`GPUdb.kill_proc` to kill the proc instance.
info (dict of str to str)
Additional information.
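Example (an illustrative sketch; the connection URL, proc name, parameter
names, and table names are assumptions)::

    from gpudb import GPUdb

    db = GPUdb( host = 'http://localhost:9191' )
    # Kick off a distributed proc, reading from one table and writing to
    # another; the call returns immediately with a run ID
    resp = db.execute_proc( proc_name = 'moving_average',
                            params = { 'window': '10' },
                            input_table_names = [ 'example.readings' ],
                            output_table_names = [ 'example.readings_avg' ],
                            options = { 'run_tag': 'nightly' } )
    status = db.show_proc_status( run_id = resp.run_id )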
"""
assert isinstance( proc_name, (basestring)), "execute_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__
assert isinstance( params, (dict)), "execute_proc(): Argument 'params' must be (one) of type(s) '(dict)'; given %s" % type( params ).__name__
assert isinstance( bin_params, (dict)), "execute_proc(): Argument 'bin_params' must be (one) of type(s) '(dict)'; given %s" % type( bin_params ).__name__
input_table_names = input_table_names if isinstance( input_table_names, list ) else ( [] if (input_table_names is None) else [ input_table_names ] )
assert isinstance( input_column_names, (dict)), "execute_proc(): Argument 'input_column_names' must be (one) of type(s) '(dict)'; given %s" % type( input_column_names ).__name__
output_table_names = output_table_names if isinstance( output_table_names, list ) else ( [] if (output_table_names is None) else [ output_table_names ] )
assert isinstance( options, (dict)), "execute_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['proc_name'] = proc_name
obj['params'] = self.__sanitize_dicts( params )
obj['bin_params'] = self.__sanitize_dicts( bin_params )
obj['input_table_names'] = input_table_names
obj['input_column_names'] = self.__sanitize_dicts( input_column_names )
obj['output_table_names'] = output_table_names
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/execute/proc', obj, convert_to_attr_dict = True )
return response
# end execute_proc
# begin execute_sql
def execute_sql( self, statement = None, offset = 0, limit = -9999, encoding =
'binary', request_schema_str = '', data = [], options = {}
):
"""Execute a SQL statement (query, DML, or DDL).
See `SQL Support <../../../../sql/>`__ for the complete set of
supported SQL commands.
Parameters:
statement (str)
SQL statement (query, DML, or DDL) to be executed
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records; either 'binary' or
'json'.
Allowed values are:
* binary
* json
The default value is 'binary'.
request_schema_str (str)
Avro schema of input parameter *data*. The default value is
''.
data (list of str)
An array of binary-encoded data for the records to be bound to
the SQL query. Or use *query_parameters* to pass the data in
JSON format. The default value is an empty list ( [] ). The
user can provide a single element (which will be automatically
promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **cost_based_optimization** --
If *false*, disables the cost-based optimization of the given
query.
Allowed values are:
* true
* false
The default value is 'false'.
* **distributed_joins** --
If *true*, enables the use of distributed joins in servicing
the given query. Any query requiring a distributed join will
succeed, though hints can be used in the query to change the
distribution of the source data to allow the query to
succeed.
Allowed values are:
* true
* false
The default value is 'false'.
* **distributed_operations** --
If *true*, enables the use of distributed operations in
servicing the given query. Any query requiring a distributed
join will succeed, though hints can be used in the query to
change the distribution of the source data to allow the query
to succeed.
Allowed values are:
* true
* false
The default value is 'false'.
* **ignore_existing_pk** --
Specifies the record collision error-suppression policy for
inserting into or updating a table with a `primary key
<../../../../concepts/tables/#primary-keys>`__, only
used when primary key record collisions are rejected
(*update_on_existing_pk*
is *false*). If set to
*true*, any record insert/update that is rejected
for resulting in a primary key collision with an existing
table record will be ignored with no error
generated. If *false*, the rejection of any
insert/update for resulting in a primary key collision will
cause an error to be reported. If the
specified table does not have a primary key or if
*update_on_existing_pk* is
*true*, then this option has no effect.
Allowed values are:
* **true** --
Ignore inserts/updates that result in primary key
collisions with existing records
* **false** --
Treat as errors any inserts/updates that result in primary
key collisions with existing records
The default value is 'false'.
* **late_materialization** --
If *true*, join/filter results will always be materialized
(saved in result-table format).
Allowed values are:
* true
* false
The default value is 'false'.
* **paging_table** --
If empty, or if the specified paging table does not exist, the
system will create a paging table and return its name when the
query output has more records than the user requested. If the
paging table exists in the system, the records from the paging
table are returned without evaluating the query.
* **paging_table_ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the paging
table.
* **parallel_execution** --
If *false*, disables the parallel step execution of the given
query.
Allowed values are:
* true
* false
The default value is 'true'.
* **plan_cache** --
If *false*, disables plan caching for the given query.
Allowed values are:
* true
* false
The default value is 'true'.
* **prepare_mode** --
If *true*, compiles a query into an execution plan and saves
it in query cache. Query execution is not performed and an
empty response will be returned to the user.
Allowed values are:
* true
* false
The default value is 'false'.
* **preserve_dict_encoding** --
If *true*, then columns that were dict encoded in the source
table will be dict encoded in the projection table.
Allowed values are:
* true
* false
The default value is 'true'.
* **query_parameters** --
Query parameters in JSON array or arrays (for inserting
multiple rows). This can be used instead of input parameter
*data* and input parameter *request_schema_str*.
* **results_caching** --
If *false*, disables caching of the results of the given
query.
Allowed values are:
* true
* false
The default value is 'true'.
* **rule_based_optimization** --
If *false*, disables rule-based rewrite optimizations for the
given query.
Allowed values are:
* true
* false
The default value is 'true'.
* **ssq_optimization** --
If *false*, scalar subqueries will be translated into joins.
Allowed values are:
* true
* false
The default value is 'true'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the
intermediate result tables used in query execution.
* **update_on_existing_pk** --
Specifies the record collision policy for inserting into or
updating
a table with a `primary key
<../../../../concepts/tables/#primary-keys>`__. If set to
*true*, any existing table record with primary
key values that match those of a record being inserted or
updated will be replaced by that record.
If set to *false*, any such primary key
collision will result in the insert/update being rejected and
the error handled as determined by
*ignore_existing_pk*. If the specified table does not have a
primary key,
then this option has no effect.
Allowed values are:
* **true** --
Replace the collided-into record with the record inserted
or updated when a new/modified record causes a primary key
collision with an existing record
* **false** --
Reject the insert or update when it results in a primary
key collision with an existing record
The default value is 'false'.
* **validate_change_column** --
When changing a column using alter table, validate the change
before applying it. If *true*, then validate all values. A
value too large (or too long) for the new type will prevent
any change. If *false*, then when a value is too large or
long, it will be truncated.
Allowed values are:
* true
* false
The default value is 'true'.
* **current_schema** --
Use the supplied value as the `default schema
<../../../../concepts/schemas/#default-schema>`__ when
processing this SQL command.
Returns:
A dict with the following entries--
count_affected (long)
The number of objects/records affected.
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
binary_encoded_response (bytes)
Avro binary encoded response.
json_encoded_response (str)
Avro JSON encoded response.
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
Allowed values are:
* true
* false
paging_table (str)
Name of the table that has the result records of the query.
Valid, when output parameter *has_more_records* is *true*
(Subject to config.paging_tables_enabled)
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **count** --
Number of records in the final table
record_type (:class:`RecordType` or None)
A :class:`RecordType` object with which the user can decode
the binary data by using :meth:`GPUdbRecord.decode_binary_data`.
If JSON encoding is used, then None.
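Example (an illustrative sketch; the connection URL and SQL statement
are assumptions)::

    from gpudb import GPUdb

    db = GPUdb( host = 'http://localhost:9191' )
    # DML statements report the number of affected records
    resp = db.execute_sql( statement = "DELETE FROM example.orders WHERE qty = 0" )
    if resp.is_ok():
        print( resp.count_affected )

For queries whose result records need to be decoded client-side, see
:meth:`GPUdb.execute_sql_and_decode`.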
"""
assert isinstance( statement, (basestring)), "execute_sql(): Argument 'statement' must be (one) of type(s) '(basestring)'; given %s" % type( statement ).__name__
assert isinstance( offset, (int, long, float)), "execute_sql(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "execute_sql(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "execute_sql(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( request_schema_str, (basestring)), "execute_sql(): Argument 'request_schema_str' must be (one) of type(s) '(basestring)'; given %s" % type( request_schema_str ).__name__
data = data if isinstance( data, list ) else ( [] if (data is None) else [ data ] )
assert isinstance( options, (dict)), "execute_sql(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['statement'] = statement
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['request_schema_str'] = request_schema_str
obj['data'] = data
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/execute/sql', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
# Create the record type and save it in the response, if applicable
if encoding == "binary":
record_type = RecordType.from_dynamic_schema( response.response_schema_str, response.binary_encoded_response )
response["record_type"] = record_type
else:
response["record_type"] = None
return response
# end execute_sql
# begin execute_sql_and_decode
def execute_sql_and_decode( self, statement = None, offset = 0, limit = -9999,
encoding = 'binary', request_schema_str = '',
data = [], options = {}, record_type = None,
force_primitive_return_types = True,
get_column_major = True ):
"""Execute a SQL statement (query, DML, or DDL).
See `SQL Support <../../../../sql/>`__ for the complete set of
supported SQL commands.
Parameters:
statement (str)
SQL statement (query, DML, or DDL) to be executed
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records; either 'binary' or
'json'.
Allowed values are:
* binary
* json
The default value is 'binary'.
request_schema_str (str)
Avro schema of input parameter *data*. The default value is
''.
data (list of str)
An array of binary-encoded data for the records to be bound to
the SQL query. Or use *query_parameters* to pass the data in
JSON format. The default value is an empty list ( [] ). The
user can provide a single element (which will be automatically
promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **cost_based_optimization** --
If *false*, disables the cost-based optimization of the given
query.
Allowed values are:
* true
* false
The default value is 'false'.
* **distributed_joins** --
If *true*, enables the use of distributed joins in servicing
the given query. Any query requiring a distributed join will
succeed, though hints can be used in the query to change the
distribution of the source data to allow the query to
succeed.
Allowed values are:
* true
* false
The default value is 'false'.
* **distributed_operations** --
If *true*, enables the use of distributed operations in
servicing the given query. Any query requiring a distributed
join will succeed, though hints can be used in the query to
change the distribution of the source data to allow the query
to succeed.
Allowed values are:
* true
* false
The default value is 'false'.
* **ignore_existing_pk** --
Specifies the record collision error-suppression policy for
inserting into or updating a table with a `primary key
<../../../../concepts/tables/#primary-keys>`__, only
used when primary key record collisions are rejected
(*update_on_existing_pk*
is *false*). If set to
*true*, any record insert/update that is rejected
for resulting in a primary key collision with an existing
table record will be ignored with no error
generated. If *false*, the rejection of any
insert/update for resulting in a primary key collision will
cause an error to be reported. If the
specified table does not have a primary key or if
*update_on_existing_pk* is
*true*, then this option has no effect.
Allowed values are:
* **true** --
Ignore inserts/updates that result in primary key
collisions with existing records
* **false** --
Treat as errors any inserts/updates that result in primary
key collisions with existing records
The default value is 'false'.
* **late_materialization** --
If *true*, join/filter results will always be materialized
(saved in result-table format).
Allowed values are:
* true
* false
The default value is 'false'.
* **paging_table** --
If empty, or if the specified paging table does not exist, the
system will create a paging table and return its name when the
query output has more records than the user requested. If the
paging table exists in the system, the records from the paging
table are returned without evaluating the query.
* **paging_table_ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the paging
table.
* **parallel_execution** --
If *false*, disables the parallel step execution of the given
query.
Allowed values are:
* true
* false
The default value is 'true'.
* **plan_cache** --
If *false*, disables plan caching for the given query.
Allowed values are:
* true
* false
The default value is 'true'.
* **prepare_mode** --
If *true*, compiles a query into an execution plan and saves
it in query cache. Query execution is not performed and an
empty response will be returned to the user.
Allowed values are:
* true
* false
The default value is 'false'.
* **preserve_dict_encoding** --
If *true*, then columns that were dict encoded in the source
table will be dict encoded in the projection table.
Allowed values are:
* true
* false
The default value is 'true'.
* **query_parameters** --
Query parameters in JSON array or arrays (for inserting
multiple rows). This can be used instead of input parameter
*data* and input parameter *request_schema_str*.
* **results_caching** --
If *false*, disables caching of the results of the given
query.
Allowed values are:
* true
* false
The default value is 'true'.
* **rule_based_optimization** --
If *false*, disables rule-based rewrite optimizations for the
given query.
Allowed values are:
* true
* false
The default value is 'true'.
* **ssq_optimization** --
If *false*, scalar subqueries will be translated into joins.
Allowed values are:
* true
* false
The default value is 'true'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the
intermediate result tables used in query execution.
* **update_on_existing_pk** --
Specifies the record collision policy for inserting into or
updating
a table with a `primary key
<../../../../concepts/tables/#primary-keys>`__. If set to
*true*, any existing table record with primary
key values that match those of a record being inserted or
updated will be replaced by that record.
If set to *false*, any such primary key
collision will result in the insert/update being rejected and
the error handled as determined by
*ignore_existing_pk*. If the specified table does not have a
primary key,
then this option has no effect.
Allowed values are:
* **true** --
Replace the collided-into record with the record inserted
or updated when a new/modified record causes a primary key
collision with an existing record
* **false** --
Reject the insert or update when it results in a primary
key collision with an existing record
The default value is 'false'.
* **validate_change_column** --
When changing a column using alter table, validate the change
before applying it. If *true*, then validate all values. A
value too large (or too long) for the new type will prevent
any change. If *false*, then when a value is too large or
long, it will be truncated.
Allowed values are:
* true
* false
The default value is 'true'.
* **current_schema** --
Use the supplied value as the `default schema
<../../../../concepts/schemas/#default-schema>`__ when
processing this SQL command.
record_type (:class:`RecordType` or None)
The record type expected in the results, or None to
determine the appropriate type automatically. If known,
providing this may improve performance in binary mode. Not used
in JSON mode. The default value is None.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
get_column_major (bool)
Indicates if the decoded records will be transposed to be
column-major or returned as is (row-major). Default value is
True.
Returns:
A dict with the following entries--
count_affected (long)
The number of objects/records affected.
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
Allowed values are:
* true
* false
paging_table (str)
Name of the table that has the result records of the query.
Valid, when output parameter *has_more_records* is *true*
(Subject to config.paging_tables_enabled)
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **count** --
Number of records in the final table
records (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
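Example (an illustrative sketch; the connection URL and query are
assumptions)::

    from gpudb import GPUdb

    db = GPUdb( host = 'http://localhost:9191' )
    resp = db.execute_sql_and_decode(
        statement = "SELECT id, total FROM example.orders LIMIT 5",
        limit = 5,
        get_column_major = False )  # row-major: one record per row
    if resp.is_ok():
        for record in resp.records:
            print( record )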
"""
assert isinstance( statement, (basestring)), "execute_sql_and_decode(): Argument 'statement' must be (one) of type(s) '(basestring)'; given %s" % type( statement ).__name__
assert isinstance( offset, (int, long, float)), "execute_sql_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "execute_sql_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "execute_sql_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( request_schema_str, (basestring)), "execute_sql_and_decode(): Argument 'request_schema_str' must be (one) of type(s) '(basestring)'; given %s" % type( request_schema_str ).__name__
data = data if isinstance( data, list ) else ( [] if (data is None) else [ data ] )
assert isinstance( options, (dict)), "execute_sql_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
assert ( (record_type is None) or isinstance(record_type, RecordType) ), "execute_sql_and_decode: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
assert isinstance(force_primitive_return_types, bool), "execute_sql_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__
assert isinstance(get_column_major, bool), "execute_sql_and_decode: Argument 'get_column_major' must be bool; given %s" % type( get_column_major ).__name__
(REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/execute/sql", get_rsp_cext = True )
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['statement'] = statement
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['request_schema_str'] = request_schema_str
obj['data'] = data
obj['options'] = self.__sanitize_dicts( options )
response, raw_response = self.__submit_request( '/execute/sql', obj, get_rsp_cext = True, convert_to_attr_dict = True, return_raw_response_too = True )
if not response.is_ok():
return response
# Decode the data
if (encoding == 'binary'):
record_type = record_type if record_type else RecordType.from_dynamic_schema( response.response_schema_str, raw_response, response.binary_encoded_response )
records = record_type.decode_dynamic_records( raw_response, response.binary_encoded_response )
if force_primitive_return_types:
records = _Util.convert_cext_records_to_ordered_dicts( records )
# Transpose the data to column-major, if requested by the user
if get_column_major:
records = GPUdbRecord.transpose_data_to_col_major( records )
response["records"] = records
else:
records = json.loads( response.json_encoded_response )
if get_column_major:
# Get column-major data
records = GPUdbRecord.decode_dynamic_json_data_column_major( records, response.response_schema_str )
else:
# Get row-major data
records = GPUdbRecord.decode_dynamic_json_data_row_major( records, response.response_schema_str )
response["records"] = records
# end if
del response["binary_encoded_response"]
del response["json_encoded_response"]
return response
# end execute_sql_and_decode
# begin export_records_to_files
def export_records_to_files( self, table_name = None, filepath = None, options =
{} ):
"""Export records from a table to files. All tables can be exported, in
full or in part
(see *columns_to_export* and *columns_to_skip*).
Additional filtering can be applied when using export table with
expression through SQL.
Default destination is KIFS, though other storage types (Azure, S3,
GCS, and HDFS) are supported
through *datasink_name*; see :meth:`GPUdb.create_datasink`.
Server's local file system is not supported. Default file format is
delimited text. See options for
different file types and different options for each file type. The
table is saved to a single file if it is
within the max file size limit (which may vary depending on datasink
type). If not, the table is split into
multiple files; these may be smaller than the max size limit.
All filenames created are returned in the response.
Parameters:
table_name (str)
Name of the source table from which records will be exported.
filepath (str)
Path to data export target. If input parameter *filepath* has
a file extension, it is
read as the name of a file. If input parameter *filepath* is a
directory, then the source table name with a
random UUID appended will be used as the name of each exported
file, all written to that directory.
If filepath is a filename, then all exported files will have a
random UUID appended to the given
name. In either case, the target directory specified or
implied must exist. The names of all
exported files are returned in the response.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **batch_size** --
Number of records to be exported as a batch. The default
value is '1000000'.
* **column_formats** --
For each source column specified, applies the
column-property-bound
format. Currently supported column properties include date,
time, & datetime. The parameter value
must be formatted as a JSON string of maps of column names to
maps of column properties to their
corresponding column formats, e.g.,
'{ "order_date" : { "date" : "%Y.%m.%d" }, "order_time" : {
"time" : "%H:%M:%S" } }'.
See *default_column_formats* for valid format syntax.
* **columns_to_export** --
Specifies a comma-delimited list of columns from the source
table to
export, written to the output file in the order they are
given.
Column names can be provided, in which case the target file
will use those names as the column
headers as well.
Alternatively, column numbers can be specified--discretely or
as a range. For example, a value of
'5,7,1..3' will write values from the fifth column in the
source table into the first column in the
target file, from the seventh column in the source table into
the second column in the target file,
and from the first through third columns in the source table
into the third through fifth columns in
the target file.
Mutually exclusive with *columns_to_skip*.
* **columns_to_skip** --
Comma-separated list of column names or column numbers to not
export. All columns in the source table not specified will
be written to the target file in the
order they appear in the table definition. Mutually
exclusive with
*columns_to_export*.
* **datasink_name** --
Datasink name, created using :meth:`GPUdb.create_datasink`.
* **default_column_formats** --
Specifies the default format to use to write data. Currently
supported column properties include date, time, & datetime.
This default column-property-bound
format can be overridden by specifying a column property &
format for a given source column in
*column_formats*. For each specified annotation, the format
will apply to all
columns with that annotation unless custom *column_formats*
for that
annotation are specified.
The parameter value must be formatted as a JSON string that
is a map of column properties to their
respective column formats, e.g., '{ "date" : "%Y.%m.%d",
"time" : "%H:%M:%S" }'. Column
formats are specified as a string of control characters and
plain text. The supported control
characters are 'Y', 'm', 'd', 'H', 'M', and 'S', which
follow the Linux 'strptime()'
specification, as well as 's', which specifies seconds and
fractional seconds (though the fractional
component will be truncated past milliseconds).
Formats for the 'date' annotation must include the 'Y', 'm',
and 'd' control characters. Formats for
the 'time' annotation must include the 'H', 'M', and either
'S' or 's' (but not both) control
characters. Formats for the 'datetime' annotation meet both
the 'date' and 'time' control character
requirements. For example, '{"datetime" : "%m/%d/%Y %H:%M:%S"
}' would be used to write text
as "05/04/2000 12:12:11"
* **export_ddl** --
Save DDL to a separate file. The default value is 'false'.
* **file_extension** --
Extension to give the export file. The default value is
'.csv'.
* **file_type** --
Specifies the file format to use when exporting data.
Allowed values are:
* **delimited_text** --
Delimited text file format; e.g., CSV, TSV, PSV, etc.
The default value is 'delimited_text'.
* **kinetica_header** --
Whether to include a Kinetica proprietary header. Will not be
written if *text_has_header* is
*false*.
Allowed values are:
* true
* false
The default value is 'false'.
* **kinetica_header_delimiter** --
If a Kinetica proprietary header is included, then specify a
property separator. Different from column delimiter. The
default value is '|'.
* **compression_type** --
File compression type. GZip can be applied to text and
Parquet files. Snappy can only be applied to Parquet files,
and is the default compression for them.
Allowed values are:
* uncompressed
* snappy
* gzip
* **single_file** --
Save records to a single file. This option may be ignored if
file
size exceeds internal file size limits (this limit will
differ on different targets).
Allowed values are:
* true
* false
* overwrite
The default value is 'true'.
* **single_file_max_size** --
Max file size (in MB) to allow saving to a single file. May
be overridden by target limitations. The default value is
''.
* **text_delimiter** --
Specifies the character to write out to delimit field values
and
field names in the header (if present).
For *delimited_text* *file_type* only. The default value is
','.
* **text_has_header** --
Indicates whether to write out a header row.
For *delimited_text* *file_type* only.
Allowed values are:
* true
* false
The default value is 'true'.
* **text_null_string** --
Specifies the character string that should be written out for
the null
value in the data.
For *delimited_text* *file_type* only. The default value is
'\\N'.
Returns:
A dict with the following entries--
table_name (str)
Name of source table
count_exported (long)
Number of source table records exported
count_skipped (long)
Number of source table records skipped
files (list of str)
Names of all exported files
last_timestamp (long)
Timestamp of last file scanned
data_text (list of str)
data_bytes (list of str)
info (dict of str to str)
Additional information
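Example (an illustrative sketch; the connection URL, table name, and KiFS
target path are assumptions)::

    from gpudb import GPUdb

    db = GPUdb( host = 'http://localhost:9191' )
    # Export the table as headered CSV files under a KiFS directory
    resp = db.export_records_to_files(
        table_name = 'example.orders',
        filepath = 'export_dir/',
        options = { 'file_type': 'delimited_text',
                    'text_delimiter': ',',
                    'text_has_header': 'true' } )
    print( resp.count_exported, resp.files )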
"""
assert isinstance( table_name, (basestring)), "export_records_to_files(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( filepath, (basestring)), "export_records_to_files(): Argument 'filepath' must be (one) of type(s) '(basestring)'; given %s" % type( filepath ).__name__
assert isinstance( options, (dict)), "export_records_to_files(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['filepath'] = filepath
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/export/records/tofiles', obj, convert_to_attr_dict = True )
return response
# end export_records_to_files
# begin export_records_to_table
def export_records_to_table( self, table_name = None, remote_query = '', options
= {} ):
"""Exports records from source table to the specified target table in an
external database
Parameters:
table_name (str)
Name of the table from which the data will be exported to
remote database, in
[schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
remote_query (str)
Parameterized insert query to export gpudb table data into
remote database. The default value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **batch_size** --
Batch size, which determines how many rows to export per
round trip. The default value is '200000'.
* **datasink_name** --
Name of an existing external data sink to which table name
specified in input parameter *table_name* will be exported
* **jdbc_session_init_statement** --
Executes the statement per each jdbc session before doing
actual load. The default value is ''.
* **jdbc_connection_init_statement** --
Executes the statement once before doing actual load. The
default value is ''.
* **remote_table** --
Name of the target table to which the source table is exported.
When this option is specified, *remote_query* cannot be
specified. The default value is ''.
* **use_st_geomfrom_casts** --
Wraps parametrized variables with st_geomfromtext or
st_geomfromwkb based on source column type.
Allowed values are:
* true
* false
The default value is 'false'.
* **use_indexed_parameters** --
Uses $n style syntax when generating insert query for
remote_table option.
Allowed values are:
* true
* false
The default value is 'true'.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
count_inserted (long)
Number of records inserted into the target table.
count_skipped (long)
Number of records skipped.
count_updated (long)
[Not yet implemented] Number of records updated within the
target table.
info (dict of str to str)
Additional information.
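Example (an illustrative sketch; the connection URL, table name, data
sink name, and remote table name are assumptions)::

    from gpudb import GPUdb

    db = GPUdb( host = 'http://localhost:9191' )
    # Push the table's records to a remote table through an existing data sink
    resp = db.export_records_to_table(
        table_name = 'example.orders',
        options = { 'datasink_name': 'jdbc_sink',
                    'remote_table': 'warehouse.orders' } )
    print( resp.count_inserted )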
"""
assert isinstance( table_name, (basestring)), "export_records_to_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( remote_query, (basestring)), "export_records_to_table(): Argument 'remote_query' must be (one) of type(s) '(basestring)'; given %s" % type( remote_query ).__name__
assert isinstance( options, (dict)), "export_records_to_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['remote_query'] = remote_query
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/export/records/totable', obj, convert_to_attr_dict = True )
return response
# end export_records_to_table
# begin filter
def filter( self, table_name = None, view_name = '', expression = None, options
= {} ):
"""Filters data based on the specified expression. The results are
stored in a `result set <../../../../concepts/filtered_views/>`__ with
the
given input parameter *view_name*.
For details see `Expressions <../../../../concepts/expressions/>`__.
The response message contains the number of points for which the
expression
evaluated to be true, which is equivalent to the size of the result
view.
Parameters:
table_name (str)
Name of the table to filter, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. This
may be the name of a table or a view (when chaining queries).
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
expression (str)
The select expression to filter the specified table. For
details see `Expressions
<../../../../concepts/expressions/>`__.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
* **view_id** --
View this filtered view is part of. The default value is ''.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the view
specified in input parameter *view_name*.
Returns:
A dict with the following entries--
count (long)
The number of records that matched the given select expression.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( expression, (basestring)), "filter(): Argument 'expression' must be (one) of type(s) '(basestring)'; given %s" % type( expression ).__name__
assert isinstance( options, (dict)), "filter(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['expression'] = expression
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter', obj, convert_to_attr_dict = True )
return response
# end filter
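# Usage sketch (illustrative only, not part of the generated API): filter a
# table down to a view using an expression. Assumes `db` is an established
# GPUdb connection; the table, view, and column names below are hypothetical.
#
#     response = db.filter(
#         table_name = "example.flights",
#         view_name = "example.flights_delayed",
#         expression = "delay > 60"
#     )
#     print( response["count"], "matching records" )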
# begin filter_by_area
def filter_by_area( self, table_name = None, view_name = '', x_column_name =
None, x_vector = None, y_column_name = None, y_vector =
None, options = {} ):
"""Calculates which objects from a table are within a named area of
interest (NAI/polygon). The operation is synchronous, meaning that a
response
will not be returned until all the matching objects are fully
available. The
response payload provides the count of the resulting set. A new
resultant set
(view) which satisfies the input NAI restriction specification is
created with
the name input parameter *view_name* passed in as part of the input.
Parameters:
table_name (str)
Name of the table to filter, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. This
may be the name of a table or a view (when chaining queries).
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
x_column_name (str)
Name of the column containing the x values to be filtered.
x_vector (list of floats)
List of x coordinates of the vertices of the polygon
representing the area to be filtered. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
y_column_name (str)
Name of the column containing the y values to be filtered.
y_vector (list of floats)
List of y coordinates of the vertices of the polygon
representing the area to be filtered. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema provided is non-existent, it will be
automatically created.
Returns:
A dict with the following entries--
count (long)
The number of records passing the area filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_area(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_area(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( x_column_name, (basestring)), "filter_by_area(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
x_vector = x_vector if isinstance( x_vector, list ) else ( [] if (x_vector is None) else [ x_vector ] )
assert isinstance( y_column_name, (basestring)), "filter_by_area(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
y_vector = y_vector if isinstance( y_vector, list ) else ( [] if (y_vector is None) else [ y_vector ] )
assert isinstance( options, (dict)), "filter_by_area(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['x_column_name'] = x_column_name
obj['x_vector'] = x_vector
obj['y_column_name'] = y_column_name
obj['y_vector'] = y_vector
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/byarea', obj, convert_to_attr_dict = True )
return response
# end filter_by_area
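# Usage sketch (illustrative only): keep only the points that fall inside a
# square polygon. The x/y vectors list the polygon's vertices in order.
# Assumes `db` is an established GPUdb connection; names are hypothetical.
#
#     response = db.filter_by_area(
#         table_name = "example.points",
#         view_name = "example.points_in_area",
#         x_column_name = "x",
#         x_vector = [ 0.0, 10.0, 10.0, 0.0 ],
#         y_column_name = "y",
#         y_vector = [ 0.0, 0.0, 10.0, 10.0 ]
#     )
#     print( response["count"], "points inside the polygon" )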
# begin filter_by_area_geometry
def filter_by_area_geometry( self, table_name = None, view_name = '',
column_name = None, x_vector = None, y_vector =
None, options = {} ):
"""Calculates which geospatial geometry objects from a table intersect
a named area of interest (NAI/polygon). The operation is synchronous,
meaning
that a response will not be returned until all the matching objects are
fully
available. The response payload provides the count of the resulting
set. A new
resultant set (view) which satisfies the input NAI restriction
specification is
created with the name input parameter *view_name* passed in as part of
the input.
Parameters:
table_name (str)
Name of the table to filter, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. This
may be the name of a table or a view (when chaining queries).
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
column_name (str)
Name of the geospatial geometry column to be filtered.
x_vector (list of floats)
List of x coordinates of the vertices of the polygon
representing the area to be filtered. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
y_vector (list of floats)
List of y coordinates of the vertices of the polygon
representing the area to be filtered. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] The schema for the newly created view. If the
schema is non-existent, it will be automatically created.
Returns:
A dict with the following entries--
count (long)
The number of records passing the area filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_area_geometry(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_area_geometry(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( column_name, (basestring)), "filter_by_area_geometry(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
x_vector = x_vector if isinstance( x_vector, list ) else ( [] if (x_vector is None) else [ x_vector ] )
y_vector = y_vector if isinstance( y_vector, list ) else ( [] if (y_vector is None) else [ y_vector ] )
assert isinstance( options, (dict)), "filter_by_area_geometry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['column_name'] = column_name
obj['x_vector'] = x_vector
obj['y_vector'] = y_vector
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/byarea/geometry', obj, convert_to_attr_dict = True )
return response
# end filter_by_area_geometry
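# Usage sketch (illustrative only): the same polygon filter, but against a WKT
# geometry column instead of separate x/y columns. Assumes `db` is an
# established GPUdb connection; names are hypothetical.
#
#     response = db.filter_by_area_geometry(
#         table_name = "example.shapes",
#         view_name = "example.shapes_in_area",
#         column_name = "geom",
#         x_vector = [ 0.0, 10.0, 10.0, 0.0 ],
#         y_vector = [ 0.0, 0.0, 10.0, 10.0 ]
#     )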
# begin filter_by_box
def filter_by_box( self, table_name = None, view_name = '', x_column_name =
None, min_x = None, max_x = None, y_column_name = None,
min_y = None, max_y = None, options = {} ):
"""Calculates how many objects within the given table lie in a
rectangular box. The operation is synchronous, meaning that a response
will not
be returned until all the objects are fully available. The response
payload
provides the count of the resulting set. A new resultant set which
satisfies the
input NAI restriction specification is also created when input
parameter *view_name* is
passed in as part of the input payload.
Parameters:
table_name (str)
Name of the table on which the bounding box operation will be
performed, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
x_column_name (str)
Name of the column on which to perform the bounding box query.
Must be a valid numeric column.
min_x (float)
Lower bound for the column chosen by input parameter
*x_column_name*. Must be less than or equal to input parameter
*max_x*.
max_x (float)
Upper bound for input parameter *x_column_name*. Must be
greater than or equal to input parameter *min_x*.
y_column_name (str)
Name of a column on which to perform the bounding box query.
Must be a valid numeric column.
min_y (float)
Lower bound for input parameter *y_column_name*. Must be less
than or equal to input parameter *max_y*.
max_y (float)
Upper bound for input parameter *y_column_name*. Must be
greater than or equal to input parameter *min_y*.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
Returns:
A dict with the following entries--
count (long)
The number of records passing the box filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_box(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_box(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( x_column_name, (basestring)), "filter_by_box(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
assert isinstance( min_x, (int, long, float)), "filter_by_box(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
assert isinstance( max_x, (int, long, float)), "filter_by_box(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
assert isinstance( y_column_name, (basestring)), "filter_by_box(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
assert isinstance( min_y, (int, long, float)), "filter_by_box(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
assert isinstance( max_y, (int, long, float)), "filter_by_box(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
assert isinstance( options, (dict)), "filter_by_box(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['x_column_name'] = x_column_name
obj['min_x'] = min_x
obj['max_x'] = max_x
obj['y_column_name'] = y_column_name
obj['min_y'] = min_y
obj['max_y'] = max_y
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/bybox', obj, convert_to_attr_dict = True )
return response
# end filter_by_box
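# Usage sketch (illustrative only): bounding-box filter over two numeric
# columns. Assumes `db` is an established GPUdb connection; names are
# hypothetical.
#
#     response = db.filter_by_box(
#         table_name = "example.points",
#         view_name = "example.points_in_box",
#         x_column_name = "x", min_x = -10.0, max_x = 10.0,
#         y_column_name = "y", min_y = -5.0,  max_y = 5.0
#     )
#     print( response["count"], "points inside the box" )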
# begin filter_by_box_geometry
def filter_by_box_geometry( self, table_name = None, view_name = '', column_name
= None, min_x = None, max_x = None, min_y =
None, max_y = None, options = {} ):
"""Calculates which geospatial geometry objects from a table intersect
a rectangular box. The operation is synchronous, meaning that a
response will
not be returned until all the objects are fully available. The response
payload
provides the count of the resulting set. A new resultant set which
satisfies the
input NAI restriction specification is also created when input
parameter *view_name* is
passed in as part of the input payload.
Parameters:
table_name (str)
Name of the table on which the bounding box operation will be
performed, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
column_name (str)
Name of the geospatial geometry column to be filtered.
min_x (float)
Lower bound for the x-coordinate of the rectangular box. Must
be less than or equal to input parameter *max_x*.
max_x (float)
Upper bound for the x-coordinate of the rectangular box. Must
be greater than or equal to input parameter *min_x*.
min_y (float)
Lower bound for the y-coordinate of the rectangular box. Must
be less than or equal to input parameter *max_y*.
max_y (float)
Upper bound for the y-coordinate of the rectangular box. Must
be greater than or equal to input parameter *min_y*.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema provided is non-existent, it will be
automatically created.
Returns:
A dict with the following entries--
count (long)
The number of records passing the box filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_box_geometry(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_box_geometry(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( column_name, (basestring)), "filter_by_box_geometry(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( min_x, (int, long, float)), "filter_by_box_geometry(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
assert isinstance( max_x, (int, long, float)), "filter_by_box_geometry(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
assert isinstance( min_y, (int, long, float)), "filter_by_box_geometry(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
assert isinstance( max_y, (int, long, float)), "filter_by_box_geometry(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
assert isinstance( options, (dict)), "filter_by_box_geometry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['column_name'] = column_name
obj['min_x'] = min_x
obj['max_x'] = max_x
obj['min_y'] = min_y
obj['max_y'] = max_y
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/bybox/geometry', obj, convert_to_attr_dict = True )
return response
# end filter_by_box_geometry
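# Usage sketch (illustrative only): bounding-box filter against a WKT geometry
# column. Assumes `db` is an established GPUdb connection; names are
# hypothetical.
#
#     response = db.filter_by_box_geometry(
#         table_name = "example.shapes",
#         view_name = "example.shapes_in_box",
#         column_name = "geom",
#         min_x = -10.0, max_x = 10.0,
#         min_y = -5.0,  max_y = 5.0
#     )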
# begin filter_by_geometry
def filter_by_geometry( self, table_name = None, view_name = '', column_name =
None, input_wkt = '', operation = None, options = {}
):
"""Applies a geometry filter against a geospatial geometry column in a
given table or view. The filtering geometry is provided by input
parameter *input_wkt*.
Parameters:
table_name (str)
Name of the table on which the filter by geometry will be
performed, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table or view containing a geospatial geometry
column.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
column_name (str)
Name of the column to be used in the filter. Must be a
geospatial geometry column.
input_wkt (str)
A geometry in WKT format that will be used to filter the
objects in input parameter *table_name*. The default value is
''.
operation (str)
The geometric filtering operation to perform
Allowed values are:
* **contains** --
Matches records that contain the given WKT in input parameter
*input_wkt*, i.e. the given WKT is within the bounds of a
record's geometry.
* **crosses** --
Matches records that cross the given WKT.
* **disjoint** --
Matches records that are disjoint from the given WKT.
* **equals** --
Matches records that are the same as the given WKT.
* **intersects** --
Matches records that intersect the given WKT.
* **overlaps** --
Matches records that overlap the given WKT.
* **touches** --
Matches records that touch the given WKT.
* **within** --
Matches records that are within the given WKT.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema provided is non-existent, it will be
automatically created.
Returns:
A dict with the following entries--
count (long)
The number of records passing the geometry filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_geometry(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_geometry(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( column_name, (basestring)), "filter_by_geometry(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( input_wkt, (basestring)), "filter_by_geometry(): Argument 'input_wkt' must be (one) of type(s) '(basestring)'; given %s" % type( input_wkt ).__name__
assert isinstance( operation, (basestring)), "filter_by_geometry(): Argument 'operation' must be (one) of type(s) '(basestring)'; given %s" % type( operation ).__name__
assert isinstance( options, (dict)), "filter_by_geometry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['column_name'] = column_name
obj['input_wkt'] = input_wkt
obj['operation'] = operation
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/bygeometry', obj, convert_to_attr_dict = True )
return response
# end filter_by_geometry
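# Usage sketch (illustrative only): keep only geometries that fall within a
# WKT polygon. Assumes `db` is an established GPUdb connection; names are
# hypothetical.
#
#     response = db.filter_by_geometry(
#         table_name = "example.shapes",
#         view_name = "example.shapes_within",
#         column_name = "geom",
#         input_wkt = "POLYGON((0 0, 10 0, 10 10, 0 10, 0 0))",
#         operation = "within"
#     )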
# begin filter_by_list
def filter_by_list( self, table_name = None, view_name = '', column_values_map =
None, options = {} ):
"""Calculates which records from a table have values in the given list
for the corresponding column. The operation is synchronous, meaning
that a
response will not be returned until all the objects are fully
available. The
response payload provides the count of the resulting set. A new
resultant set
(view) which satisfies the input filter specification is also created
if
input parameter *view_name* is passed in as part of the request.
For example, if a type definition has the columns 'x' and 'y', then a
filter by
list query with the column map
{"x":["10.1", "2.3"], "y":["0.0", "-31.5", "42.0"]} will return
the count of all data points whose x and y values match both in the
respective
x- and y-lists, e.g., "x = 10.1 and y = 0.0", "x = 2.3 and y = -31.5",
etc.
However, a record with "x = 10.1 and y = -31.5" or "x = 2.3 and y =
0.0"
would not be returned because the values in the given lists do not
correspond.
Parameters:
table_name (str)
Name of the table to filter, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. This
may be the name of a table or a view (when chaining queries).
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
column_values_map (dict of str to lists of str)
List of values for the corresponding column in the table
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema provided is non-existent, it will be
automatically created.
* **filter_mode** --
String indicating the filter mode, either 'in_list' or
'not_in_list'.
Allowed values are:
* **in_list** --
The filter will match all items that are in the provided
list(s).
* **not_in_list** --
The filter will match all items that are not in the
provided list(s).
The default value is 'in_list'.
Returns:
A dict with the following entries--
count (long)
The number of records passing the list filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_list(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_list(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( column_values_map, (dict)), "filter_by_list(): Argument 'column_values_map' must be (one) of type(s) '(dict)'; given %s" % type( column_values_map ).__name__
assert isinstance( options, (dict)), "filter_by_list(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['column_values_map'] = self.__sanitize_dicts( column_values_map )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/bylist', obj, convert_to_attr_dict = True )
return response
# end filter_by_list
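# Usage sketch (illustrative only): match records against per-column value
# lists, as in the x/y example from the docstring above. Assumes `db` is an
# established GPUdb connection; names are hypothetical.
#
#     response = db.filter_by_list(
#         table_name = "example.points",
#         view_name = "example.points_matching",
#         column_values_map = { "x": [ "10.1", "2.3" ],
#                               "y": [ "0.0", "-31.5", "42.0" ] }
#     )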
# begin filter_by_radius
def filter_by_radius( self, table_name = None, view_name = '', x_column_name =
None, x_center = None, y_column_name = None, y_center
= None, radius = None, options = {} ):
"""Calculates which objects from a table lie within a circle with the
given radius and center point (i.e. circular NAI). The operation is
synchronous,
meaning that a response will not be returned until all the objects are
fully
available. The response payload provides the count of the resulting
set. A new
resultant set (view) which satisfies the input circular NAI restriction
specification is also created if input parameter *view_name* is
passed in as part of
the request.
For track data, all track points that lie within the circle plus one
point on
either side of the circle (if the track goes beyond the circle) will be
included
in the result.
Parameters:
table_name (str)
Name of the table on which the filter by radius operation will
be performed, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
x_column_name (str)
Name of the column to be used for the x-coordinate (the
longitude) of the center.
x_center (float)
Value of the longitude of the center. Must be within [-180.0,
180.0]. The minimum allowed value is -180. The maximum allowed
value is 180.
y_column_name (str)
Name of the column to be used for the y-coordinate (the
latitude) of the center.
y_center (float)
Value of the latitude of the center. Must be within [-90.0,
90.0]. The minimum allowed value is -90. The maximum allowed
value is 90.
radius (float)
The radius of the circle within which the search will be
performed. Must be a non-zero positive value. It is in meters;
so, for example, a value of '42000' means 42 km. The minimum
allowed value is 0. The maximum allowed value is MAX_INT.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema which is to contain the newly
created view. If the schema is non-existent, it will be
automatically created.
Returns:
A dict with the following entries--
count (long)
The number of records passing the radius filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_radius(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_radius(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( x_column_name, (basestring)), "filter_by_radius(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
assert isinstance( x_center, (int, long, float)), "filter_by_radius(): Argument 'x_center' must be (one) of type(s) '(int, long, float)'; given %s" % type( x_center ).__name__
assert isinstance( y_column_name, (basestring)), "filter_by_radius(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
assert isinstance( y_center, (int, long, float)), "filter_by_radius(): Argument 'y_center' must be (one) of type(s) '(int, long, float)'; given %s" % type( y_center ).__name__
assert isinstance( radius, (int, long, float)), "filter_by_radius(): Argument 'radius' must be (one) of type(s) '(int, long, float)'; given %s" % type( radius ).__name__
assert isinstance( options, (dict)), "filter_by_radius(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['x_column_name'] = x_column_name
obj['x_center'] = x_center
obj['y_column_name'] = y_column_name
obj['y_center'] = y_center
obj['radius'] = radius
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/byradius', obj, convert_to_attr_dict = True )
return response
# end filter_by_radius
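# Usage sketch (illustrative only): circular filter around a lon/lat center;
# the radius is in meters. Assumes `db` is an established GPUdb connection;
# names are hypothetical.
#
#     response = db.filter_by_radius(
#         table_name = "example.points",
#         view_name = "example.points_nearby",
#         x_column_name = "lon", x_center = -77.0,
#         y_column_name = "lat", y_center = 38.9,
#         radius = 42000.0
#     )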
# begin filter_by_radius_geometry
def filter_by_radius_geometry( self, table_name = None, view_name = '',
column_name = None, x_center = None, y_center
= None, radius = None, options = {} ):
"""Calculates which geospatial geometry objects from a table intersect
a circle with the given radius and center point (i.e. circular NAI).
The
operation is synchronous, meaning that a response will not be returned
until all
the objects are fully available. The response payload provides the
count of the
resulting set. A new resultant set (view) which satisfies the input
circular NAI
restriction specification is also created if input parameter
*view_name* is passed in
as part of the request.
Parameters:
table_name (str)
Name of the table on which the filter by radius operation will
be performed, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
column_name (str)
Name of the geospatial geometry column to be filtered.
x_center (float)
Value of the longitude of the center. Must be within [-180.0,
180.0]. The minimum allowed value is -180. The maximum allowed
value is 180.
y_center (float)
Value of the latitude of the center. Must be within [-90.0,
90.0]. The minimum allowed value is -90. The maximum allowed
value is 90.
radius (float)
The radius of the circle within which the search will be
performed. Must be a non-zero positive value. It is in meters;
so, for example, a value of '42000' means 42 km. The minimum
allowed value is 0. The maximum allowed value is MAX_INT.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema provided is non-existent, it will be
automatically created.
Returns:
A dict with the following entries--
count (long)
The number of records passing the radius filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_radius_geometry(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_radius_geometry(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( column_name, (basestring)), "filter_by_radius_geometry(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( x_center, (int, long, float)), "filter_by_radius_geometry(): Argument 'x_center' must be (one) of type(s) '(int, long, float)'; given %s" % type( x_center ).__name__
assert isinstance( y_center, (int, long, float)), "filter_by_radius_geometry(): Argument 'y_center' must be (one) of type(s) '(int, long, float)'; given %s" % type( y_center ).__name__
assert isinstance( radius, (int, long, float)), "filter_by_radius_geometry(): Argument 'radius' must be (one) of type(s) '(int, long, float)'; given %s" % type( radius ).__name__
assert isinstance( options, (dict)), "filter_by_radius_geometry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['column_name'] = column_name
obj['x_center'] = x_center
obj['y_center'] = y_center
obj['radius'] = radius
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/byradius/geometry', obj, convert_to_attr_dict = True )
return response
# end filter_by_radius_geometry
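# Usage sketch (illustrative only): the same circular filter applied to a WKT
# geometry column. Assumes `db` is an established GPUdb connection; names are
# hypothetical.
#
#     response = db.filter_by_radius_geometry(
#         table_name = "example.shapes",
#         view_name = "example.shapes_nearby",
#         column_name = "geom",
#         x_center = -77.0, y_center = 38.9,
#         radius = 42000.0
#     )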
# begin filter_by_range
def filter_by_range( self, table_name = None, view_name = '', column_name =
None, lower_bound = None, upper_bound = None, options =
{} ):
"""Calculates which objects from a table have a column that is within
the given bounds. An object from the table identified by input
parameter *table_name* is
added to the view input parameter *view_name* if its column is within
[input parameter *lower_bound*, input parameter *upper_bound*]
(inclusive). The operation is
synchronous. The response provides a count of the number of objects
which passed
the bound filter. Although this functionality can also be accomplished
with the
standard filter function, this method is more efficient.
For track objects, the count reflects how many points fall within the
given
bounds (which may not include all the track points of any given track).
Parameters:
table_name (str)
Name of the table on which the filter by range operation will
be performed, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
column_name (str)
Name of a column on which the operation would be applied.
lower_bound (float)
Value of the lower bound (inclusive).
upper_bound (float)
Value of the upper bound (inclusive).
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
Returns:
A dict with the following entries--
count (long)
The number of records passing the range filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_range(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_range(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( column_name, (basestring)), "filter_by_range(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( lower_bound, (int, long, float)), "filter_by_range(): Argument 'lower_bound' must be (one) of type(s) '(int, long, float)'; given %s" % type( lower_bound ).__name__
assert isinstance( upper_bound, (int, long, float)), "filter_by_range(): Argument 'upper_bound' must be (one) of type(s) '(int, long, float)'; given %s" % type( upper_bound ).__name__
assert isinstance( options, (dict)), "filter_by_range(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['column_name'] = column_name
obj['lower_bound'] = lower_bound
obj['upper_bound'] = upper_bound
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/byrange', obj, convert_to_attr_dict = True )
return response
# end filter_by_range
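# Usage sketch (illustrative only): keep records whose column value lies in an
# inclusive numeric range. Assumes `db` is an established GPUdb connection;
# names are hypothetical.
#
#     response = db.filter_by_range(
#         table_name = "example.flights",
#         view_name = "example.flights_short_delay",
#         column_name = "delay",
#         lower_bound = 0.0,
#         upper_bound = 60.0
#     )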
# begin filter_by_series
def filter_by_series( self, table_name = None, view_name = '', track_id = None,
target_track_ids = None, options = {} ):
"""Filters objects matching all points of the given track (works only
on track type data). It allows users to specify a particular track to
find all
other points in the table that fall within specified ranges (spatial
and
temporal) of all points of the given track. Additionally, the user can
specify
another track to see if the two intersect (or go close to each other
within the
specified ranges). The user also has the flexibility of using different
metrics
for the spatial distance calculation: Euclidean (flat geometry) or
Great Circle
(spherical geometry to approximate the Earth's surface distances). The
filtered
points are stored in a newly created result set. The return value of
the
function is the number of points in the resultant set (view).
This operation is synchronous, meaning that a response will not be
returned
until all the objects are fully available.
Parameters:
table_name (str)
Name of the table on which the filter by track operation will
be performed, in [schema_name.]table_name format, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be a currently existing table with a `track
<../../../../geospatial/geo_objects/>`__ present.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
track_id (str)
The ID of the track which will act as the filtering points.
Must be an existing track within the given table.
target_track_ids (list of str)
Up to one track ID to intersect with the "filter" track. If
provided, it must be a valid track ID within the given set.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
* **spatial_radius** --
A positive number passed as a string representing the radius
of the search area centered around each track point's
geospatial coordinates. The value is interpreted in meters.
Required parameter.
* **time_radius** --
A positive number passed as a string representing the maximum
allowable time difference between the timestamps of a
filtered object and the given track's points. The value is
interpreted in seconds. Required parameter.
* **spatial_distance_metric** --
A string representing the coordinate system to use for the
spatial search criteria. Acceptable values are 'euclidean'
and 'great_circle'. Optional parameter; default is
'euclidean'.
Allowed values are:
* euclidean
* great_circle
Returns:
A dict with the following entries--
count (long)
The number of records passing the series filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_series(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_series(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( track_id, (basestring)), "filter_by_series(): Argument 'track_id' must be (one) of type(s) '(basestring)'; given %s" % type( track_id ).__name__
target_track_ids = target_track_ids if isinstance( target_track_ids, list ) else ( [] if (target_track_ids is None) else [ target_track_ids ] )
assert isinstance( options, (dict)), "filter_by_series(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['track_id'] = track_id
obj['target_track_ids'] = target_track_ids
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/byseries', obj, convert_to_attr_dict = True )
return response
# end filter_by_series
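# Usage sketch (illustrative only): find records near all points of one track.
# The *spatial_radius* (meters) and *time_radius* (seconds) options are
# required per the docstring above. Assumes `db` is an established GPUdb
# connection; names are hypothetical.
#
#     response = db.filter_by_series(
#         table_name = "example.vehicle_tracks",
#         view_name = "example.tracks_nearby",
#         track_id = "track_42",
#         options = { "spatial_radius": "100", "time_radius": "30" }
#     )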
# begin filter_by_string
def filter_by_string( self, table_name = None, view_name = '', expression =
None, mode = None, column_names = None, options = {}
):
"""Calculates which objects from a table or view match a string
expression for the given string columns. Setting
*case_sensitive* can modify case sensitivity in matching
for all modes except *search*. For
*search* mode details and limitations, see
`Full Text Search <../../../../concepts/full_text_search/>`__.
Parameters:
table_name (str)
Name of the table on which the filter operation will be
performed, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table or view.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
expression (str)
The expression with which to filter the table.
mode (str)
The string filtering mode to apply. See below for details.
Allowed values are:
* **search** --
Full text search query with wildcards and boolean operators.
Note that for this mode, no column can be specified in input
parameter *column_names*; all string columns of the table
that have text search enabled will be searched.
* **equals** --
Exact whole-string match (accelerated).
* **contains** --
Partial substring match (not accelerated). If the column is
a string type (non-charN) and the number of records is too
large, it will return 0.
* **starts_with** --
Strings that start with the given expression (not
accelerated). If the column is a string type (non-charN) and
the number of records is too large, it will return 0.
* **regex** --
Full regular expression search (not accelerated). If the
column is a string type (non-charN) and the number of records
is too large, it will return 0.
column_names (list of str)
List of columns on which to apply the filter. Ignored for
*search* mode. The user can provide a single element (which
will be automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
* **case_sensitive** --
If *false* then string filtering will ignore case. Does not
apply to *search* mode.
Allowed values are:
* true
* false
The default value is 'true'.
Returns:
A dict with the following entries--
count (long)
The number of records that passed the string filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_string(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_string(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( expression, (basestring)), "filter_by_string(): Argument 'expression' must be (one) of type(s) '(basestring)'; given %s" % type( expression ).__name__
assert isinstance( mode, (basestring)), "filter_by_string(): Argument 'mode' must be (one) of type(s) '(basestring)'; given %s" % type( mode ).__name__
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
assert isinstance( options, (dict)), "filter_by_string(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['expression'] = expression
obj['mode'] = mode
obj['column_names'] = column_names
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/bystring', obj, convert_to_attr_dict = True )
return response
# end filter_by_string
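# Usage sketch (illustrative only): substring match against one string column.
# Assumes `db` is an established GPUdb connection; names are hypothetical.
#
#     response = db.filter_by_string(
#         table_name = "example.flights",
#         view_name = "example.flights_united",
#         expression = "United",
#         mode = "contains",
#         column_names = [ "carrier_name" ]
#     )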
# begin filter_by_table
def filter_by_table( self, table_name = None, view_name = '', column_name =
None, source_table_name = None,
source_table_column_name = None, options = {} ):
"""Filters objects in one table based on objects in another table. The
user must specify matching column types from the two tables (i.e. the
target
table from which objects will be filtered and the source table based on
which
the filter will be created); the column names need not be the same. If
input parameter *view_name* is specified, then the filtered objects
will be put in a
newly created view. The operation is synchronous, meaning that a
response will
not be returned until all objects are fully available in the result
view. The
return value contains the count (i.e. the size) of the resulting view.
Parameters:
table_name (str)
Name of the table whose data will be filtered, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
column_name (str)
Name of the column by whose value the data will be filtered
from the table designated by input parameter *table_name*.
source_table_name (str)
Name of the table whose data will be compared against in the
table called input parameter *table_name*, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table.
source_table_column_name (str)
Name of the column in the input parameter *source_table_name*
whose values will be used as the filter for table input
parameter *table_name*. Must be a geospatial geometry column if
in 'spatial' mode; otherwise, must match the type of input
parameter *column_name*.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
* **filter_mode** --
String indicating the filter mode, either *in_table* or
*not_in_table*.
Allowed values are:
* in_table
* not_in_table
The default value is 'in_table'.
* **mode** --
Mode - should be either *spatial* or *normal*.
Allowed values are:
* normal
* spatial
The default value is 'normal'.
* **buffer** --
Buffer size, in meters. Only relevant for *spatial* mode.
The default value is '0'.
* **buffer_method** --
Method used to buffer polygons. Only relevant for *spatial*
mode.
Allowed values are:
* **geos** --
Use geos 1 edge per corner algorithm
The default value is 'normal'.
* **max_partition_size** --
Maximum number of points in a partition. Only relevant for
*spatial* mode. The default value is '0'.
* **max_partition_score** --
Maximum number of points * edges in a partition. Only
relevant for *spatial* mode. The default value is '8000000'.
* **x_column_name** --
Name of column containing x value of point being filtered in
*spatial* mode. The default value is 'x'.
* **y_column_name** --
Name of column containing y value of point being filtered in
*spatial* mode. The default value is 'y'.
Returns:
A dict with the following entries--
count (long)
The number of records in input parameter *table_name* that have
input parameter *column_name* values matching input parameter
*source_table_column_name* values in input parameter
*source_table_name*.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_table(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( column_name, (basestring)), "filter_by_table(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( source_table_name, (basestring)), "filter_by_table(): Argument 'source_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( source_table_name ).__name__
assert isinstance( source_table_column_name, (basestring)), "filter_by_table(): Argument 'source_table_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( source_table_column_name ).__name__
assert isinstance( options, (dict)), "filter_by_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['column_name'] = column_name
obj['source_table_name'] = source_table_name
obj['source_table_column_name'] = source_table_column_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/bytable', obj, convert_to_attr_dict = True )
return response
# end filter_by_table
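# A minimal usage sketch for filter_by_table (hypothetical table and column
# names; assumes `db` is an already-connected GPUdb instance):
#
#     response = db.filter_by_table(
#         table_name = 'demo.orders',
#         view_name = 'demo.orders_with_known_customers',
#         column_name = 'customer_id',
#         source_table_name = 'demo.customers',
#         source_table_column_name = 'id',
#         options = { 'filter_mode': 'in_table' } )
#     print( response.count )   # records in demo.orders matching a customer id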
# begin filter_by_value
def filter_by_value( self, table_name = None, view_name = '', is_string = None,
value = 0, value_str = '', column_name = None, options
= {} ):
"""Calculates which objects from a table has a particular value for a
particular column. The input parameters provide a way to specify either
a String
or a Double valued column and a desired value for the column on which
the filter
is performed. The operation is synchronous, meaning that a response
will not be
returned until all the objects are fully available. The response
payload
provides the count of the resulting set. A new result view which
satisfies the
input filter restriction specification is also created with a view name
passed
in as part of the input payload. Although this functionality can also
be
accomplished with the standard filter function, it is more efficient.
Parameters:
table_name (str)
Name of an existing table on which to perform the calculation,
in [schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
is_string (bool)
Indicates whether the value being searched for is string or
numeric.
value (float)
The value to search for. The default value is 0.
value_str (str)
The string value to search for. The default value is ''.
column_name (str)
Name of a column on which the filter by value would be applied.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
Returns:
A dict with the following entries--
count (long)
The number of records passing the value filter.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_view_name** --
The fully qualified name of the view (i.e. including the
schema)
"""
assert isinstance( table_name, (basestring)), "filter_by_value(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( view_name, (basestring)), "filter_by_value(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
assert isinstance( is_string, (bool)), "filter_by_value(): Argument 'is_string' must be (one) of type(s) '(bool)'; given %s" % type( is_string ).__name__
assert isinstance( value, (int, long, float)), "filter_by_value(): Argument 'value' must be (one) of type(s) '(int, long, float)'; given %s" % type( value ).__name__
assert isinstance( value_str, (basestring)), "filter_by_value(): Argument 'value_str' must be (one) of type(s) '(basestring)'; given %s" % type( value_str ).__name__
assert isinstance( column_name, (basestring)), "filter_by_value(): Argument 'column_name' must be (one) of type(s) '(basestring)'; given %s" % type( column_name ).__name__
assert isinstance( options, (dict)), "filter_by_value(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['view_name'] = view_name
obj['is_string'] = is_string
obj['value'] = value
obj['value_str'] = value_str
obj['column_name'] = column_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/filter/byvalue', obj, convert_to_attr_dict = True )
return response
# end filter_by_value
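# A minimal usage sketch for filter_by_value on a string column (hypothetical
# table and column names; assumes `db` is an already-connected GPUdb instance):
#
#     response = db.filter_by_value(
#         table_name = 'demo.orders',
#         view_name = 'demo.orders_shipped',
#         is_string = True,
#         value_str = 'SHIPPED',
#         column_name = 'status' )
#     print( response.count )   # number of records whose status equals 'SHIPPED'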
# begin get_job
def get_job( self, job_id = None, options = {} ):
"""Get the status and result of asynchronously running job. See the
:meth:`GPUdb.create_job` for starting an asynchronous job. Some fields
of the response are filled only after the submitted job has finished
execution.
Parameters:
job_id (long)
A unique identifier for the job whose status and result is to
be fetched.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **job_tag** --
Job tag returned in call to create the job
Returns:
A dict with the following entries--
endpoint (str)
The endpoint which is being executed asynchronously. E.g.
'/alter/table'.
job_status (str)
Status of the submitted job.
Allowed values are:
* **RUNNING** --
The job is currently executing.
* **DONE** --
The job execution has successfully completed and the response
is included in the output parameter *job_response* or output
parameter *job_response_str* field
* **ERROR** --
The job was attempted, but an error was encountered. The
output parameter *status_map* contains the details of the
error in error_message
* **CANCELLED** --
Job cancellation was requested while the execution was in
progress.
running (bool)
True if the end point is still executing.
progress (int)
Approximate percentage of the job completed.
successful (bool)
True if the job execution completed and no errors were
encountered.
response_encoding (str)
The encoding of the job result (contained in output parameter
*job_response* or output parameter *job_response_str*).
Allowed values are:
* **binary** --
The job result is binary-encoded. It is contained in output
parameter *job_response*.
* **json** --
The job result is json-encoded. It is contained in output
parameter *job_response_str*.
job_response (bytes)
The binary-encoded response of the job. This field is
populated only when the job has completed and output parameter
*response_encoding* is *binary*
job_response_str (str)
The json-encoded response of the job. This field is populated
only when the job has completed and output parameter
*response_encoding* is *json*
status_map (dict of str to str)
Map of various status strings for the executed job.
Allowed keys are:
* **error_message** --
Explains what error occurred while running the job
asynchronously. This entry only exists when the job status
is *ERROR*.
info (dict of str to str)
Additional information.
"""
assert isinstance( job_id, (int, long, float)), "get_job(): Argument 'job_id' must be (one) of type(s) '(int, long, float)'; given %s" % type( job_id ).__name__
assert isinstance( options, (dict)), "get_job(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['job_id'] = job_id
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/get/job', obj, convert_to_attr_dict = True )
return response
# end get_job
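# A minimal sketch of polling an asynchronous job with get_job (assumes `db`
# is an already-connected GPUdb instance and `job_id` was returned by a prior
# create_job call):
#
#     import time
#     while True:
#         status = db.get_job( job_id )
#         if status.job_status in ( 'DONE', 'ERROR', 'CANCELLED' ):
#             break
#         time.sleep( 1 )   # poll until the job leaves the RUNNING state
#     if status.job_status == 'ERROR':
#         print( status.status_map )   # contains the error_message entry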
# begin get_records
def get_records( self, table_name = None, offset = 0, limit = -9999, encoding =
'binary', options = {}, get_record_type = True ):
"""Retrieves records from a given table, optionally filtered by an
expression and/or sorted by a column. This operation can be performed
on tables
and views. Records can be returned encoded as binary, json, or geojson.
This operation supports paging through the data via the input parameter
*offset* and
input parameter *limit* parameters. Note that when paging through a
table, if the table
(or the underlying table in case of a view) is updated (records are
inserted,
deleted or modified) the records retrieved may differ between calls
based on the
updates applied.
Parameters:
table_name (str)
Name of the table or view from which the records will be
fetched, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records; one of *binary*,
*json*, or *geojson*.
Allowed values are:
* binary
* json
* geojson
The default value is 'binary'.
options (dict of str to str)
The default value is an empty dict ( {} ).
Allowed keys are:
* **expression** --
Optional filter expression to apply to the table.
* **fast_index_lookup** --
Indicates if indexes should be used to perform the lookup for
a given expression if possible. Only applicable if there is
no sorting, the expression contains only equivalence
comparisons based on existing tables indexes and the range of
requested values is from [0 to END_OF_SET].
Allowed values are:
* true
* false
The default value is 'true'.
* **sort_by** --
Optional column that the data should be sorted by. Empty by
default (i.e. no sorting is applied).
* **sort_order** --
String indicating how the returned values should be sorted -
ascending or descending. If sort_order is provided, sort_by
has to be provided.
Allowed values are:
* ascending
* descending
The default value is 'ascending'.
get_record_type (bool)
If True, deduce and return the record type for the returned
records. Default is True.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
type_name (str)
type_schema (str)
Avro schema of output parameter *records_binary* or output
parameter *records_json*
records_binary (list of str)
If the input parameter *encoding* was 'binary', then this list
contains the binary encoded records retrieved from the table,
otherwise not populated.
records_json (list of str)
If the input parameter *encoding* was 'json', then this list
contains the JSON encoded records retrieved from the table. If
the input parameter *encoding* was 'geojson' this list contains
a single entry consisting of a GeoJSON FeatureCollection
containing a feature per record. Otherwise not populated.
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information.
record_type (:class:`RecordType` or None)
A :class:`RecordType` object using which the user can decode
the binary data by using :meth:`GPUdbRecord.decode_binary_data`.
Available only if get_record_type is True.
"""
assert isinstance( table_name, (basestring)), "get_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( offset, (int, long, float)), "get_records(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "get_records(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "get_records(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "get_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
assert ( isinstance(get_record_type, bool) ), "get_records: Argument 'get_record_type' must be a boolean; given %s" % type( get_record_type ).__name__
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/get/records', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
# Create the record type and save it in the response, if user asks for it
if get_record_type:
record_type = self.get_known_type( response.type_name )
response["record_type"] = record_type
return response
# end get_records
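# A minimal usage sketch for get_records with a filter expression and paging
# (hypothetical table and column names; assumes `db` is an already-connected
# GPUdb instance):
#
#     response = db.get_records(
#         table_name = 'demo.orders',
#         offset = 0,
#         limit = 100,
#         encoding = 'json',
#         options = { 'expression': "status = 'SHIPPED'",
#                     'sort_by': 'order_date',
#                     'sort_order': 'descending' } )
#     if response.is_ok():
#         for rec in response.records_json:   # JSON-encoded record strings
#             print( rec )
#         print( response.has_more_records )  # True if more pages remain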
# begin get_records_and_decode
def get_records_and_decode( self, table_name = None, offset = 0, limit = -9999,
encoding = 'binary', options = {}, record_type =
None, force_primitive_return_types = True ):
"""Retrieves records from a given table, optionally filtered by an
expression and/or sorted by a column. This operation can be performed
on tables
and views. Records can be returned encoded as binary, json, or geojson.
This operation supports paging through the data via the input parameter
*offset* and
input parameter *limit* parameters. Note that when paging through a
table, if the table
(or the underlying table in case of a view) is updated (records are
inserted,
deleted or modified) the records retrieved may differ between calls
based on the
updates applied.
Parameters:
table_name (str)
Name of the table or view from which the records will be
fetched, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records; one of *binary*,
*json*, or *geojson*.
Allowed values are:
* binary
* json
* geojson
The default value is 'binary'.
options (dict of str to str)
The default value is an empty dict ( {} ).
Allowed keys are:
* **expression** --
Optional filter expression to apply to the table.
* **fast_index_lookup** --
Indicates if indexes should be used to perform the lookup for
a given expression if possible. Only applicable if there is
no sorting, the expression contains only equivalence
comparisons based on existing tables indexes and the range of
requested values is from [0 to END_OF_SET].
Allowed values are:
* true
* false
The default value is 'true'.
* **sort_by** --
Optional column that the data should be sorted by. Empty by
default (i.e. no sorting is applied).
* **sort_order** --
String indicating how the returned values should be sorted -
ascending or descending. If sort_order is provided, sort_by
has to be provided.
Allowed values are:
* ascending
* descending
The default value is 'ascending'.
record_type (:class:`RecordType` or None)
The record type expected in the results, or None to
determine the appropriate type automatically. If known,
providing this may improve performance in binary mode. Not used
in JSON mode. The default value is None.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
type_name (str)
type_schema (str)
Avro schema of output parameter *records_binary* or output
parameter *records_json*
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information.
records (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
"""
assert isinstance( table_name, (basestring)), "get_records_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( offset, (int, long, float)), "get_records_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "get_records_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "get_records_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "get_records_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
assert ( (record_type == None) or isinstance(record_type, RecordType) ), "get_records_and_decode: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
assert isinstance(force_primitive_return_types, bool), "get_records_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__
(REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/get/records", get_rsp_cext = True )
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response, raw_response = self.__submit_request( '/get/records', obj, get_rsp_cext = True, convert_to_attr_dict = True, return_raw_response_too = True )
if not response.is_ok():
return response
# Decode the data
if (encoding == 'binary'):
record_type = record_type if record_type else self.get_known_type( response.type_name )
records = record_type.decode_records( raw_response, response.records_binary )
if force_primitive_return_types:
records = _Util.convert_cext_records_to_ordered_dicts( records )
response["records"] = records
else:
response["records"] = [ json.loads(_r, object_pairs_hook = collections.OrderedDict)
for _r in response.records_json ]
# end if
del response["records_binary"]
del response["records_json"]
return response
# end get_records_and_decode
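# A minimal usage sketch for get_records_and_decode, which returns records
# already decoded into Record/OrderedDict objects (hypothetical table name;
# assumes `db` is an already-connected GPUdb instance):
#
#     response = db.get_records_and_decode(
#         table_name = 'demo.orders',
#         offset = 0,
#         limit = 10 )
#     if response.is_ok():
#         for record in response.records:
#             print( record )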
# begin get_records_by_column
def get_records_by_column( self, table_name = None, column_names = None, offset
= 0, limit = -9999, encoding = 'binary', options
= {} ):
"""For a given table, retrieves the values from the requested
column(s). Maps of column name to the array of values as well as the
column data
type are returned. This endpoint supports pagination with the input
parameter *offset*
and input parameter *limit* parameters.
`Window functions <../../../../concepts/window/>`__, which can perform
operations like moving averages, are available through this endpoint as
well as
:meth:`GPUdb.create_projection`.
When using pagination, if the table (or the underlying table in the
case of a
view) is modified (records are inserted, updated, or deleted) during a
call to
the endpoint, the records or values retrieved may differ between calls
based on
the type of the update, e.g., the contiguity across pages cannot be
relied upon.
If input parameter *table_name* is empty, selection is performed
against a single-row
virtual table. This can be useful in executing temporal
(`NOW() <../../../../concepts/expressions/#date-time-functions>`__),
identity
(`USER()
<../../../../concepts/expressions/#user-security-functions>`__), or
constant-based functions
(`GEODIST(-77.11, 38.88, -71.06, 42.36)
<../../../../concepts/expressions/#scalar-functions>`__).
The response is returned as a dynamic schema. For details see:
`dynamic schemas documentation
<../../../../api/concepts/#dynamic-schemas>`__.
Parameters:
table_name (str)
Name of the table or view on which this operation will be
performed, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. An
empty table name retrieves one record from a single-row virtual
table, where columns specified should be constants or constant
expressions.
column_names (list of str)
The list of column values to retrieve. The user can provide
a single element (which will be automatically promoted to a
list internally) or a list.
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records; either *binary* or
*json*.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str to str)
The default value is an empty dict ( {} ).
Allowed keys are:
* **expression** --
Optional filter expression to apply to the table.
* **sort_by** --
Optional column that the data should be sorted by. Used in
conjunction with *sort_order*. The *order_by* option can be
used in lieu of *sort_by* / *sort_order*. The default value
is ''.
* **sort_order** --
String indicating how the returned values should be sorted -
*ascending* or *descending*. If *sort_order* is provided,
*sort_by* has to be provided.
Allowed values are:
* ascending
* descending
The default value is 'ascending'.
* **order_by** --
Comma-separated list of the columns to be sorted by as well
as the sort direction, e.g., 'timestamp asc, x desc'. The
default value is ''.
* **convert_wkts_to_wkbs** --
If *true*, then WKT string columns will be returned as WKB
bytes.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
table_name (str)
The same table name as was passed in the parameter list.
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
binary_encoded_response (bytes)
Avro binary encoded response.
json_encoded_response (str)
Avro JSON encoded response.
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information.
record_type (:class:`RecordType` or None)
A :class:`RecordType` object using which the user can decode
the binary data by using :meth:`GPUdbRecord.decode_binary_data`.
If JSON encoding is used, then None.
"""
assert isinstance( table_name, (basestring)), "get_records_by_column(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
assert isinstance( offset, (int, long, float)), "get_records_by_column(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "get_records_by_column(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "get_records_by_column(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "get_records_by_column(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['column_names'] = column_names
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/get/records/bycolumn', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
# Create the record type and save it in the response, if applicable
if encoding == "binary":
record_type = RecordType.from_dynamic_schema( response.response_schema_str, response.binary_encoded_response )
response["record_type"] = record_type
else:
response["record_type"] = None
return response
# end get_records_by_column
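# A minimal usage sketch for get_records_by_column in JSON mode (hypothetical
# table and column names; assumes `db` is an already-connected GPUdb instance):
#
#     response = db.get_records_by_column(
#         table_name = 'demo.orders',
#         column_names = [ 'customer_id', 'total' ],
#         offset = 0,
#         limit = 1000,
#         encoding = 'json',
#         options = { 'order_by': 'total desc' } )
#     if response.is_ok():
#         print( response.total_number_of_records )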
# begin get_records_by_column_and_decode
def get_records_by_column_and_decode( self, table_name = None, column_names =
None, offset = 0, limit = -9999,
encoding = 'binary', options = {},
record_type = None,
force_primitive_return_types = True,
get_column_major = True ):
"""For a given table, retrieves the values from the requested
column(s). Maps of column name to the array of values as well as the
column data
type are returned. This endpoint supports pagination with the input
parameter *offset*
and input parameter *limit* parameters.
`Window functions <../../../../concepts/window/>`__, which can perform
operations like moving averages, are available through this endpoint as
well as
:meth:`GPUdb.create_projection`.
When using pagination, if the table (or the underlying table in the
case of a
view) is modified (records are inserted, updated, or deleted) during a
call to
the endpoint, the records or values retrieved may differ between calls
based on
the type of the update, e.g., the contiguity across pages cannot be
relied upon.
If input parameter *table_name* is empty, selection is performed
against a single-row
virtual table. This can be useful in executing temporal
(`NOW() <../../../../concepts/expressions/#date-time-functions>`__),
identity
(`USER()
<../../../../concepts/expressions/#user-security-functions>`__), or
constant-based functions
(`GEODIST(-77.11, 38.88, -71.06, 42.36)
<../../../../concepts/expressions/#scalar-functions>`__).
The response is returned as a dynamic schema. For details see:
`dynamic schemas documentation
<../../../../api/concepts/#dynamic-schemas>`__.
Parameters:
table_name (str)
Name of the table or view on which this operation will be
performed, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. An
empty table name retrieves one record from a single-row virtual
table, where columns specified should be constants or constant
expressions.
column_names (list of str)
The list of column values to retrieve. The user can provide
a single element (which will be automatically promoted to a
list internally) or a list.
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records; either *binary* or
*json*.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str to str)
The default value is an empty dict ( {} ).
Allowed keys are:
* **expression** --
Optional filter expression to apply to the table.
* **sort_by** --
Optional column that the data should be sorted by. Used in
conjunction with *sort_order*. The *order_by* option can be
used in lieu of *sort_by* / *sort_order*. The default value
is ''.
* **sort_order** --
String indicating how the returned values should be sorted -
*ascending* or *descending*. If *sort_order* is provided,
*sort_by* has to be provided.
Allowed values are:
* ascending
* descending
The default value is 'ascending'.
* **order_by** --
Comma-separated list of the columns to be sorted by as well
as the sort direction, e.g., 'timestamp asc, x desc'. The
default value is ''.
* **convert_wkts_to_wkbs** --
If *true*, then WKT string columns will be returned as WKB
bytes.
Allowed values are:
* true
* false
The default value is 'false'.
record_type (:class:`RecordType` or None)
The record type expected in the results, or None to
determine the appropriate type automatically. If known,
providing this may improve performance in binary mode. Not used
in JSON mode. The default value is None.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
get_column_major (bool)
Indicates if the decoded records will be transposed to be
column-major or returned as is (row-major). Default value is
True.
Returns:
A dict with the following entries--
table_name (str)
The same table name as was passed in the parameter list.
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information.
records (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
"""
assert isinstance( table_name, (basestring)), "get_records_by_column_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
assert isinstance( offset, (int, long, float)), "get_records_by_column_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "get_records_by_column_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "get_records_by_column_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "get_records_by_column_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
assert ( (record_type == None) or isinstance(record_type, RecordType) ), "get_records_by_column_and_decode: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
assert isinstance(force_primitive_return_types, bool), "get_records_by_column_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__
assert isinstance(get_column_major, bool), "get_records_by_column_and_decode: Argument 'get_column_major' must be bool; given %s" % type( get_column_major ).__name__
(REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/get/records/bycolumn", get_rsp_cext = True )
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['column_names'] = column_names
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response, raw_response = self.__submit_request( '/get/records/bycolumn', obj, get_rsp_cext = True, convert_to_attr_dict = True, return_raw_response_too = True )
if not response.is_ok():
return response
# Decode the data
if (encoding == 'binary'):
record_type = record_type if record_type else RecordType.from_dynamic_schema( response.response_schema_str, raw_response, response.binary_encoded_response )
records = record_type.decode_dynamic_records( raw_response, response.binary_encoded_response )
if force_primitive_return_types:
records = _Util.convert_cext_records_to_ordered_dicts( records )
# Transpose the data to column-major, if requested by the user
if get_column_major:
records = GPUdbRecord.transpose_data_to_col_major( records )
response["records"] = records
else:
records = json.loads( response.json_encoded_response )
if get_column_major:
# Get column-major data
records = GPUdbRecord.decode_dynamic_json_data_column_major( records, response.response_schema_str )
else:
# Get row-major data
records = GPUdbRecord.decode_dynamic_json_data_row_major( records, response.response_schema_str )
response["records"] = records
# end if
del response["binary_encoded_response"]
del response["json_encoded_response"]
return response
# end get_records_by_column_and_decode
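# A minimal usage sketch for get_records_by_column_and_decode (hypothetical
# table and column names; assumes `db` is an already-connected GPUdb
# instance). With get_column_major = True the decoded values are grouped by
# column rather than by record:
#
#     response = db.get_records_by_column_and_decode(
#         table_name = 'demo.orders',
#         column_names = [ 'customer_id', 'total' ],
#         limit = 100,
#         get_column_major = True )
#     if response.is_ok():
#         print( response.records )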
# begin get_records_by_series
def get_records_by_series( self, table_name = None, world_table_name = None,
offset = 0, limit = 250, encoding = 'binary',
options = {} ):
"""Retrieves the complete series/track records from the given
input parameter *world_table_name* based on the partial track
information contained in
the input parameter *table_name*.
This operation supports paging through the data via the input parameter
*offset* and
input parameter *limit* parameters.
In contrast to :meth:`GPUdb.get_records` this returns records grouped
by
series/track. So if input parameter *offset* is 0 and input parameter
*limit* is 5 this operation
would return the first 5 series/tracks in input parameter *table_name*.
Each series/track
will be returned sorted by their TIMESTAMP column.
Parameters:
table_name (str)
Name of the table or view for which series/tracks will be
fetched, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
world_table_name (str)
Name of the table containing the complete series/track
information to be returned for the tracks present in the input
parameter *table_name*, in [schema_name.]table_name format,
using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
Typically this is used when retrieving series/tracks from a
view (which contains partial series/tracks) but the user wants
to retrieve the entire original series/tracks. Can be blank.
offset (int)
A positive integer indicating the number of initial
series/tracks to skip (useful for paging through the results).
The default value is 0. The minimum allowed value is 0. The
maximum allowed value is MAX_INT.
limit (int)
A positive integer indicating the maximum number of
series/tracks to be returned, or END_OF_SET (-9999) to indicate
that the maximum number of results should be returned. The default
value is 250.
encoding (str)
Specifies the encoding for returned records; either *binary* or
*json*.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
table_names (list of str)
The table name (one per series/track) of the returned
series/tracks.
type_names (list of str)
The type IDs (one per series/track) of the returned
series/tracks.
type_schemas (list of str)
The type schemas (one per series/track) of the returned
series/tracks.
list_records_binary (list of lists of str)
If the encoding parameter of the request was 'binary' then this
list-of-lists contains the binary encoded records for each
object (inner list) in each series/track (outer list).
Otherwise, empty list-of-lists.
list_records_json (list of lists of str)
If the encoding parameter of the request was 'json' then this
list-of-lists contains the json encoded records for each object
(inner list) in each series/track (outer list). Otherwise,
empty list-of-lists.
info (dict of str to str)
Additional information.
record_types (list of :class:`RecordType`)
A list of :class:`RecordType` objects using which the user can
decode the binary data by using
:meth:`GPUdbRecord.decode_binary_data` per record.
"""
assert isinstance( table_name, (basestring)), "get_records_by_series(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( world_table_name, (basestring)), "get_records_by_series(): Argument 'world_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( world_table_name ).__name__
assert isinstance( offset, (int, long, float)), "get_records_by_series(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "get_records_by_series(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "get_records_by_series(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "get_records_by_series(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['world_table_name'] = world_table_name
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/get/records/byseries', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
# Create the record types and save them in the response
record_types = [ self.get_known_type( __type_id ) for __type_id in response.type_names ]
response["record_types"] = record_types
return response
# end get_records_by_series
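# A minimal usage sketch for get_records_by_series (hypothetical table names;
# assumes `db` is an already-connected GPUdb instance):
#
#     response = db.get_records_by_series(
#         table_name = 'demo.track_view',
#         world_table_name = 'demo.tracks',
#         offset = 0,
#         limit = 5,
#         encoding = 'json' )
#     if response.is_ok():
#         print( len( response.list_records_json ) )   # number of tracks returned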
# begin get_records_by_series_and_decode
def get_records_by_series_and_decode( self, table_name = None, world_table_name
= None, offset = 0, limit = 250,
encoding = 'binary', options = {},
force_primitive_return_types = True ):
"""Retrieves the complete series/track records from the given
input parameter *world_table_name* based on the partial track
information contained in
the input parameter *table_name*.
This operation supports paging through the data via the input parameter
*offset* and
input parameter *limit* parameters.
In contrast to :meth:`GPUdb.get_records` this returns records grouped
by
series/track. So if input parameter *offset* is 0 and input parameter
*limit* is 5 this operation
would return the first 5 series/tracks in input parameter *table_name*.
Each series/track
will be returned sorted by their TIMESTAMP column.
Parameters:
table_name (str)
Name of the table or view for which series/tracks will be
fetched, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
world_table_name (str)
Name of the table containing the complete series/track
information to be returned for the tracks present in the input
parameter *table_name*, in [schema_name.]table_name format,
using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
Typically this is used when retrieving series/tracks from a
view (which contains partial series/tracks) but the user wants
to retrieve the entire original series/tracks. Can be blank.
offset (int)
A positive integer indicating the number of initial
series/tracks to skip (useful for paging through the results).
The default value is 0. The minimum allowed value is 0. The
maximum allowed value is MAX_INT.
limit (int)
A positive integer indicating the maximum number of
series/tracks to be returned, or END_OF_SET (-9999) to indicate
that the maximum number of results should be returned. The default
value is 250.
encoding (str)
Specifies the encoding for returned records; either *binary* or
*json*.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
Returns:
A dict with the following entries--
table_names (list of str)
The table name (one per series/track) of the returned
series/tracks.
type_names (list of str)
The type IDs (one per series/track) of the returned
series/tracks.
type_schemas (list of str)
The type schemas (one per series/track) of the returned
series/tracks.
info (dict of str to str)
Additional information.
records (list of list of :class:`Record`)
A list of list of :class:`Record` objects which contain the
decoded records.
"""
assert isinstance( table_name, (basestring)), "get_records_by_series_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( world_table_name, (basestring)), "get_records_by_series_and_decode(): Argument 'world_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( world_table_name ).__name__
assert isinstance( offset, (int, long, float)), "get_records_by_series_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "get_records_by_series_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "get_records_by_series_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "get_records_by_series_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
assert isinstance(force_primitive_return_types, bool), "get_records_by_series_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__
(REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/get/records/byseries", get_rsp_cext = True )
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['world_table_name'] = world_table_name
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response, raw_response = self.__submit_request( '/get/records/byseries', obj, get_rsp_cext = True, convert_to_attr_dict = True, return_raw_response_too = True )
if not response.is_ok():
return response
# Decode the data
if (encoding == 'binary'):
_record_types = [ self.get_known_type( _type_id ) for _type_id in response.type_names ]
records = [ _rt.decode_records( raw_response, _records )
for _rt, _records in zip( _record_types, response.list_records_binary ) ]
if force_primitive_return_types:
records = [ _Util.convert_cext_records_to_ordered_dicts( _records ) for _records in records]
response["records"] = records
else:
response["records"] = [ [ json.loads(_record, object_pairs_hook = collections.OrderedDict) for _record in _records ]
for _records in response.list_records_json ]
# end if
del response["list_records_binary"]
del response["list_records_json"]
return response
# end get_records_by_series_and_decode
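# A minimal usage sketch for get_records_by_series_and_decode (hypothetical
# table names; assumes `db` is an already-connected GPUdb instance). Each
# element of response.records is the list of decoded records for one
# series/track:
#
#     response = db.get_records_by_series_and_decode(
#         table_name = 'demo.track_view',
#         world_table_name = 'demo.tracks',
#         limit = 5 )
#     if response.is_ok():
#         for track in response.records:
#             print( len( track ) )   # number of records in this track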
# begin get_records_from_collection
def get_records_from_collection( self, table_name = None, offset = 0, limit =
-9999, encoding = 'binary', options = {} ):
"""Retrieves records from a collection. The operation can optionally
return the record IDs which can be used in certain queries such as
:meth:`GPUdb.delete_records`.
This operation supports paging through the data via the input parameter
*offset* and
input parameter *limit* parameters.
Note that when using the Java API, it is not possible to retrieve
records from
join views using this operation.
(DEPRECATED)
Parameters:
table_name (str)
Name of the collection or table from which records are to be
retrieved, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing collection or table.
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use input parameter *offset* & input parameter *limit* to
request subsequent pages of results. The default value is
-9999.
encoding (str)
Specifies the encoding for returned records; either *binary* or
*json*.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str to str)
The default value is an empty dict ( {} ).
Allowed keys are:
* **return_record_ids** --
If *true* then return the internal record ID along with each
returned record.
Allowed values are:
* true
* false
The default value is 'false'.
* **expression** --
Optional filter expression to apply to the table. The
default value is ''.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
type_names (list of str)
The type IDs of the corresponding records in output parameter
*records_binary* or output parameter *records_json*. This is
useful when input parameter *table_name* is a heterogeneous
collection (collections containing tables of different types).
records_binary (list of str)
If the encoding parameter of the request was 'binary' then this
list contains the binary encoded records retrieved from the
table/collection. Otherwise, empty list.
records_json (list of str)
If the encoding parameter of the request was 'json', then this
list contains the JSON encoded records retrieved from the
table/collection. Otherwise, empty list.
record_ids (list of str)
If the 'return_record_ids' option of the request was 'true',
then this list contains the internal ID for each object.
Otherwise it will be empty.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **total_number_of_records** --
Total number of records.
* **has_more_records** --
Too many records. Returned a partial set.
Allowed values are:
* true
* false
record_types (list of :class:`RecordType`)
A list of :class:`RecordType` objects using which the user can
decode the binary data by using
:meth:`GPUdbRecord.decode_binary_data` per record.
"""
assert isinstance( table_name, (basestring)), "get_records_from_collection(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( offset, (int, long, float)), "get_records_from_collection(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "get_records_from_collection(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "get_records_from_collection(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "get_records_from_collection(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/get/records/fromcollection', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
# Create the record types and save them in the response
record_types = [ self.get_known_type( __type_id ) for __type_id in response.type_names ]
response["record_types"] = record_types
return response
# end get_records_from_collection
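# A minimal usage sketch for the (deprecated) get_records_from_collection
# endpoint, requesting internal record IDs (hypothetical table name; assumes
# `db` is an already-connected GPUdb instance):
#
#     response = db.get_records_from_collection(
#         table_name = 'demo.orders',
#         limit = 10,
#         encoding = 'json',
#         options = { 'return_record_ids': 'true' } )
#     if response.is_ok():
#         print( response.record_ids )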
# begin get_records_from_collection_and_decode
def get_records_from_collection_and_decode( self, table_name = None, offset = 0,
limit = -9999, encoding =
'binary', options = {},
force_primitive_return_types =
True ):
"""Retrieves records from a collection. The operation can optionally
return the record IDs which can be used in certain queries such as
:meth:`GPUdb.delete_records`.
This operation supports paging through the data via the input parameter
*offset* and
input parameter *limit* parameters.
Note that when using the Java API, it is not possible to retrieve
records from
join views using this operation.
(DEPRECATED)
Parameters:
table_name (str)
Name of the collection or table from which records are to be
retrieved, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing collection or table.
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use input parameter *offset* & input parameter *limit* to
request subsequent pages of results. The default value is
-9999.
encoding (str)
Specifies the encoding for returned records; either *binary* or
*json*.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str to str)
The default value is an empty dict ( {} ).
Allowed keys are:
* **return_record_ids** --
If *true* then return the internal record ID along with each
returned record.
Allowed values are:
* true
* false
The default value is 'false'.
* **expression** --
Optional filter expression to apply to the table. The
default value is ''.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
type_names (list of str)
The type IDs of the corresponding records in output parameter
*records_binary* or output parameter *records_json*. This is
useful when input parameter *table_name* is a heterogeneous
collection (collections containing tables of different types).
record_ids (list of str)
If the 'return_record_ids' option of the request was 'true',
then this list contains the internal ID for each object.
Otherwise it will be empty.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **total_number_of_records** --
Total number of records.
* **has_more_records** --
Too many records. Returned a partial set.
Allowed values are:
* true
* false
records (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
"""
assert isinstance( table_name, (basestring)), "get_records_from_collection_and_decode(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( offset, (int, long, float)), "get_records_from_collection_and_decode(): Argument 'offset' must be (one) of type(s) '(int, long, float)'; given %s" % type( offset ).__name__
assert isinstance( limit, (int, long, float)), "get_records_from_collection_and_decode(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "get_records_from_collection_and_decode(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "get_records_from_collection_and_decode(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
assert isinstance(force_primitive_return_types, bool), "get_records_from_collection_and_decode: Argument 'force_primitive_return_types' must be bool; given %s" % type( force_primitive_return_types ).__name__
(REQ_SCHEMA, RSP_SCHEMA_CEXT) = self.__get_schemas( "/get/records/fromcollection", get_rsp_cext = True )
# Force JSON encoding if client encoding is json and method encoding
# is binary (checking for binary so that we do not accidentally override
# the GeoJSON encoding)
if ( (self.encoding == "JSON") and (encoding == "binary") ):
encoding = "json"
obj = {}
obj['table_name'] = table_name
obj['offset'] = offset
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response, raw_response = self.__submit_request( '/get/records/fromcollection', obj, get_rsp_cext = True, convert_to_attr_dict = True, return_raw_response_too = True )
if not response.is_ok():
return response
# Decode the data
if (encoding == 'binary'):
record_types = [ self.get_known_type( type_id ) for type_id in response.type_names ]
records = [ rt.decode_records( raw_response, records )[ 0 ]
for rt, records in zip( record_types, response.records_binary ) ]
if force_primitive_return_types:
records = _Util.convert_cext_records_to_ordered_dicts( records )
response["records"] = records
else:
response["records"] = [ json.loads(record, object_pairs_hook = collections.OrderedDict) for record in response.records_json ]
# end if
del response["records_binary"]
del response["records_json"]
return response
# end get_records_from_collection_and_decode
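# Usage sketch (illustrative only, not part of the generated API): assumes a
# connected handle `db` created elsewhere via gpudb.GPUdb(...) and an existing
# table named 'example_table'; both names are hypothetical. Note that the call
# above is marked deprecated.
#
#   response = db.get_records_from_collection_and_decode(
#       table_name = 'example_table',
#       offset     = 0,
#       limit      = 100,
#       options    = { 'return_record_ids': 'true' } )
#   if response.is_ok():
#       for record in response.records:
#           pass  # each entry is a decoded Record (or OrderedDict)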
# begin get_vectortile
def get_vectortile( self, table_names = None, column_names = None, layers =
None, tile_x = None, tile_y = None, zoom = None, options
= {} ):
table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
column_names = column_names if isinstance( column_names, list ) else ( [] if (column_names is None) else [ column_names ] )
assert isinstance( layers, (dict)), "get_vectortile(): Argument 'layers' must be (one) of type(s) '(dict)'; given %s" % type( layers ).__name__
assert isinstance( tile_x, (int, long, float)), "get_vectortile(): Argument 'tile_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( tile_x ).__name__
assert isinstance( tile_y, (int, long, float)), "get_vectortile(): Argument 'tile_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( tile_y ).__name__
assert isinstance( zoom, (int, long, float)), "get_vectortile(): Argument 'zoom' must be (one) of type(s) '(int, long, float)'; given %s" % type( zoom ).__name__
assert isinstance( options, (dict)), "get_vectortile(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_names'] = table_names
obj['column_names'] = column_names
obj['layers'] = self.__sanitize_dicts( layers )
obj['tile_x'] = tile_x
obj['tile_y'] = tile_y
obj['zoom'] = zoom
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/get/vectortile', obj, convert_to_attr_dict = True )
return response
# end get_vectortile
# begin grant_permission
def grant_permission( self, principal = '', object = None, object_type = None,
permission = None, options = {} ):
"""Grant user or role the specified permission on the specified object.
Parameters:
principal (str)
Name of the user or role for which the permission is being
granted. Must be an existing user or role. The default value
is ''.
object (str)
Name of the object on which the permission is being granted. It is
recommended to use a fully-qualified name when possible.
object_type (str)
The type of object on which the permission is being granted.
Allowed values are:
* **context** --
Context
* **credential** --
Credential
* **datasink** --
Data Sink
* **datasource** --
Data Source
* **directory** --
KiFS File Directory
* **graph** --
A Graph object
* **proc** --
UDF Procedure
* **schema** --
Schema
* **sql_proc** --
SQL Procedure
* **system** --
System-level access
* **table** --
Database Table
* **table_monitor** --
Table monitor
permission (str)
Permission being granted.
Allowed values are:
* **admin** --
Full read/write and administrative access on the object.
* **connect** --
Connect access on the given data source or data sink.
* **delete** --
Delete rows from tables.
* **execute** --
Ability to Execute the Procedure object.
* **insert** --
Insert access to tables.
* **read** --
Ability to read, list and use the object.
* **update** --
Update access to the table.
* **user_admin** --
Access to administer users and roles that do not have
system_admin permission.
* **write** --
Access to write, change and delete objects.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **columns** --
Apply table security to these columns, comma-separated. The
default value is ''.
* **filter_expression** --
Optional filter expression to apply to this grant. Only rows
that match the filter will be affected. The default value is
''.
* **with_grant_option** --
Allow the recipient to grant the same permission (or subset)
to others.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
principal (str)
Value of input parameter *principal*.
object (str)
Value of input parameter *object*.
object_type (str)
Value of input parameter *object_type*.
permission (str)
Value of input parameter *permission*.
info (dict of str to str)
Additional information.
"""
assert isinstance( principal, (basestring)), "grant_permission(): Argument 'principal' must be (one) of type(s) '(basestring)'; given %s" % type( principal ).__name__
assert isinstance( object, (basestring)), "grant_permission(): Argument 'object' must be (one) of type(s) '(basestring)'; given %s" % type( object ).__name__
assert isinstance( object_type, (basestring)), "grant_permission(): Argument 'object_type' must be (one) of type(s) '(basestring)'; given %s" % type( object_type ).__name__
assert isinstance( permission, (basestring)), "grant_permission(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( options, (dict)), "grant_permission(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['principal'] = principal
obj['object'] = object
obj['object_type'] = object_type
obj['permission'] = permission
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/grant/permission', obj, convert_to_attr_dict = True )
return response
# end grant_permission
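# Usage sketch (illustrative only): assumes a connected handle `db`, an
# existing role 'analyst_role', and an existing table
# 'example_schema.orders'; all names are hypothetical.
#
#   response = db.grant_permission(
#       principal   = 'analyst_role',
#       object      = 'example_schema.orders',
#       object_type = 'table',
#       permission  = 'read',
#       options     = { 'columns': 'order_id,order_date' } )
#   if not response.is_ok():
#       pass  # inspect the response for error details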
# begin grant_permission_credential
def grant_permission_credential( self, name = None, permission = None,
credential_name = None, options = {} ):
"""Grants a `credential-level permission
<../../../../security/sec_concepts/#security-concepts-permissions-credential>`__
to a user or role.
Parameters:
name (str)
Name of the user or role to which the permission will be
granted. Must be an existing user or role.
permission (str)
Permission to grant to the user or role.
Allowed values are:
* **credential_admin** --
Full read/write and administrative access on the credential.
* **credential_read** --
Ability to read and use the credential.
credential_name (str)
Name of the credential on which the permission will be granted.
Must be an existing credential, or an empty string to grant
access on all credentials.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
credential_name (str)
Value of input parameter *credential_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "grant_permission_credential(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "grant_permission_credential(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( credential_name, (basestring)), "grant_permission_credential(): Argument 'credential_name' must be (one) of type(s) '(basestring)'; given %s" % type( credential_name ).__name__
assert isinstance( options, (dict)), "grant_permission_credential(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['credential_name'] = credential_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/grant/permission/credential', obj, convert_to_attr_dict = True )
return response
# end grant_permission_credential
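# Usage sketch (illustrative only): `db`, the user 'etl_user', and the
# credential 'example_cred' are hypothetical. Passing an empty string for
# credential_name would grant access on all credentials instead.
#
#   response = db.grant_permission_credential(
#       name            = 'etl_user',
#       permission      = 'credential_read',
#       credential_name = 'example_cred' )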
# begin grant_permission_datasource
def grant_permission_datasource( self, name = None, permission = None,
datasource_name = None, options = {} ):
"""Grants a `data source <../../../../concepts/data_sources/>`__
permission to a user or role.
Parameters:
name (str)
Name of the user or role to which the permission will be
granted. Must be an existing user or role.
permission (str)
Permission to grant to the user or role
Allowed values are:
* **admin** --
Admin access on the given data source
* **connect** --
Connect access on the given data source
datasource_name (str)
Name of the data source on which the permission will be
granted. Must be an existing data source, or an empty string to
grant permission on all data sources.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
datasource_name (str)
Value of input parameter *datasource_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "grant_permission_datasource(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "grant_permission_datasource(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( datasource_name, (basestring)), "grant_permission_datasource(): Argument 'datasource_name' must be (one) of type(s) '(basestring)'; given %s" % type( datasource_name ).__name__
assert isinstance( options, (dict)), "grant_permission_datasource(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['datasource_name'] = datasource_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/grant/permission/datasource', obj, convert_to_attr_dict = True )
return response
# end grant_permission_datasource
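# Usage sketch (illustrative only): `db`, the role 'ingest_role', and the
# data source 'example_ds' are hypothetical.
#
#   response = db.grant_permission_datasource(
#       name            = 'ingest_role',
#       permission      = 'connect',
#       datasource_name = 'example_ds' )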
# begin grant_permission_directory
def grant_permission_directory( self, name = None, permission = None,
directory_name = None, options = {} ):
"""Grants a `KiFS <../../../../tools/kifs/>`__ directory-level permission
to a user or role.
Parameters:
name (str)
Name of the user or role to which the permission will be
granted. Must be an existing user or role.
permission (str)
Permission to grant to the user or role.
Allowed values are:
* **directory_read** --
For files in the directory, access to list files, download
files, or use files in server side functions
* **directory_write** --
Access to upload files to, or delete files from, the
directory. A user or role with write access automatically has
read access
directory_name (str)
Name of the KiFS directory to which the permission grants
access. An empty directory name grants access to all KiFS
directories
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
directory_name (str)
Value of input parameter *directory_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "grant_permission_directory(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "grant_permission_directory(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( directory_name, (basestring)), "grant_permission_directory(): Argument 'directory_name' must be (one) of type(s) '(basestring)'; given %s" % type( directory_name ).__name__
assert isinstance( options, (dict)), "grant_permission_directory(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['directory_name'] = directory_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/grant/permission/directory', obj, convert_to_attr_dict = True )
return response
# end grant_permission_directory
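# Usage sketch (illustrative only): `db`, the user 'etl_user', and the KiFS
# directory 'staging' are hypothetical. Per the docstring above,
# directory_write also implies read access.
#
#   response = db.grant_permission_directory(
#       name           = 'etl_user',
#       permission     = 'directory_write',
#       directory_name = 'staging' )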
# begin grant_permission_proc
def grant_permission_proc( self, name = None, permission = None, proc_name =
None, options = {} ):
"""Grants a proc-level permission to a user or role.
Parameters:
name (str)
Name of the user or role to which the permission will be
granted. Must be an existing user or role.
permission (str)
Permission to grant to the user or role.
Allowed values are:
* **proc_admin** --
Admin access to the proc.
* **proc_execute** --
Execute access to the proc.
proc_name (str)
Name of the proc to which the permission grants access. Must be
an existing proc, or an empty string to grant access to all
procs.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
proc_name (str)
Value of input parameter *proc_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "grant_permission_proc(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "grant_permission_proc(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( proc_name, (basestring)), "grant_permission_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__
assert isinstance( options, (dict)), "grant_permission_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['proc_name'] = proc_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/grant/permission/proc', obj, convert_to_attr_dict = True )
return response
# end grant_permission_proc
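# Usage sketch (illustrative only): `db`, the role 'udf_runner', and the proc
# 'example_proc' are hypothetical. An empty proc_name would grant access to
# all procs instead.
#
#   response = db.grant_permission_proc(
#       name       = 'udf_runner',
#       permission = 'proc_execute',
#       proc_name  = 'example_proc' )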
# begin grant_permission_system
def grant_permission_system( self, name = None, permission = None, options = {}
):
"""Grants a system-level permission to a user or role.
Parameters:
name (str)
Name of the user or role to which the permission will be
granted. Must be an existing user or role.
permission (str)
Permission to grant to the user or role.
Allowed values are:
* **system_admin** --
Full access to all data and system functions.
* **system_user_admin** --
Access to administer users and roles that do not have
system_admin permission.
* **system_write** --
Read and write access to all tables.
* **system_read** --
Read-only access to all tables.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "grant_permission_system(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "grant_permission_system(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( options, (dict)), "grant_permission_system(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/grant/permission/system', obj, convert_to_attr_dict = True )
return response
# end grant_permission_system
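# Usage sketch (illustrative only): `db` and the role 'readonly_role' are
# hypothetical.
#
#   response = db.grant_permission_system( name = 'readonly_role',
#                                          permission = 'system_read' )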
# begin grant_permission_table
def grant_permission_table( self, name = None, permission = None, table_name =
None, filter_expression = '', options = {} ):
"""Grants a table-level permission to a user or role.
Parameters:
name (str)
Name of the user or role to which the permission will be
granted. Must be an existing user or role.
permission (str)
Permission to grant to the user or role.
Allowed values are:
* **table_admin** --
Full read/write and administrative access to the table.
* **table_insert** --
Insert access to the table.
* **table_update** --
Update access to the table.
* **table_delete** --
Delete access to the table.
* **table_read** --
Read access to the table.
table_name (str)
Name of the table to which the permission grants access, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table, view, or schema. If a schema, the
permission also applies to tables and views in the schema.
filter_expression (str)
Optional filter expression to apply to this grant. Only rows
that match the filter will be affected. The default value is
''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **columns** --
Apply security to these columns, comma-separated. The
default value is ''.
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
table_name (str)
Value of input parameter *table_name*.
filter_expression (str)
Value of input parameter *filter_expression*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "grant_permission_table(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "grant_permission_table(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( table_name, (basestring)), "grant_permission_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( filter_expression, (basestring)), "grant_permission_table(): Argument 'filter_expression' must be (one) of type(s) '(basestring)'; given %s" % type( filter_expression ).__name__
assert isinstance( options, (dict)), "grant_permission_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['table_name'] = table_name
obj['filter_expression'] = filter_expression
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/grant/permission/table', obj, convert_to_attr_dict = True )
return response
# end grant_permission_table
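# Usage sketch (illustrative only): `db`, the role 'analyst_role', and the
# table 'example_schema.orders' are hypothetical. Row-level (filter) and
# column-level security can be layered onto the grant as shown.
#
#   response = db.grant_permission_table(
#       name              = 'analyst_role',
#       permission        = 'table_read',
#       table_name        = 'example_schema.orders',
#       filter_expression = "region = 'WEST'",
#       options           = { 'columns': 'order_id,order_date' } )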
# begin grant_role
def grant_role( self, role = None, member = None, options = {} ):
"""Grants membership in a role to a user or role.
Parameters:
role (str)
Name of the role in which membership will be granted. Must be
an existing role.
member (str)
Name of the user or role that will be granted membership in
input parameter *role*. Must be an existing user or role.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
role (str)
Value of input parameter *role*.
member (str)
Value of input parameter *member*.
info (dict of str to str)
Additional information.
"""
assert isinstance( role, (basestring)), "grant_role(): Argument 'role' must be (one) of type(s) '(basestring)'; given %s" % type( role ).__name__
assert isinstance( member, (basestring)), "grant_role(): Argument 'member' must be (one) of type(s) '(basestring)'; given %s" % type( member ).__name__
assert isinstance( options, (dict)), "grant_role(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['role'] = role
obj['member'] = member
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/grant/role', obj, convert_to_attr_dict = True )
return response
# end grant_role
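# Usage sketch (illustrative only): `db`, the role 'analyst_role', and the
# user 'jdoe' are hypothetical.
#
#   response = db.grant_role( role = 'analyst_role', member = 'jdoe' )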
# begin has_permission
def has_permission( self, principal = '', object = None, object_type = None,
permission = None, options = {} ):
"""Checks if the specified user has the specified permission on the
specified object.
Parameters:
principal (str)
Name of the user for which the permission is being checked.
Must be an existing user. If blank, will use the current user.
The default value is ''.
object (str)
Name of object to check for the requested permission. It is
recommended to use a fully-qualified name when possible.
object_type (str)
The type of object being checked
Allowed values are:
* **context** --
Context
* **credential** --
Credential
* **datasink** --
Data Sink
* **datasource** --
Data Source
* **directory** --
KiFS File Directory
* **graph** --
A Graph object
* **proc** --
UDF Procedure
* **schema** --
Schema
* **sql_proc** --
SQL Procedure
* **system** --
System-level access
* **table** --
Database Table
* **table_monitor** --
Table monitor
permission (str)
Permission to check for.
Allowed values are:
* **admin** --
Full read/write and administrative access on the object.
* **connect** --
Connect access on the given data source or data sink.
* **delete** --
Delete rows from tables.
* **execute** --
Ability to Execute the Procedure object.
* **insert** --
Insert access to tables.
* **read** --
Ability to read, list and use the object.
* **update** --
Update access to the table.
* **user_admin** --
Access to administer users and roles that do not have
system_admin permission.
* **write** --
Access to write, change and delete objects.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_not_exists** --
If *false* will return an error if the provided input
parameter *object* does not exist or is blank. If *true* then
it will return *false* for output parameter *has_permission*.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
principal (str)
Value of input parameter *principal*
object (str)
Fully-qualified value of input parameter *object*
object_type (str)
Value of input parameter *object_type*
permission (str)
Value of input parameter *permission*
has_permission (bool)
Indicates whether the specified user has the specified
permission on the specified target.
Allowed values are:
* **true** --
User has the effective queried permission
* **false** --
User does not have the queried permission
filters (dict of str to str)
Map of column/filters that have been granted.
info (dict of str to str)
Additional information.
"""
assert isinstance( principal, (basestring)), "has_permission(): Argument 'principal' must be (one) of type(s) '(basestring)'; given %s" % type( principal ).__name__
assert isinstance( object, (basestring)), "has_permission(): Argument 'object' must be (one) of type(s) '(basestring)'; given %s" % type( object ).__name__
assert isinstance( object_type, (basestring)), "has_permission(): Argument 'object_type' must be (one) of type(s) '(basestring)'; given %s" % type( object_type ).__name__
assert isinstance( permission, (basestring)), "has_permission(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( options, (dict)), "has_permission(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['principal'] = principal
obj['object'] = object
obj['object_type'] = object_type
obj['permission'] = permission
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/has/permission', obj, convert_to_attr_dict = True )
return response
# end has_permission
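# Usage sketch (illustrative only): `db` and the table
# 'example_schema.orders' are hypothetical. Leaving `principal` at its
# default checks the current user.
#
#   response = db.has_permission(
#       object      = 'example_schema.orders',
#       object_type = 'table',
#       permission  = 'read',
#       options     = { 'no_error_if_not_exists': 'true' } )
#   if response.is_ok() and response.has_permission:
#       pass  # the current user can read the table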
# begin has_proc
def has_proc( self, proc_name = None, options = {} ):
"""Checks the existence of a proc with the given name.
Parameters:
proc_name (str)
Name of the proc to check for existence.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
proc_name (str)
Value of input parameter *proc_name*
proc_exists (bool)
Indicates whether the proc exists or not.
Allowed values are:
* true
* false
info (dict of str to str)
Additional information.
"""
assert isinstance( proc_name, (basestring)), "has_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__
assert isinstance( options, (dict)), "has_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['proc_name'] = proc_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/has/proc', obj, convert_to_attr_dict = True )
return response
# end has_proc
# begin has_role
def has_role( self, principal = '', role = None, options = {} ):
"""Checks if the specified user has the specified role.
Parameters:
principal (str)
Name of the user for which role membership is being checked.
Must be an existing user. If blank, will use the current user.
The default value is ''.
role (str)
Name of role to check for membership.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_not_exists** --
If *false* will return an error if the provided input
parameter *role* does not exist or is blank. If *true* then
it will return *false* for output parameter *has_role*.
Allowed values are:
* true
* false
The default value is 'false'.
* **only_direct** --
If *false* will search recursively if the input parameter
*principal* is a member of input parameter *role*. If *true*
then input parameter *principal* must directly be a member of
input parameter *role*.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
principal (str)
Value of input parameter *principal*
role (str)
input parameter *role* for which membership is being checked
has_role (bool)
Indicates whether the specified user has membership in the
specified target input parameter *role*.
Allowed values are:
* **true** --
User has membership in the role
* **false** --
User does not have membership in the role
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **direct** --
*true* when principal is directly a member of the role.
Allowed values are:
* true
* false
The default value is 'false'.
"""
assert isinstance( principal, (basestring)), "has_role(): Argument 'principal' must be (one) of type(s) '(basestring)'; given %s" % type( principal ).__name__
assert isinstance( role, (basestring)), "has_role(): Argument 'role' must be (one) of type(s) '(basestring)'; given %s" % type( role ).__name__
assert isinstance( options, (dict)), "has_role(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['principal'] = principal
obj['role'] = role
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/has/role', obj, convert_to_attr_dict = True )
return response
# end has_role
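# Usage sketch (illustrative only): `db` and the role 'analyst_role' are
# hypothetical. With only_direct left at its default, membership is checked
# recursively through nested roles.
#
#   response = db.has_role( role = 'analyst_role' )
#   if response.is_ok() and response.has_role:
#       pass  # the current user is a (direct or indirect) member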
# begin has_schema
def has_schema( self, schema_name = None, options = {} ):
"""Checks for the existence of a schema with the given name.
Parameters:
schema_name (str)
Name of the schema to check for existence, in root, using
standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
schema_name (str)
Value of input parameter *schema_name*
schema_exists (bool)
Indicates whether the schema exists or not.
Allowed values are:
* true
* false
info (dict of str to str)
Additional information.
"""
assert isinstance( schema_name, (basestring)), "has_schema(): Argument 'schema_name' must be (one) of type(s) '(basestring)'; given %s" % type( schema_name ).__name__
assert isinstance( options, (dict)), "has_schema(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['schema_name'] = schema_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/has/schema', obj, convert_to_attr_dict = True )
return response
# end has_schema
# begin has_table
def has_table( self, table_name = None, options = {} ):
"""Checks for the existence of a table with the given name.
Parameters:
table_name (str)
Name of the table to check for existence, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*
table_exists (bool)
Indicates whether the table exists or not.
Allowed values are:
* true
* false
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "has_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( options, (dict)), "has_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/has/table', obj, convert_to_attr_dict = True )
return response
# end has_table
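# Usage sketch (illustrative only): `db` and the table
# 'example_schema.orders' are hypothetical; has_schema, has_proc, and
# has_type follow the same request/response pattern.
#
#   response = db.has_table( table_name = 'example_schema.orders' )
#   table_exists = response.is_ok() and response.table_exists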
# begin has_type
def has_type( self, type_id = None, options = {} ):
"""Check for the existence of a type.
Parameters:
type_id (str)
Id of the type returned in response to
:meth:`GPUdb.create_type` request.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
type_id (str)
Value of input parameter *type_id*.
type_exists (bool)
Indicates whether the type exists or not.
Allowed values are:
* true
* false
info (dict of str to str)
Additional information.
"""
assert isinstance( type_id, (basestring)), "has_type(): Argument 'type_id' must be (one) of type(s) '(basestring)'; given %s" % type( type_id ).__name__
assert isinstance( options, (dict)), "has_type(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['type_id'] = type_id
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/has/type', obj, convert_to_attr_dict = True )
return response
# end has_type
# begin import_model
def import_model( self, model_name = None, registry_name = None, container =
None, run_function = None, model_type = None, options = {}
):
assert isinstance( model_name, (basestring)), "import_model(): Argument 'model_name' must be (one) of type(s) '(basestring)'; given %s" % type( model_name ).__name__
assert isinstance( registry_name, (basestring)), "import_model(): Argument 'registry_name' must be (one) of type(s) '(basestring)'; given %s" % type( registry_name ).__name__
assert isinstance( container, (basestring)), "import_model(): Argument 'container' must be (one) of type(s) '(basestring)'; given %s" % type( container ).__name__
assert isinstance( run_function, (basestring)), "import_model(): Argument 'run_function' must be (one) of type(s) '(basestring)'; given %s" % type( run_function ).__name__
assert isinstance( model_type, (basestring)), "import_model(): Argument 'model_type' must be (one) of type(s) '(basestring)'; given %s" % type( model_type ).__name__
assert isinstance( options, (dict)), "import_model(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['model_name'] = model_name
obj['registry_name'] = registry_name
obj['container'] = container
obj['run_function'] = run_function
obj['model_type'] = model_type
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/import/model', obj, convert_to_attr_dict = True )
return response
# end import_model
# begin insert_records
def insert_records( self, table_name = None, data = None, list_encoding = None,
options = {}, record_type = None ):
"""Adds multiple records to the specified table. The operation is
synchronous, meaning that a response will not be returned until all the
records
are fully inserted and available. The response payload provides the
counts of
the number of records actually inserted and/or updated, and can provide
the
unique identifier of each added record.
The input parameter *options* can be used to customize this
function's behavior.
The *update_on_existing_pk* option specifies the record
collision policy for inserting into a table with a
`primary key <../../../../concepts/tables/#primary-keys>`__, but is
ignored if
no primary key exists.
The *return_record_ids* option indicates that the
database should return the unique identifiers of inserted records.
Parameters:
table_name (str)
Name of table to which the records are to be added, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table.
data (list of Records)
An array of *binary* or *json* encoded data, or :class:`Record`
objects for the records to be added. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
list_encoding (str)
The encoding of the records to be inserted.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **update_on_existing_pk** --
Specifies the record collision policy for inserting into a
table
with a `primary key
<../../../../concepts/tables/#primary-keys>`__. If set to
*true*, any existing table record with primary
key values that match those of a record being inserted will
be replaced by that new record (the new
data will be "upserted"). If set to *false*,
any existing table record with primary key values that match
those of a record being inserted will
remain unchanged, while the new record will be rejected and
the error handled as determined by
*ignore_existing_pk*, *allow_partial_batch*, &
*return_individual_errors*. If the specified table does not
have a primary
key, then this option has no effect.
Allowed values are:
* **true** --
Upsert new records when primary keys match existing records
* **false** --
Reject new records when primary keys match existing records
The default value is 'false'.
* **ignore_existing_pk** --
Specifies the record collision error-suppression policy for
inserting into a table with a `primary key
<../../../../concepts/tables/#primary-keys>`__, only used
when
not in upsert mode (upsert mode is disabled when
*update_on_existing_pk* is
*false*). If set to
*true*, any record being inserted that is rejected
for having primary key values that match those of an existing
table record will be ignored with no
error generated. If *false*, the rejection of any
record for having primary key values matching an existing
record will result in an error being
reported, as determined by *allow_partial_batch* &
*return_individual_errors*. If the specified table does not
have a primary key or if upsert mode is in effect
(*update_on_existing_pk* is
*true*), then this option has no effect.
Allowed values are:
* **true** --
Ignore new records whose primary key values collide with
those of existing records
* **false** --
Treat as errors any new records whose primary key values
collide with those of existing records
The default value is 'false'.
* **return_record_ids** --
If *true* then return the internal record ID along with each
inserted record.
Allowed values are:
* true
* false
The default value is 'false'.
* **truncate_strings** --
If set to *true*, any strings which are too long for their
target charN string columns will be truncated to fit.
Allowed values are:
* true
* false
The default value is 'false'.
* **return_individual_errors** --
If set to *true*, success will always be returned, and any
errors found will be included in the info map. The
"bad_record_indices" entry is a comma-separated list of bad
records (0-based). And if so, there will also be an
"error_N" entry for each record with an error, where N is the
index (0-based).
Allowed values are:
* true
* false
The default value is 'false'.
* **allow_partial_batch** --
If set to *true*, all correct records will be inserted and
incorrect records will be rejected and reported. Otherwise,
the entire batch will be rejected if any records are
incorrect.
Allowed values are:
* true
* false
The default value is 'false'.
* **dry_run** --
If set to *true*, no data will be saved and any errors will
be returned.
Allowed values are:
* true
* false
The default value is 'false'.
record_type (RecordType)
A :class:`RecordType` object using which the binary data will
be encoded. If None, then it is assumed that the data is
already encoded, and no further encoding will occur. Default
is None.
Returns:
A dict with the following entries--
record_ids (list of str)
An array containing the IDs with which the added records are
identified internally.
count_inserted (int)
The number of records inserted.
count_updated (int)
The number of records updated.
info (dict of str to str)
Additional information.
Allowed keys are:
* **bad_record_indices** --
If return_individual_errors option is specified or implied,
returns a comma-separated list of invalid indices (0-based)
* **error_N** --
Error message for record at index N (0-based)
"""
assert isinstance( table_name, (basestring)), "insert_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
data = data if isinstance( data, list ) else ( [] if (data is None) else [ data ] )
assert isinstance( list_encoding, (basestring, type( None ))), "insert_records(): Argument 'list_encoding' must be (one) of type(s) '(basestring, type( None ))'; given %s" % type( list_encoding ).__name__
assert isinstance( options, (dict)), "insert_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
assert ( (record_type is None) or isinstance(record_type, RecordType) ), "insert_records: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
obj = {}
obj['table_name'] = table_name
list_encoding = list_encoding if list_encoding else self.__client_to_object_encoding()
obj['list_encoding'] = list_encoding
obj['options'] = self.__sanitize_dicts( options )
if (list_encoding == 'binary'):
# Convert the objects to proper Records
use_object_array, data = _Util.convert_binary_data_to_cext_records( self, table_name, data, record_type )
if use_object_array:
# First tuple element must be a RecordType or a Schema from the c-extension
obj['list'] = (data[0].type, data) if data else ()
else: # use avro-encoded bytes for the data
obj['list'] = data
obj['list_str'] = []
else:
obj['list_str'] = data
obj['list'] = () # needs a tuple for the c-extension
use_object_array = True
# end if
if use_object_array:
response = self.__submit_request( '/insert/records', obj, convert_to_attr_dict = True, get_req_cext = True )
else:
response = self.__submit_request( '/insert/records', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
return response
# end insert_records
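# Usage sketch (illustrative only): `db` and the table
# 'example_schema.orders' are hypothetical, as is the record layout. With
# JSON list encoding, each record may be supplied as a JSON string whose
# fields match the table's type.
#
#   import json
#   records = [ json.dumps( { 'order_id': 1, 'order_date': '2000-05-04' } ) ]
#   response = db.insert_records(
#       table_name    = 'example_schema.orders',
#       data          = records,
#       list_encoding = 'json',
#       options       = { 'return_record_ids': 'true' } )
#   if response.is_ok():
#       inserted_count = response.count_inserted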
# begin insert_records_from_files
def insert_records_from_files( self, table_name = None, filepaths = None,
modify_columns = {}, create_table_options =
{}, options = {} ):
"""Reads from one or more files and inserts the data into a new or
existing table.
The source data can be located either in `KiFS
<../../../../tools/kifs/>`__; on the cluster, accessible to
the database; or remotely, accessible via a pre-defined external
`data source <../../../../concepts/data_sources/>`__.
For delimited text files, there are two loading schemes: positional and
name-based. The name-based
loading scheme is enabled when the file has a header present and
*text_has_header* is set to
*true*. In this scheme, the source file(s) field names
must match the target table's column names exactly; however, the source
file can have more fields
than the target table has columns. If *error_handling* is set to
*permissive*, the source file can have fewer fields
than the target table has columns. If the name-based loading scheme is
being used, names matching
the file header's names may be provided to *columns_to_load* instead of
numbers, but ranges are not supported.
Note: Due to data being loaded in parallel, there is no insertion order
guaranteed. For tables with
primary keys, in the case of a primary key collision, this means it is
indeterminate which record
will be inserted first and remain, while the rest of the colliding key
records are discarded.
Returns once all files are processed.
Parameters:
table_name (str)
Name of the table into which the data will be inserted, in
[schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
If the table does not exist, the table will be created using
either an existing
*type_id* or the type inferred from the
file, and the new table name will have to meet standard
`table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
filepaths (list of str)
A list of file paths from which data will be sourced;
For paths in `KiFS <../../../../tools/kifs/>`__, use the uri
prefix of kifs:// followed by the path to
a file or directory. File matching by prefix is supported, e.g.
kifs://dir/file would match dir/file_1
and dir/file_2. When prefix matching is used, the path must
start with a full, valid KiFS directory name.
If an external data source is specified in *datasource_name*,
these file
paths must resolve to accessible files at that data source
location. Prefix matching is supported.
If the data source is hdfs, prefixes must be aligned with
directories, i.e. partial file names will
not match.
If no data source is specified, the files are assumed to be
local to the database and must all be
accessible to the gpudb user, residing on the path (or relative
to the path) specified by the
external files directory in the Kinetica
`configuration file
<../../../../config/#config-main-external-files>`__. Wildcards
(*) can be used to
specify a group of files. Prefix matching is supported, the
prefixes must be aligned with
directories.
If the first path ends in .tsv, the text delimiter will be
defaulted to a tab character.
If the first path ends in .psv, the text delimiter will be
defaulted to a pipe character (|). The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
modify_columns (dict of str to dicts of str to str)
Not implemented yet. The default value is an empty dict ( {}
).
create_table_options (dict of str to str)
Options from :meth:`GPUdb.create_table`, allowing the structure
of the table to
be defined independently of the data source, when creating the
target table. The default value is an empty dict ( {} ).
Allowed keys are:
* **type_id** --
ID of a currently registered `type
<../../../../concepts/types/>`__.
* **no_error_if_exists** --
If *true*,
prevents an error from occurring if the table already exists
and is of the given type. If a table with
the same name but a different type exists, it is still an
error.
Allowed values are:
* true
* false
The default value is 'false'.
* **is_replicated** --
Affects the `distribution scheme
<../../../../concepts/tables/#distribution>`__
for the table's data. If *true* and the
given table has no explicit `shard key
<../../../../concepts/tables/#shard-key>`__ defined, the
table will be `replicated
<../../../../concepts/tables/#replication>`__. If
*false*, the table will be
`sharded <../../../../concepts/tables/#sharding>`__ according
to the shard key specified in the
given *type_id*, or
`randomly sharded
<../../../../concepts/tables/#random-sharding>`__, if no
shard key is specified.
Note that a type containing a shard key cannot be used to
create a replicated table.
Allowed values are:
* true
* false
The default value is 'false'.
* **foreign_keys** --
Semicolon-separated list of
`foreign keys <../../../../concepts/tables/#foreign-keys>`__,
of the format
'(source_column_name [, ...]) references
target_table_name(primary_key_column_name [, ...]) [as
foreign_key_name]'.
* **foreign_shard_key** --
Foreign shard key of the format
'source_column references shard_by_column from
target_table(primary_key_column)'.
* **partition_type** --
`Partitioning <../../../../concepts/tables/#partitioning>`__
scheme to use.
Allowed values are:
* **RANGE** --
Use `range partitioning
<../../../../concepts/tables/#partitioning-by-range>`__.
* **INTERVAL** --
Use `interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__.
* **LIST** --
Use `list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__.
* **HASH** --
Use `hash partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__.
* **SERIES** --
Use `series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__.
* **partition_keys** --
Comma-separated list of partition keys, which are the columns
or
column expressions by which records will be assigned to
partitions defined by
*partition_definitions*.
* **partition_definitions** --
Comma-separated list of partition definitions, whose format
depends
on the choice of *partition_type*. See
`range partitioning
<../../../../concepts/tables/#partitioning-by-range>`__,
`interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__,
`list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__,
`hash partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__, or
`series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__ for
example formats.
* **is_automatic_partition** --
If *true*,
a new partition will be created for values which don't fall
into an existing partition. Currently,
only supported for `list partitions
<../../../../concepts/tables/#partitioning-by-list>`__.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in input parameter *table_name*.
* **chunk_size** --
Indicates the number of records per chunk to be used for this
table.
* **is_result_table** --
Indicates whether the table is a
`memory-only table
<../../../../concepts/tables_memory_only/>`__. A result table
cannot contain columns with
store_only or text_search `data-handling
<../../../../concepts/types/#data-handling>`__ or that are
`non-charN strings
<../../../../concepts/types/#primitive-types>`__, and it will
not be retained if the
server is restarted.
Allowed values are:
* true
* false
The default value is 'false'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__
for the table and its columns.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **bad_record_table_name** --
Name of a table to which records that were rejected are
written.
The bad-record-table has the following columns: line_number
(long), line_rejected (string),
error_message (string). When *error_handling* is
*abort*, bad records table is not populated.
* **bad_record_table_limit** --
A positive integer indicating the maximum number of records
that can be
written to the bad-record-table. The default value is
'10000'.
* **bad_record_table_limit_per_input** --
For subscriptions, a positive integer indicating the maximum
number
of records that can be written to the bad-record-table per
file/payload. Default value will be
*bad_record_table_limit* and total size of the table per rank
is limited to
*bad_record_table_limit*.
* **batch_size** --
Number of records to insert per batch when inserting data.
The default value is '50000'.
* **column_formats** --
For each target column specified, applies the
column-property-bound
format to the source data loaded into that column. Each
column format will contain a mapping of one
or more of its column properties to an appropriate format for
each property. Currently supported
column properties include date, time, & datetime. The
parameter value must be formatted as a JSON
string of maps of column names to maps of column properties
to their corresponding column formats,
e.g.,
'{ "order_date" : { "date" : "%Y.%m.%d" }, "order_time" : {
"time" : "%H:%M:%S" } }'.
See *default_column_formats* for valid format syntax.
* **columns_to_load** --
Specifies a comma-delimited list of columns from the source
data to
load. If more than one file is being loaded, this list
applies to all files.
Column numbers can be specified discretely or as a range.
For example, a value of '5,7,1..3' will
insert values from the fifth column in the source data into
the first column in the target table,
from the seventh column in the source data into the second
column in the target table, and from the
first through third columns in the source data into the third
through fifth columns in the target
table.
If the source data contains a header, column names matching
the file header names may be provided
instead of column numbers. If the target table doesn't
exist, the table will be created with the
columns in this order. If the target table does exist with
columns in a different order than the
source data, this list can be used to match the order of the
target table. For example, a value of
'C, B, A' will create a three column table with column C,
followed by column B, followed by column
A; or will insert those fields in that order into a table
created with columns in that order. If
the target table exists, the column names must match the
source data field names for a name-mapping
to be successful.
Mutually exclusive with *columns_to_skip*.
* **columns_to_skip** --
Specifies a comma-delimited list of columns from the source
data to
skip. Mutually exclusive with *columns_to_load*.
* **compression_type** --
Source data compression type.
Allowed values are:
* **none** --
No compression.
* **auto** --
Auto detect compression type
* **gzip** --
gzip file compression.
* **bzip2** --
bzip2 file compression.
The default value is 'auto'.
* **datasource_name** --
Name of an existing external data source from which data
file(s) specified in input parameter *filepaths* will be
loaded
* **default_column_formats** --
Specifies the default format to be applied to source data
loaded
into columns with the corresponding column property.
Currently supported column properties include
date, time, & datetime. This default column-property-bound
format can be overridden by specifying a
column property & format for a given target column in
*column_formats*. For
each specified annotation, the format will apply to all
columns with that annotation unless a custom
*column_formats* for that annotation is specified.
The parameter value must be formatted as a JSON string that
is a map of column properties to their
respective column formats, e.g., '{ "date" : "%Y.%m.%d",
"time" : "%H:%M:%S" }'. Column
formats are specified as a string of control characters and
plain text. The supported control
characters are 'Y', 'm', 'd', 'H', 'M', 'S', and 's', which
follow the Linux 'strptime()'
specification, as well as 's', which specifies seconds and
fractional seconds (though the fractional
component will be truncated past milliseconds).
Formats for the 'date' annotation must include the 'Y', 'm',
and 'd' control characters. Formats for
the 'time' annotation must include the 'H', 'M', and either
'S' or 's' (but not both) control
characters. Formats for the 'datetime' annotation meet both
the 'date' and 'time' control character
requirements. For example, '{"datetime" : "%m/%d/%Y %H:%M:%S"
}' would be used to interpret text
as "05/04/2000 12:12:11"
* **error_handling** --
Specifies how errors should be handled upon insertion.
Allowed values are:
* **permissive** --
Records with missing columns are populated with nulls if
possible; otherwise, the malformed records are skipped.
* **ignore_bad_records** --
Malformed records are skipped.
* **abort** --
Stops current insertion and aborts entire operation when an
error is encountered. Primary key collisions are
considered abortable errors in this mode.
The default value is 'abort'.
* **file_type** --
Specifies the type of the file(s) whose records will be
inserted.
Allowed values are:
* **avro** --
Avro file format
* **delimited_text** --
Delimited text file format; e.g., CSV, TSV, PSV, etc.
* **gdb** --
Esri/GDB file format
* **json** --
Json file format
* **parquet** --
Apache Parquet file format
* **shapefile** --
ShapeFile file format
The default value is 'delimited_text'.
* **gdal_configuration_options** --
Comma-separated list of GDAL configuration options for the
specific request: key=value
* **ignore_existing_pk** --
Specifies the record collision error-suppression policy for
inserting into a table with a `primary key
<../../../../concepts/tables/#primary-keys>`__, only used
when
not in upsert mode (upsert mode is disabled when
*update_on_existing_pk* is
*false*). If set to
*true*, any record being inserted that is rejected
for having primary key values that match those of an existing
table record will be ignored with no
error generated. If *false*, the rejection of any
record for having primary key values matching an existing
record will result in an error being
reported, as determined by *error_handling*. If the
specified table does not
have a primary key or if upsert mode is in effect
(*update_on_existing_pk* is
*true*), then this option has no effect.
Allowed values are:
* **true** --
Ignore new records whose primary key values collide with
those of existing records
* **false** --
Treat as errors any new records whose primary key values
collide with those of existing records
The default value is 'false'.
* **ingestion_mode** --
Whether to do a full load, dry run, or perform a type
inference on the source data.
Allowed values are:
* **full** --
Run a type inference on the source data (if needed) and
ingest
* **dry_run** --
Does not load data, but walks through the source data and
determines the number of valid records, taking into account
the current mode of *error_handling*.
* **type_inference_only** --
Infer the type of the source data and return, without
ingesting any data. The inferred type is returned in the
response.
The default value is 'full'.
* **kafka_consumers_per_rank** --
Number of Kafka consumer threads per rank (valid range 1-6).
The default value is '1'.
* **kafka_group_id** --
The group id to be used when consuming data from a Kafka
topic (valid only for Kafka datasource subscriptions).
* **kafka_offset_reset_policy** --
Policy to determine whether the Kafka data consumption starts
either at earliest offset or latest offset.
Allowed values are:
* earliest
* latest
The default value is 'earliest'.
* **kafka_optimistic_ingest** --
Enable optimistic ingestion where Kafka topic offsets and
table data are committed independently to achieve
parallelism.
Allowed values are:
* true
* false
The default value is 'false'.
* **kafka_subscription_cancel_after** --
Sets the Kafka subscription lifespan (in minutes). Expired
subscription will be cancelled automatically.
* **kafka_type_inference_fetch_timeout** --
Maximum time to collect Kafka messages before type
inferencing on the set of them.
* **layer** --
Comma-separated list of layer name(s) for geo files.
* **loading_mode** --
Scheme for distributing the extraction and loading of data
from the source data file(s). This option applies only when
loading files that are local to the database.
Allowed values are:
* **head** --
The head node loads all data. All files must be available
to the head node.
* **distributed_shared** --
The head node coordinates loading data by worker
processes across all nodes from shared files available to
all workers.
NOTE:
Instead of existing on a shared source, the files can be
duplicated on a source local to each host
to improve performance, though the files must appear as the
same data set from the perspective of
all hosts performing the load.
* **distributed_local** --
A single worker process on each node loads all files
that are available to it. This option works best when each
worker loads files from its own file
system, to maximize performance. In order to avoid data
duplication, either each worker performing
the load needs to have visibility to a set of files unique
to it (no file is visible to more than
one node) or the target table needs to have a primary key
(which will allow the worker to
automatically deduplicate data).
NOTE:
If the target table doesn't exist, the table structure will
be determined by the head node. If the
head node has no files local to it, it will be unable to
determine the structure and the request
will fail.
If the head node is configured to have no worker processes,
no data strictly accessible to the head
node will be loaded.
The default value is 'head'.
* **local_time_offset** --
Apply an offset to Avro local timestamp columns.
* **max_records_to_load** --
Limit the number of records to load in this request: if this
number
is larger than *batch_size*, then the number of records
loaded will be
limited to the next whole number of *batch_size* (per working
thread).
* **num_tasks_per_rank** --
Number of tasks for reading files per rank. Defaults to the
system configuration parameter
external_file_reader_num_tasks.
* **poll_interval** --
When *subscribe* is *true*, specifies the number of
seconds between attempts to load external files into the
table. If zero, polling will be continuous
as long as data is found. If no data is found, the interval
will steadily increase to a maximum of
60 seconds. The default value is '0'.
* **primary_keys** --
Comma separated list of column names to set as primary keys,
when not specified in the type.
* **schema_registry_schema_name** --
Name of the Avro schema in the schema registry to use when
reading Avro records.
* **shard_keys** --
Comma separated list of column names to set as shard keys,
when not specified in the type.
* **skip_lines** --
Number of lines to skip from the beginning of the file.
* **subscribe** --
Continuously poll the data source to check for new data and
load it into the table.
Allowed values are:
* true
* false
The default value is 'false'.
* **table_insert_mode** --
Insertion scheme to use when inserting records from multiple
shapefiles.
Allowed values are:
* **single** --
Insert all records into a single table.
* **table_per_file** --
Insert records from each file into a new table
corresponding to that file.
The default value is 'single'.
* **text_comment_string** --
Specifies the character string that should be interpreted as
a comment line
prefix in the source data. All lines in the data starting
with the provided string are ignored.
For *delimited_text* *file_type* only. The default value is
'#'.
* **text_delimiter** --
Specifies the character delimiting field values in the source
data
and field names in the header (if present).
For *delimited_text* *file_type* only. The default value is
','.
* **text_escape_character** --
Specifies the character that is used to escape other
characters in
the source data.
An 'a', 'b', 'f', 'n', 'r', 't', or 'v' preceded by an escape
character will be interpreted as the
ASCII bell, backspace, form feed, line feed, carriage return,
horizontal tab, & vertical tab,
respectively. For example, the escape character followed by
an 'n' will be interpreted as a newline
within a field value.
The escape character can also be used to escape the quoting
character, and will be treated as an
escape character whether it is within a quoted field value or
not.
For *delimited_text* *file_type* only.
* **text_has_header** --
Indicates whether the source data contains a header row.
For *delimited_text* *file_type* only.
Allowed values are:
* true
* false
The default value is 'true'.
* **text_header_property_delimiter** --
Specifies the delimiter for
`column properties
<../../../../concepts/types/#column-properties>`__ in the
header row (if
present). Cannot be set to same value as *text_delimiter*.
For *delimited_text* *file_type* only. The default value is
'|'.
* **text_null_string** --
Specifies the character string that should be interpreted as
a null
value in the source data.
For *delimited_text* *file_type* only. The default value is
'\\N'.
* **text_quote_character** --
Specifies the character that should be interpreted as a field
value
quoting character in the source data. The character must
appear at beginning and end of field value
to take effect. Delimiters within quoted fields are treated
as literals and not delimiters. Within
a quoted field, two consecutive quote characters will be
interpreted as a single literal quote
character, effectively escaping it. To not have a quote
character, specify an empty string.
For *delimited_text* *file_type* only. The default value is
'"'.
* **text_search_columns** --
Add the 'text_search' property to internally inferred string
columns.
Comma-separated list of column names or '*' for all columns.
To add the 'text_search' property only to
string columns greater than or equal to a minimum size, also
set the
*text_search_min_column_length*.
* **text_search_min_column_length** --
Set the minimum column size for strings to apply the
'text_search' property to. Used only when
*text_search_columns* has a value.
* **truncate_strings** --
If set to *true*, truncate string values that are longer than
the column's type size.
Allowed values are:
* true
* false
The default value is 'false'.
* **truncate_table** --
If set to *true*, truncates the table specified by input
parameter *table_name* prior to loading the file(s).
Allowed values are:
* true
* false
The default value is 'false'.
* **type_inference_mode** --
Optimize type inferencing for either speed or accuracy.
Allowed values are:
* **accuracy** --
Scans data to get exactly-typed & sized columns for all
data scanned.
* **speed** --
Scans data and picks the widest possible column types so
that 'all' values will fit with minimum data scanned
The default value is 'speed'.
* **update_on_existing_pk** --
Specifies the record collision policy for inserting into a
table
with a `primary key
<../../../../concepts/tables/#primary-keys>`__. If set to
*true*, any existing table record with primary
key values that match those of a record being inserted will
be replaced by that new record (the new
data will be 'upserted'). If set to *false*,
any existing table record with primary key values that match
those of a record being inserted will
remain unchanged, while the new record will be rejected and
the error handled as determined by
*ignore_existing_pk* & *error_handling*. If the
specified table does not have a primary key, then this option
has no effect.
Allowed values are:
* **true** --
Upsert new records when primary keys match existing records
* **false** --
Reject new records when primary keys match existing records
The default value is 'false'.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
type_id (str)
ID of the currently registered table structure `type
<../../../../concepts/types/>`__ for the target table
type_definition (str)
A JSON string describing the columns of the target table
type_label (str)
The user-defined description associated with the target table's
structure
type_properties (dict of str to lists of str)
A mapping of each target table column name to an array of
column properties associated with that column
count_inserted (long)
Number of records inserted into the target table.
count_skipped (long)
Number of records skipped, when not running in *abort* error
handling mode.
count_updated (long)
[Not yet implemented] Number of records updated within the
target table.
info (dict of str to str)
Additional information.
files (list of str)
"""
assert isinstance( table_name, (basestring)), "insert_records_from_files(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
filepaths = filepaths if isinstance( filepaths, list ) else ( [] if (filepaths is None) else [ filepaths ] )
assert isinstance( modify_columns, (dict)), "insert_records_from_files(): Argument 'modify_columns' must be (one) of type(s) '(dict)'; given %s" % type( modify_columns ).__name__
assert isinstance( create_table_options, (dict)), "insert_records_from_files(): Argument 'create_table_options' must be (one) of type(s) '(dict)'; given %s" % type( create_table_options ).__name__
assert isinstance( options, (dict)), "insert_records_from_files(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['filepaths'] = filepaths
obj['modify_columns'] = self.__sanitize_dicts( modify_columns )
obj['create_table_options'] = self.__sanitize_dicts( create_table_options )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/insert/records/fromfiles', obj, convert_to_attr_dict = True )
return response
# end insert_records_from_files
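# Usage sketch (illustrative, not part of the generated API): load a
# delimited-text file into a table, creating the table if it does not
# exist.  Assumes 'db' is an already-connected GPUdb handle; the file
# path and table name below are placeholders.
#
#   response = db.insert_records_from_files(
#       table_name = 'ki_home.example_load',
#       filepaths  = [ 'kifs://data/example.csv' ],
#       options    = { 'file_type'       : 'delimited_text',
#                      'text_has_header' : 'true',
#                      'error_handling'  : 'ignore_bad_records' } )
#   print( response['count_inserted'], 'records loaded,',
#          response['count_skipped'], 'skipped' )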
# begin insert_records_from_payload
def insert_records_from_payload( self, table_name = None, data_text = None,
data_bytes = None, modify_columns = {},
create_table_options = {}, options = {} ):
"""Reads from the given text-based or binary payload and inserts the
data into a new or existing table. The table will be created if it
doesn't
already exist.
Returns once all records are processed.
Parameters:
table_name (str)
Name of the table into which the data will be inserted, in
[schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
If the table does not exist, the table will be created using
either an existing
*type_id* or the type inferred from the
payload, and the new table name will have to meet standard
`table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
data_text (str)
Records formatted as delimited text
data_bytes (bytes)
Records formatted as binary data
modify_columns (dict of str to dicts of str to str)
Not implemented yet. The default value is an empty dict ( {}
).
create_table_options (dict of str to str)
Options used when creating the target table. Includes type to
use. The other options match those in
:meth:`GPUdb.create_table`. The default value is an empty dict
( {} ).
Allowed keys are:
* **type_id** --
ID of a currently registered `type
<../../../../concepts/types/>`__. The default value is ''.
* **no_error_if_exists** --
If *true*, prevents an error from occurring if the table
already exists and is of the given type. If a table with the
same ID but a different type exists, it is still an error.
Allowed values are:
* true
* false
The default value is 'false'.
* **is_replicated** --
Affects the `distribution scheme
<../../../../concepts/tables/#distribution>`__ for the
table's data. If *true* and the given type has no explicit
`shard key <../../../../concepts/tables/#shard-key>`__
defined, the table will be `replicated
<../../../../concepts/tables/#replication>`__. If *false*,
the table will be `sharded
<../../../../concepts/tables/#sharding>`__ according to the
shard key specified in the given *type_id*, or `randomly
sharded <../../../../concepts/tables/#random-sharding>`__, if
no shard key is specified. Note that a type containing a
shard key cannot be used to create a replicated table.
Allowed values are:
* true
* false
The default value is 'false'.
* **foreign_keys** --
Semicolon-separated list of `foreign keys
<../../../../concepts/tables/#foreign-keys>`__, of the format
'(source_column_name [, ...]) references
target_table_name(primary_key_column_name [, ...]) [as
foreign_key_name]'.
* **foreign_shard_key** --
Foreign shard key of the format 'source_column references
shard_by_column from target_table(primary_key_column)'.
* **partition_type** --
`Partitioning <../../../../concepts/tables/#partitioning>`__
scheme to use.
Allowed values are:
* **RANGE** --
Use `range partitioning
<../../../../concepts/tables/#partitioning-by-range>`__.
* **INTERVAL** --
Use `interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__.
* **LIST** --
Use `list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__.
* **HASH** --
Use `hash partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__.
* **SERIES** --
Use `series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__.
* **partition_keys** --
Comma-separated list of partition keys, which are the columns
or column expressions by which records will be assigned to
partitions defined by *partition_definitions*.
* **partition_definitions** --
Comma-separated list of partition definitions, whose format
depends on the choice of *partition_type*. See `range
partitioning
<../../../../concepts/tables/#partitioning-by-range>`__,
`interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__,
`list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__, `hash
partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__, or
`series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__ for
example formats.
* **is_automatic_partition** --
If *true*, a new partition will be created for values which
don't fall into an existing partition. Currently only
supported for `list partitions
<../../../../concepts/tables/#partitioning-by-list>`__.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in input parameter *table_name*.
* **chunk_size** --
Indicates the number of records per chunk to be used for this
table.
* **is_result_table** --
Indicates whether the table is a `memory-only table
<../../../../concepts/tables_memory_only/>`__. A result table
cannot contain columns with store_only or text_search
`data-handling <../../../../concepts/types/#data-handling>`__
or that are `non-charN strings
<../../../../concepts/types/#primitive-types>`__, and it will
not be retained if the server is restarted.
Allowed values are:
* true
* false
The default value is 'false'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **avro_header_bytes** --
Optional number of bytes to skip when reading an avro record.
* **avro_num_records** --
Optional number of avro records, if data includes only
records.
* **avro_schema** --
Optional string containing the Avro schema to use when
inserting Avro-format records that do not include their schema.
* **avro_schemaless** --
When the user provides 'avro_schema', the Avro data is assumed
to be schemaless, unless specified otherwise. Defaults to
'true' when 'avro_schema' is given; ignored when 'avro_schema'
is not given.
Allowed values are:
* true
* false
* **bad_record_table_name** --
Optional name of a table to which records that were rejected
are written. The bad-record-table has the following columns:
line_number (long), line_rejected (string), error_message
(string).
* **bad_record_table_limit** --
A positive integer indicating the maximum number of records
that can be written to the bad-record-table. Default value
is 10000
* **bad_record_table_limit_per_input** --
For subscriptions: A positive integer indicating the maximum
number of records that can be written to the bad-record-table
per file/payload. Default value will be
'bad_record_table_limit' and total size of the table per rank
is limited to 'bad_record_table_limit'
* **batch_size** --
Internal tuning parameter--number of records per batch when
inserting data.
* **column_formats** --
For each target column specified, applies the
column-property-bound format to the source data
loaded into that column. Each column format will contain a
mapping of one or more of its column
properties to an appropriate format for each property.
Currently supported column properties
include date, time, & datetime. The parameter value must be
formatted as a JSON string of maps of
column names to maps of column properties to their
corresponding column formats, e.g.,
'{ "order_date" : { "date" : "%Y.%m.%d" }, "order_time" : {
"time" : "%H:%M:%S" } }'.
See *default_column_formats* for valid format syntax.
* **columns_to_load** --
Specifies a comma-delimited list of columns from the source
data to
load. If more than one file is being loaded, this list
applies to all files.
Column numbers can be specified discretely or as a range.
For example, a value of '5,7,1..3' will
insert values from the fifth column in the source data into
the first column in the target table,
from the seventh column in the source data into the second
column in the target table, and from the
first through third columns in the source data into the third
through fifth columns in the target
table.
If the source data contains a header, column names matching
the file header names may be provided
instead of column numbers. If the target table doesn't
exist, the table will be created with the
columns in this order. If the target table does exist with
columns in a different order than the
source data, this list can be used to match the order of the
target table. For example, a value of
'C, B, A' will create a three column table with column C,
followed by column B, followed by column
A; or will insert those fields in that order into a table
created with columns in that order. If
the target table exists, the column names must match the
source data field names for a name-mapping
to be successful.
Mutually exclusive with *columns_to_skip*.
* **columns_to_skip** --
Specifies a comma-delimited list of columns from the source
data to
skip. Mutually exclusive with *columns_to_load*.
* **compression_type** --
Optional: payload compression type.
Allowed values are:
* **none** --
Uncompressed
* **auto** --
Default. Auto detect compression type
* **gzip** --
gzip file compression.
* **bzip2** --
bzip2 file compression.
The default value is 'auto'.
* **default_column_formats** --
Specifies the default format to be applied to source data
loaded
into columns with the corresponding column property.
Currently supported column properties include
date, time, & datetime. This default column-property-bound
format can be overridden by specifying a
column property & format for a given target column in
*column_formats*. For
each specified annotation, the format will apply to all
columns with that annotation unless a custom
*column_formats* for that annotation is specified.
The parameter value must be formatted as a JSON string that
is a map of column properties to their
respective column formats, e.g., '{ "date" : "%Y.%m.%d",
"time" : "%H:%M:%S" }'. Column
formats are specified as a string of control characters and
plain text. The supported control
characters are 'Y', 'm', 'd', 'H', 'M', and 'S', which
follow the Linux 'strptime()'
specification, as well as 's', which specifies seconds and
fractional seconds (though the fractional
component will be truncated past milliseconds).
Formats for the 'date' annotation must include the 'Y', 'm',
and 'd' control characters. Formats for
the 'time' annotation must include the 'H', 'M', and either
'S' or 's' (but not both) control
characters. Formats for the 'datetime' annotation meet both
the 'date' and 'time' control character
requirements. For example, '{"datetime" : "%m/%d/%Y %H:%M:%S"
}' would be used to interpret text
as "05/04/2000 12:12:11"
* **error_handling** --
Specifies how errors should be handled upon insertion.
Allowed values are:
* **permissive** --
Records with missing columns are populated with nulls if
possible; otherwise, the malformed records are skipped.
* **ignore_bad_records** --
Malformed records are skipped.
* **abort** --
Stops current insertion and aborts entire operation when an
error is encountered. Primary key collisions are
considered abortable errors in this mode.
The default value is 'abort'.
* **file_type** --
Specifies the type of the file(s) whose records will be
inserted.
Allowed values are:
* **avro** --
Avro file format
* **delimited_text** --
Delimited text file format; e.g., CSV, TSV, PSV, etc.
* **gdb** --
Esri/GDB file format
* **json** --
JSON file format
* **parquet** --
Apache Parquet file format
* **shapefile** --
ShapeFile file format
The default value is 'delimited_text'.
* **gdal_configuration_options** --
Comma-separated list of GDAL configuration options for this
specific request, in key=value format. The default value is ''.
* **ignore_existing_pk** --
Specifies the record collision error-suppression policy for
inserting into a table with a `primary key
<../../../../concepts/tables/#primary-keys>`__, only used
when
not in upsert mode (upsert mode is disabled when
*update_on_existing_pk* is
*false*). If set to
*true*, any record being inserted that is rejected
for having primary key values that match those of an existing
table record will be ignored with no
error generated. If *false*, the rejection of any
record for having primary key values matching an existing
record will result in an error being
reported, as determined by *error_handling*. If the
specified table does not
have a primary key or if upsert mode is in effect
(*update_on_existing_pk* is
*true*), then this option has no effect.
Allowed values are:
* **true** --
Ignore new records whose primary key values collide with
those of existing records
* **false** --
Treat as errors any new records whose primary key values
collide with those of existing records
The default value is 'false'.
* **ingestion_mode** --
Whether to do a full load, dry run, or perform a type
inference on the source data.
Allowed values are:
* **full** --
Run a type inference on the source data (if needed) and
ingest
* **dry_run** --
Does not load data, but walks through the source data and
determines the number of valid records, taking into account
the current mode of *error_handling*.
* **type_inference_only** --
Infer the type of the source data and return, without
ingesting any data. The inferred type is returned in the
response.
The default value is 'full'.
* **layer** --
Optional: comma-separated list of layer name(s) for geo files.
The default value is ''.
* **loading_mode** --
Scheme for distributing the extraction and loading of data
from the source data file(s). This option applies only when
loading files that are local to the database.
Allowed values are:
* **head** --
The head node loads all data. All files must be available
to the head node.
* **distributed_shared** --
The head node coordinates loading data by worker
processes across all nodes from shared files available to
all workers.
NOTE:
Instead of existing on a shared source, the files can be
duplicated on a source local to each host
to improve performance, though the files must appear as the
same data set from the perspective of
all hosts performing the load.
* **distributed_local** --
A single worker process on each node loads all files
that are available to it. This option works best when each
worker loads files from its own file
system, to maximize performance. In order to avoid data
duplication, either each worker performing
the load needs to have visibility to a set of files unique
to it (no file is visible to more than
one node) or the target table needs to have a primary key
(which will allow the worker to
automatically deduplicate data).
NOTE:
If the target table doesn't exist, the table structure will
be determined by the head node. If the
head node has no files local to it, it will be unable to
determine the structure and the request
will fail.
If the head node is configured to have no worker processes,
no data strictly accessible to the head
node will be loaded.
The default value is 'head'.
* **local_time_offset** --
Apply an offset to Avro local timestamp columns.
* **max_records_to_load** --
Limit the number of records to load in this request: If this
number is larger than a batch_size, then the number of
records loaded will be limited to the next whole number of
batch_size (per working thread). The default value is ''.
* **num_tasks_per_rank** --
Optional: number of tasks for reading files per rank. Defaults
to the system configuration parameter
external_file_reader_num_tasks.
* **poll_interval** --
When *subscribe* is *true*, specifies the number of seconds
between attempts to load external files into the table. If
zero, polling will be continuous as long as data is found. If
no data is found, the interval will steadily increase to a
maximum of 60 seconds.
* **primary_keys** --
Optional: comma separated list of column names, to set as
primary keys, when not specified in the type. The default
value is ''.
* **shard_keys** --
Optional: comma-separated list of column names to set as
shard keys, when not specified in the type. The default
value is ''.
* **skip_lines** --
Number of lines to skip from the beginning of the file.
* **subscribe** --
Continuously poll the data source to check for new data and
load it into the table.
Allowed values are:
* true
* false
The default value is 'false'.
* **table_insert_mode** --
Insertion scheme to use when inserting records from multiple
files: if 'table_per_file', records from each file are inserted
into a new table corresponding to that file. Currently
supported only for shapefiles.
Allowed values are:
* single
* table_per_file
The default value is 'single'.
* **text_comment_string** --
Specifies the character string that should be interpreted as
a comment line
prefix in the source data. All lines in the data starting
with the provided string are ignored.
For *delimited_text* *file_type* only. The default value is
'#'.
* **text_delimiter** --
Specifies the character delimiting field values in the source
data
and field names in the header (if present).
For *delimited_text* *file_type* only. The default value is
','.
* **text_escape_character** --
Specifies the character that is used to escape other
characters in
the source data.
An 'a', 'b', 'f', 'n', 'r', 't', or 'v' preceded by an escape
character will be interpreted as the
ASCII bell, backspace, form feed, line feed, carriage return,
horizontal tab, & vertical tab,
respectively. For example, the escape character followed by
an 'n' will be interpreted as a newline
within a field value.
The escape character can also be used to escape the quoting
character, and will be treated as an
escape character whether it is within a quoted field value or
not.
For *delimited_text* *file_type* only.
* **text_has_header** --
Indicates whether the source data contains a header row.
For *delimited_text* *file_type* only.
Allowed values are:
* true
* false
The default value is 'true'.
* **text_header_property_delimiter** --
Specifies the delimiter for
`column properties
<../../../../concepts/types/#column-properties>`__ in the
header row (if
present). Cannot be set to same value as *text_delimiter*.
For *delimited_text* *file_type* only. The default value is
'|'.
* **text_null_string** --
Specifies the character string that should be interpreted as
a null
value in the source data.
For *delimited_text* *file_type* only. The default value is
'\\N'.
* **text_quote_character** --
Specifies the character that should be interpreted as a field
value
quoting character in the source data. The character must
appear at beginning and end of field value
to take effect. Delimiters within quoted fields are treated
as literals and not delimiters. Within
a quoted field, two consecutive quote characters will be
interpreted as a single literal quote
character, effectively escaping it. To not have a quote
character, specify an empty string.
For *delimited_text* *file_type* only. The default value is
'"'.
* **text_search_columns** --
Add the 'text_search' property to internally inferred string
columns. Comma-separated list of column names or '*' for all
columns. To add the 'text_search' property only to string
columns of a minimum size, also set the option
'text_search_min_column_length'.
* **text_search_min_column_length** --
Set the minimum column size for strings to apply the
'text_search' property to. Used only when
'text_search_columns' has a value.
* **truncate_strings** --
If set to *true*, truncate string values that are longer than
the column's type size.
Allowed values are:
* true
* false
The default value is 'false'.
* **truncate_table** --
If set to *true*, truncates the table specified by input
parameter *table_name* prior to loading the file(s).
Allowed values are:
* true
* false
The default value is 'false'.
* **type_inference_mode** --
Optimize type inferencing for either speed or accuracy.
Allowed values are:
* **accuracy** --
Scans data to get exactly-typed & sized columns for all
data scanned.
* **speed** --
Scans data and picks the widest possible column types so
that 'all' values will fit with minimum data scanned
The default value is 'speed'.
* **update_on_existing_pk** --
Specifies the record collision policy for inserting into a
table
with a `primary key
<../../../../concepts/tables/#primary-keys>`__. If set to
*true*, any existing table record with primary
key values that match those of a record being inserted will
be replaced by that new record (the new
data will be "upserted"). If set to *false*,
any existing table record with primary key values that match
those of a record being inserted will
remain unchanged, while the new record will be rejected and
the error handled as determined by
*ignore_existing_pk* & *error_handling*. If the
specified table does not have a primary key, then this option
has no effect.
Allowed values are:
* **true** --
Upsert new records when primary keys match existing records
* **false** --
Reject new records when primary keys match existing records
The default value is 'false'.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
type_id (str)
ID of the currently registered table structure `type
<../../../../concepts/types/>`__ for the target table
type_definition (str)
A JSON string describing the columns of the target table
type_label (str)
The user-defined description associated with the target table's
structure
type_properties (dict of str to lists of str)
A mapping of each target table column name to an array of
column properties associated with that column
count_inserted (long)
Number of records inserted into the target table.
count_skipped (long)
Number of records skipped, when not running in *abort* error
handling mode.
count_updated (long)
[Not yet implemented] Number of records updated within the
target table.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "insert_records_from_payload(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( data_text, (basestring)), "insert_records_from_payload(): Argument 'data_text' must be (one) of type(s) '(basestring)'; given %s" % type( data_text ).__name__
assert isinstance( data_bytes, (basestring, bytes)), "insert_records_from_payload(): Argument 'data_bytes' must be (one) of type(s) '(basestring, bytes)'; given %s" % type( data_bytes ).__name__
assert isinstance( modify_columns, (dict)), "insert_records_from_payload(): Argument 'modify_columns' must be (one) of type(s) '(dict)'; given %s" % type( modify_columns ).__name__
assert isinstance( create_table_options, (dict)), "insert_records_from_payload(): Argument 'create_table_options' must be (one) of type(s) '(dict)'; given %s" % type( create_table_options ).__name__
assert isinstance( options, (dict)), "insert_records_from_payload(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['data_text'] = data_text
obj['data_bytes'] = data_bytes
obj['modify_columns'] = self.__sanitize_dicts( modify_columns )
obj['create_table_options'] = self.__sanitize_dicts( create_table_options )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/insert/records/frompayload', obj, convert_to_attr_dict = True )
return response
# end insert_records_from_payload
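# Usage sketch (illustrative, not part of the generated API): insert a
# small CSV payload supplied as a string.  Assumes 'db' is an
# already-connected GPUdb handle; the table name is a placeholder.  An
# empty byte string is passed for 'data_bytes' because the argument
# checks in the method above require both payload arguments to be
# string/bytes values.
#
#   csv_payload = 'id|name\n1|alpha\n2|beta\n'
#   response = db.insert_records_from_payload(
#       table_name = 'ki_home.payload_example',
#       data_text  = csv_payload,
#       data_bytes = b'',
#       options    = { 'file_type'       : 'delimited_text',
#                      'text_has_header' : 'true',
#                      'text_delimiter'  : '|' } )
#   print( response['count_inserted'], 'records inserted' )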
# begin insert_records_from_query
def insert_records_from_query( self, table_name = None, remote_query = None,
modify_columns = {}, create_table_options =
{}, options = {} ):
"""Computes remote query result and inserts the result data into a new or
existing table
Parameters:
table_name (str)
Name of the table into which the data will be inserted, in
[schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
If the table does not exist, the table will be created using
either an existing
*type_id* or the type inferred from the
remote query, and the new table name will have to meet standard
`table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
remote_query (str)
Query for which result data needs to be imported
modify_columns (dict of str to dicts of str to str)
Not implemented yet. The default value is an empty dict ( {}
).
create_table_options (dict of str to str)
Options used when creating the target table. The default value
is an empty dict ( {} ).
Allowed keys are:
* **type_id** --
ID of a currently registered `type
<../../../../concepts/types/>`__. The default value is ''.
* **no_error_if_exists** --
If *true*, prevents an error from occurring if the table
already exists and is of the given type. If a table with the
same ID but a different type exists, it is still an error.
Allowed values are:
* true
* false
The default value is 'false'.
* **is_replicated** --
Affects the `distribution scheme
<../../../../concepts/tables/#distribution>`__ for the
table's data. If *true* and the given type has no explicit
`shard key <../../../../concepts/tables/#shard-key>`__
defined, the table will be `replicated
<../../../../concepts/tables/#replication>`__. If *false*,
the table will be `sharded
<../../../../concepts/tables/#sharding>`__ according to the
shard key specified in the given *type_id*, or `randomly
sharded <../../../../concepts/tables/#random-sharding>`__, if
no shard key is specified. Note that a type containing a
shard key cannot be used to create a replicated table.
Allowed values are:
* true
* false
The default value is 'false'.
* **foreign_keys** --
Semicolon-separated list of `foreign keys
<../../../../concepts/tables/#foreign-keys>`__, of the format
'(source_column_name [, ...]) references
target_table_name(primary_key_column_name [, ...]) [as
foreign_key_name]'.
* **foreign_shard_key** --
Foreign shard key of the format 'source_column references
shard_by_column from target_table(primary_key_column)'.
* **partition_type** --
`Partitioning <../../../../concepts/tables/#partitioning>`__
scheme to use.
Allowed values are:
* **RANGE** --
Use `range partitioning
<../../../../concepts/tables/#partitioning-by-range>`__.
* **INTERVAL** --
Use `interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__.
* **LIST** --
Use `list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__.
* **HASH** --
Use `hash partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__.
* **SERIES** --
Use `series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__.
* **partition_keys** --
Comma-separated list of partition keys, which are the columns
or column expressions by which records will be assigned to
partitions defined by *partition_definitions*.
* **partition_definitions** --
Comma-separated list of partition definitions, whose format
depends on the choice of *partition_type*. See `range
partitioning
<../../../../concepts/tables/#partitioning-by-range>`__,
`interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__,
`list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__, `hash
partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__, or
`series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__ for
example formats.
* **is_automatic_partition** --
If *true*, a new partition will be created for values which
don't fall into an existing partition. Currently only
supported for `list partitions
<../../../../concepts/tables/#partitioning-by-list>`__.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in input parameter *table_name*.
* **chunk_size** --
Indicates the number of records per chunk to be used for this
table.
* **is_result_table** --
Indicates whether the table is a `memory-only table
<../../../../concepts/tables_memory_only/>`__. A result table
cannot contain columns with store_only or text_search
`data-handling <../../../../concepts/types/#data-handling>`__
or that are `non-charN strings
<../../../../concepts/types/#primitive-types>`__, and it will
not be retained if the server is restarted.
Allowed values are:
* true
* false
The default value is 'false'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **bad_record_table_name** --
Optional name of a table to which records that were rejected
are written. The bad-record-table has the following columns:
line_number (long), line_rejected (string), error_message
(string). When error handling is 'abort', the bad-record table
is not populated.
* **bad_record_table_limit** --
A positive integer indicating the maximum number of records
that can be written to the bad-record-table. Default value
is 10000
* **batch_size** --
Number of records per batch when inserting data.
* **datasource_name** --
Name of an existing external data source from which table
will be loaded
* **error_handling** --
Specifies how errors should be handled upon insertion.
Allowed values are:
* **permissive** --
Records with missing columns are populated with nulls if
possible; otherwise, the malformed records are skipped.
* **ignore_bad_records** --
Malformed records are skipped.
* **abort** --
Stops current insertion and aborts entire operation when an
error is encountered. Primary key collisions are
considered abortable errors in this mode.
The default value is 'abort'.
* **ignore_existing_pk** --
Specifies the record collision error-suppression policy for
inserting into a table with a `primary key
<../../../../concepts/tables/#primary-keys>`__, only used
when
not in upsert mode (upsert mode is disabled when
*update_on_existing_pk* is
*false*). If set to
*true*, any record being inserted that is rejected
for having primary key values that match those of an existing
table record will be ignored with no
error generated. If *false*, the rejection of any
record for having primary key values matching an existing
record will result in an error being
reported, as determined by *error_handling*. If the
specified table does not
have a primary key or if upsert mode is in effect
(*update_on_existing_pk* is
*true*), then this option has no effect.
Allowed values are:
* **true** --
Ignore new records whose primary key values collide with
those of existing records
* **false** --
Treat as errors any new records whose primary key values
collide with those of existing records
The default value is 'false'.
* **ingestion_mode** --
Whether to do a full load, dry run, or perform a type
inference on the source data.
Allowed values are:
* **full** --
Run a type inference on the source data (if needed) and
ingest
* **dry_run** --
Does not load data, but walks through the source data and
determines the number of valid records, taking into account
the current mode of *error_handling*.
* **type_inference_only** --
Infer the type of the source data and return, without
ingesting any data. The inferred type is returned in the
response.
The default value is 'full'.
* **jdbc_fetch_size** --
The JDBC fetch size, which determines how many rows to fetch
per round trip.
* **jdbc_session_init_statement** --
Executes the statement once per JDBC session before performing
the actual load. The default value is ''.
* **num_splits_per_rank** --
Optional: number of splits for reading data per rank. Default
will be external_file_reader_num_tasks. The default value is
''.
* **num_tasks_per_rank** --
Optional: number of tasks for reading data per rank. Default
will be external_file_reader_num_tasks
* **primary_keys** --
Optional: comma separated list of column names, to set as
primary keys, when not specified in the type. The default
value is ''.
* **shard_keys** --
Optional: comma-separated list of column names to set as
shard keys, when not specified in the type. The default
value is ''.
* **subscribe** --
Continuously poll the data source to check for new data and
load it into the table.
Allowed values are:
* true
* false
The default value is 'false'.
* **truncate_table** --
If set to *true*, truncates the table specified by input
parameter *table_name* prior to loading the data.
Allowed values are:
* true
* false
The default value is 'false'.
* **remote_query** --
Remote SQL query from which data will be sourced
* **remote_query_order_by** --
Name of column to be used for splitting the query into
multiple sub-queries using ordering of given column. The
default value is ''.
* **remote_query_filter_column** --
Name of column to be used for splitting the query into
multiple sub-queries using the data distribution of given
column. The default value is ''.
* **remote_query_increasing_column** --
Column on subscribed remote query result that will increase
for new records (e.g., TIMESTAMP). The default value is ''.
* **remote_query_partition_column** --
Alias name for remote_query_filter_column. The default value
is ''.
* **truncate_strings** --
If set to *true*, truncate string values that are longer than
the column's type size.
Allowed values are:
* true
* false
The default value is 'false'.
* **update_on_existing_pk** --
Specifies the record collision policy for inserting into a
table
with a `primary key
<../../../../concepts/tables/#primary-keys>`__. If set to
*true*, any existing table record with primary
key values that match those of a record being inserted will
be replaced by that new record (the new
data will be "upserted"). If set to *false*,
any existing table record with primary key values that match
those of a record being inserted will
remain unchanged, while the new record will be rejected and
the error handled as determined by
*ignore_existing_pk* & *error_handling*. If the
specified table does not have a primary key, then this option
has no effect.
Allowed values are:
* **true** --
Upsert new records when primary keys match existing records
* **false** --
Reject new records when primary keys match existing records
The default value is 'false'.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
type_id (str)
ID of the currently registered table structure `type
<../../../../concepts/types/>`__ for the target table
type_definition (str)
A JSON string describing the columns of the target table
type_label (str)
The user-defined description associated with the target table's
structure
type_properties (dict of str to lists of str)
A mapping of each target table column name to an array of
column properties associated with that column
count_inserted (long)
Number of records inserted into the target table.
count_skipped (long)
Number of records skipped, when not running in *abort* error
handling mode.
count_updated (long)
[Not yet implemented] Number of records updated within the
target table.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "insert_records_from_query(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( remote_query, (basestring)), "insert_records_from_query(): Argument 'remote_query' must be (one) of type(s) '(basestring)'; given %s" % type( remote_query ).__name__
assert isinstance( modify_columns, (dict)), "insert_records_from_query(): Argument 'modify_columns' must be (one) of type(s) '(dict)'; given %s" % type( modify_columns ).__name__
assert isinstance( create_table_options, (dict)), "insert_records_from_query(): Argument 'create_table_options' must be (one) of type(s) '(dict)'; given %s" % type( create_table_options ).__name__
assert isinstance( options, (dict)), "insert_records_from_query(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['remote_query'] = remote_query
obj['modify_columns'] = self.__sanitize_dicts( modify_columns )
obj['create_table_options'] = self.__sanitize_dicts( create_table_options )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/insert/records/fromquery', obj, convert_to_attr_dict = True )
return response
# end insert_records_from_query
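# Usage sketch (illustrative, not part of the generated API): ingest the
# result of a remote SQL query through an existing external data source.
# Assumes 'db' is an already-connected GPUdb handle; the data source and
# table names are placeholders.
#
#   response = db.insert_records_from_query(
#       table_name   = 'ki_home.remote_orders',
#       remote_query = 'SELECT id, amount, order_ts FROM orders',
#       options      = { 'datasource_name' : 'example_jdbc_source',
#                        'error_handling'  : 'permissive',
#                        'batch_size'      : '50000' } )
#   print( response['count_inserted'], 'records ingested' )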
# begin insert_records_random
def insert_records_random( self, table_name = None, count = None, options = {}
):
"""Generates a specified number of random records and adds them to the
given table.
There is an optional parameter that allows the user to customize the
ranges of
the column values. It also allows the user to specify linear profiles
for some
or all columns in which case linear values are generated rather than
random
ones. Only individual tables are supported for this operation.
This operation is synchronous, meaning that a response will not be
returned
until all random records are fully available.
Parameters:
table_name (str)
Table to which random records will be added, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table, not a view.
count (long)
Number of records to generate.
options (dict of str to dicts of str to floats)
Optional parameter to pass in specifications for the randomness
of the values. This map is different from the *options*
parameter of most other endpoints in that it is a map of string
to map of string to doubles, while most others are maps of
string to string. In this map, the top level keys represent
which column's parameters are being specified, while the
internal keys represents which parameter is being specified.
These parameters take on different meanings depending on the
type of the column. Below follows a more detailed description
of the map. The default value is an empty dict ( {} ).
Allowed keys are:
* **seed** --
If provided, the internal random number generator will be
initialized with the given value. The minimum is 0. This
allows for the same set of random numbers to be generated
across invocation of this endpoint in case the user wants to
repeat the test. Since input parameter *options*, is a map
of maps, we need an internal map to provide the seed value.
For example, to pass 100 as the seed value through this
parameter, you need something equivalent to: 'options' =
{'seed': { 'value': 100 } }
Allowed keys are:
* **value** --
The seed value to use
* **all** --
This key indicates that the specifications relayed in the
internal map are to be applied to all columns of the records.
Allowed keys are:
* **min** --
For numerical columns, the minimum of the generated values
is set to this value. Default is -99999. For point,
shape, and track columns, min for numeric 'x' and 'y'
columns needs to be within [-180, 180] and [-90, 90],
respectively. The default minimum possible values for these
columns in such cases are -180.0 and -90.0. For the
'TIMESTAMP' column, the default minimum corresponds to Jan
1, 2010.
For string columns, the minimum length of the randomly
generated strings is set to this value (default is 0). If
both minimum and maximum are provided, minimum must be less
than or equal to max. Value needs to be within [0, 200].
If the min is outside the accepted ranges for strings
columns and 'x' and 'y' columns for point/shape/track, then
those parameters will not be set; however, an error will
not be thrown in such a case. It is the responsibility of
the user to use the *all* parameter judiciously.
* **max** --
For numerical columns, the maximum of the generated values
is set to this value. Default is 99999. For point, shape,
and track columns, max for numeric 'x' and 'y' columns
needs to be within [-180, 180] and [-90, 90], respectively.
The default maximum possible values for these columns in
such cases are 180.0 and 90.0.
For string columns, the maximum length of the randomly
generated strings is set to this value (default is 200). If
both minimum and maximum are provided, *max* must be
greater than or equal to *min*. Value needs to be within
[0, 200].
If the *max* is outside the accepted ranges for strings
columns and 'x' and 'y' columns for point/shape/track, then
those parameters will not be set; however, an error will
not be thrown in such a case. It is the responsibility of
the user to use the *all* parameter judiciously.
* **interval** --
If specified, generate values for all columns evenly spaced
with the given interval value. If a max value is specified
for a given column the data is randomly generated between
min and max and decimated down to the interval. If no max
is provided, the data is linearly generated starting at the
minimum value (instead of generating random data). For
non-decimated string-type columns the interval value is
ignored. Instead the values are generated following the
pattern: 'attrname_creationIndex#', i.e. the column name
suffixed with an underscore and a running counter (starting
at 0). For string types with limited size (e.g., char4) the
prefix is dropped. No nulls will be generated for nullable
columns.
* **null_percentage** --
If specified, then generate the given percentage of the
count as nulls for all nullable columns. This option will
be ignored for non-nullable columns. The value must be
within the range [0, 1.0]. The default value is 5% (0.05).
* **cardinality** --
If specified, limit the randomly generated values to a
fixed set. Not allowed on a column with interval specified,
and is not applicable to WKT or Track-specific columns. The
value must be greater than 0. This option is disabled by
default.
* **attr_name** --
Use the desired column name in place of *attr_name*, and set
the following parameters for the column specified. This
overrides any parameter set by *all*.
Allowed keys are:
* **min** --
For numerical columns, the minimum of the generated values
is set to this value. Default is -99999. For point,
shape, and track columns, min for numeric 'x' and 'y'
columns needs to be within [-180, 180] and [-90, 90],
respectively. The default minimum possible values for these
columns in such cases are -180.0 and -90.0. For the
'TIMESTAMP' column, the default minimum corresponds to Jan
1, 2010.
For string columns, the minimum length of the randomly
generated strings is set to this value (default is 0). If
both minimum and maximum are provided, minimum must be less
than or equal to max. Value needs to be within [0, 200].
If the min is outside the accepted ranges for strings
columns and 'x' and 'y' columns for point/shape/track, then
those parameters will not be set; however, an error will
not be thrown in such a case. It is the responsibility of
the user to use the *all* parameter judiciously.
* **max** --
For numerical columns, the maximum of the generated values
is set to this value. Default is 99999. For point, shape,
and track columns, max for numeric 'x' and 'y' columns
needs to be within [-180, 180] and [-90, 90], respectively.
The default maximum possible values for these columns in
such cases are 180.0 and 90.0.
For string columns, the maximum length of the randomly
generated strings is set to this value (default is 200). If
both minimum and maximum are provided, *max* must be
greater than or equal to *min*. Value needs to be within
[0, 200].
If the *max* is outside the accepted ranges for strings
columns and 'x' and 'y' columns for point/shape/track, then
those parameters will not be set; however, an error will
not be thrown in such a case. It is the responsibility of
the user to use the *all* parameter judiciously.
* **interval** --
If specified, generate values for all columns evenly spaced
with the given interval value. If a max value is specified
for a given column the data is randomly generated between
min and max and decimated down to the interval. If no max
is provided, the data is linearly generated starting at the
minimum value (instead of generating random data). For
non-decimated string-type columns the interval value is
ignored. Instead the values are generated following the
pattern: 'attrname_creationIndex#', i.e. the column name
suffixed with an underscore and a running counter (starting
at 0). For string types with limited size (e.g., char4) the
prefix is dropped. No nulls will be generated for nullable
columns.
* **null_percentage** --
If specified and if this column is nullable, then generate
the given percentage of the count as nulls. This option
will result in an error if the column is not nullable. The
value must be within the range [0, 1.0]. The default value
is 5% (0.05).
* **cardinality** --
If specified, limit the randomly generated values to a
fixed set. Not allowed on a column with interval specified,
and is not applicable to WKT or Track-specific columns. The
value must be greater than 0. This option is disabled by
default.
* **track_length** --
This key-map pair is only valid for track data sets (an error
is thrown otherwise). No nulls would be generated for
nullable columns.
Allowed keys are:
* **min** --
Minimum possible length for generated series; default is
100 records per series. Must be an integral value within
the range [1, 500]. If both min and max are specified, min
must be less than or equal to max.
* **max** --
Maximum possible length for generated series; default is
500 records per series. Must be an integral value within
the range [1, 500]. If both min and max are specified, max
must be greater than or equal to min.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
count (long)
Number of records inserted.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "insert_records_random(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( count, (int, long, float)), "insert_records_random(): Argument 'count' must be (one) of type(s) '(int, long, float)'; given %s" % type( count ).__name__
assert isinstance( options, (dict)), "insert_records_random(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['count'] = count
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/insert/records/random', obj, convert_to_attr_dict = True )
return response
# end insert_records_random
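# Illustrative usage sketch (not part of the generated API): populate a
# hypothetical table with 1000 random rows, bounding one numeric column and
# making 10% of a nullable column null, assuming `db` is a connected GPUdb
# handle and the table/column names are placeholders.
#
#     response = db.insert_records_random(
#         table_name = 'ki_home.example_table',
#         count = 1000,
#         options = { 'x': { 'min': 0, 'max': 100 },
#                     'comment': { 'null_percentage': 0.1 } } )
#     print( response['count'] )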
# begin insert_symbol
def insert_symbol( self, symbol_id = None, symbol_format = None, symbol_data =
None, options = {} ):
"""Adds a symbol or icon (i.e. an image) to represent data points when
data is rendered visually. Users must provide the symbol identifier
(string), a format (currently supported: 'svg' and 'svg_path'), the
data for the symbol, and any additional optional parameter (e.g.
color). To have a symbol used for rendering, create a table with a
string column named 'SYMBOLCODE' (along with 'x' or 'y', for example).
Then, when the table is rendered (via `WMS
<../../../../api/rest/wms_rest/>`__), if the 'dosymbology' parameter is
'true', the value of the 'SYMBOLCODE' column is used to pick the
symbol displayed for each point.
Parameters:
symbol_id (str)
The id of the symbol being added. This is the same id that
should be in the 'SYMBOLCODE' column for objects using this
symbol.
symbol_format (str)
Specifies the symbol format. Must be either 'svg' or
'svg_path'.
Allowed values are:
* svg
* svg_path
symbol_data (bytes)
The actual symbol data. If input parameter *symbol_format* is
'svg' then this should be the raw bytes representing an svg
file. If input parameter *symbol_format* is 'svg_path' then this
should be an svg path string, for example:
'M25.979,12.896,5.979,12.896,5.979,19.562,25.979,19.562z'
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **color** --
If input parameter *symbol_format* is 'svg' this is ignored.
If input parameter *symbol_format* is 'svg_path' then this
option specifies the color (in RRGGBB hex format) of the
path. For example, to have the path rendered in red, use
'FF0000'. If 'color' is not provided then '00FF00' (i.e.
green) is used by default.
Returns:
A dict with the following entries--
symbol_id (str)
Value of input parameter *symbol_id*.
info (dict of str to str)
Additional information.
"""
assert isinstance( symbol_id, (basestring)), "insert_symbol(): Argument 'symbol_id' must be (one) of type(s) '(basestring)'; given %s" % type( symbol_id ).__name__
assert isinstance( symbol_format, (basestring)), "insert_symbol(): Argument 'symbol_format' must be (one) of type(s) '(basestring)'; given %s" % type( symbol_format ).__name__
assert isinstance( symbol_data, (basestring, bytes)), "insert_symbol(): Argument 'symbol_data' must be (one) of type(s) '(basestring, bytes)'; given %s" % type( symbol_data ).__name__
assert isinstance( options, (dict)), "insert_symbol(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['symbol_id'] = symbol_id
obj['symbol_format'] = symbol_format
obj['symbol_data'] = symbol_data
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/insert/symbol', obj, convert_to_attr_dict = True )
return response
# end insert_symbol
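# Illustrative usage sketch (not part of the generated API): register an
# 'svg_path' symbol rendered in red, assuming `db` is a connected GPUdb
# handle; the symbol ID and path data are placeholders.
#
#     response = db.insert_symbol(
#         symbol_id = 'arrow',
#         symbol_format = 'svg_path',
#         symbol_data = 'M25.979,12.896,5.979,12.896,5.979,19.562,25.979,19.562z',
#         options = { 'color': 'FF0000' } )
#     print( response['symbol_id'] )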
# begin kill_proc
def kill_proc( self, run_id = '', options = {} ):
"""Kills a running proc instance.
Parameters:
run_id (str)
The run ID of a running proc instance. If a proc with a
matching run ID is not found or the proc instance has already
completed, no procs will be killed. If not specified, all
running proc instances will be killed. The default value is
''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **run_tag** --
If input parameter *run_id* is specified, kill the proc
instance that has a matching run ID and a matching run tag
that was provided to :meth:`GPUdb.execute_proc`. If input
parameter *run_id* is not specified, kill the proc
instance(s) where a matching run tag was provided to
:meth:`GPUdb.execute_proc`. The default value is ''.
* **clear_execute_at_startup** --
If *true*, kill and remove the instance of the proc matching
the auto-start run ID that was created to run when the
database is started. The auto-start run ID was returned from
:meth:`GPUdb.execute_proc` and can be retrieved using
:meth:`GPUdb.show_proc`.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
run_ids (list of str)
List of run IDs of proc instances that were killed.
info (dict of str to str)
Additional information.
"""
assert isinstance( run_id, (basestring)), "kill_proc(): Argument 'run_id' must be (one) of type(s) '(basestring)'; given %s" % type( run_id ).__name__
assert isinstance( options, (dict)), "kill_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['run_id'] = run_id
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/kill/proc', obj, convert_to_attr_dict = True )
return response
# end kill_proc
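# Illustrative usage sketch (not part of the generated API): kill a single
# running proc instance by run ID, assuming `db` is a connected GPUdb handle
# and the run ID is a placeholder.
#
#     response = db.kill_proc( run_id = '42', options = {} )
#     print( response['run_ids'] )   # run IDs actually killed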
# begin list_graph
def list_graph( self, graph_name = '', options = {} ):
assert isinstance( graph_name, (basestring)), "list_graph(): Argument 'graph_name' must be (one) of type(s) '(basestring)'; given %s" % type( graph_name ).__name__
assert isinstance( options, (dict)), "list_graph(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['graph_name'] = graph_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/list/graph', obj, convert_to_attr_dict = True )
return response
# end list_graph
# begin lock_table
def lock_table( self, table_name = None, lock_type = 'status', options = {} ):
"""Manages global access to a table's data. By default a table has a
input parameter *lock_type* of *read_write*, indicating all operations
are permitted. A user may request a *read_only* or a *write_only*
lock, after which only read or write operations, respectively, are
permitted on the table until the lock is removed. When input parameter
*lock_type* is *no_access* then no operations are permitted on the
table. The lock status can be queried by setting input parameter
*lock_type* to *status*.
Parameters:
table_name (str)
Name of the table to be locked, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. It
must be a currently existing table or view.
lock_type (str)
The type of lock being applied to the table. Setting it to
*status* will return the current lock status of the table
without changing it.
Allowed values are:
* **status** --
Show locked status
* **no_access** --
Allow no read/write operations
* **read_only** --
Allow only read operations
* **write_only** --
Allow only write operations
* **read_write** --
Allow all read/write operations
The default value is 'status'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
lock_type (str)
Returns the lock state of the table.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "lock_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( lock_type, (basestring)), "lock_table(): Argument 'lock_type' must be (one) of type(s) '(basestring)'; given %s" % type( lock_type ).__name__
assert isinstance( options, (dict)), "lock_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['lock_type'] = lock_type
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/lock/table', obj, convert_to_attr_dict = True )
return response
# end lock_table
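# Illustrative usage sketch (not part of the generated API): check a table's
# lock state and then make the table read-only, assuming `db` is a connected
# GPUdb handle and the table name is a placeholder.
#
#     status = db.lock_table( table_name = 'ki_home.example_table' )
#     print( status['lock_type'] )   # default 'status' only reports the state
#     db.lock_table( table_name = 'ki_home.example_table',
#                    lock_type = 'read_only' )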
# begin match_graph
def match_graph( self, graph_name = None, sample_points = None, solve_method =
'markov_chain', solution_table = '', options = {} ):
"""Matches a directed route implied by a given set of
latitude/longitude points to an existing underlying road network graph
using a
given solution type.
IMPORTANT: It's highly recommended that you review the
`Network Graphs & Solvers
<../../../../graph_solver/network_graph_solver/>`__
concepts documentation, the
`Graph REST Tutorial <../../../../guides/graph_rest_guide/>`__,
and/or some
`/match/graph examples <../../../../guide-tags/graph---match/>`__
before using this endpoint.
Parameters:
graph_name (str)
Name of the underlying geospatial graph resource to match to
using input parameter *sample_points*.
sample_points (list of str)
Sample points used to match to an underlying geospatial
graph. Sample points must be specified using
`identifiers
<../../../../graph_solver/network_graph_solver/#match-identifiers>`__;
identifiers are grouped as
`combinations
<../../../../graph_solver/network_graph_solver/#match-combinations>`__.
Identifiers can be used with: existing column names, e.g.,
'table.column AS SAMPLE_X'; expressions, e.g.,
'ST_MAKEPOINT(table.x, table.y) AS SAMPLE_WKTPOINT'; or
constant values, e.g.,
'{1, 2, 10} AS SAMPLE_TRIPID'. The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
solve_method (str)
The type of solver to use for graph matching.
Allowed values are:
* **markov_chain** --
Matches input parameter *sample_points* to the graph using
the Hidden Markov Model (HMM)-based method, which conducts a
range-tree closest-edge search to find the best combinations
of possible road segments (*num_segments*) for each sample
point to create the best route. The route is secured one
point at a time while looking ahead *chain_width* number of
points, so the prediction is corrected after each point. This
solution type is the most accurate but also the most
computationally intensive. Related options: *num_segments*
and *chain_width*.
* **match_od_pairs** --
Matches input parameter *sample_points* to find the most
probable path between origin and destination pairs with cost
constraints.
* **match_supply_demand** --
Matches input parameter *sample_points* to optimize
scheduling multiple supplies (trucks) with varying sizes to
varying demand sites with varying capacities per depot.
Related options: *partial_loading* and *max_combinations*.
* **match_batch_solves** --
Matches input parameter *sample_points* source and
destination pairs for the shortest path solves in batch mode.
* **match_loops** --
Matches closed loops (Eulerian paths) originating and ending
at each graph node within min and max hops (levels).
* **match_charging_stations** --
Matches an optimal path across a number of ev-charging
stations between source and target locations.
* **match_similarity** --
Matches the intersection set(s) by computing the Jaccard
similarity score between node pairs.
* **match_pickup_dropoff** --
Matches the pickups and dropoffs by optimizing the total trip
costs.
* **match_clusters** --
Matches the graph nodes with a cluster index using the Louvain
clustering algorithm.
* **match_pattern** --
Matches a pattern in the graph.
The default value is 'markov_chain'.
solution_table (str)
The name of the table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. This
table contains a `track
<../../../../location_intelligence/geo_objects/#geospatial-tracks>`__
of geospatial points for the matched portion of the graph, a
track ID, and a score value. Also outputs a details table
containing a trip ID (that matches the track ID), the
latitude/longitude pair, the timestamp the point was recorded
at, and an edge ID corresponding to the matched road segment.
Must not be an existing table of the same name. The default
value is ''.
options (dict of str to str)
Additional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **gps_noise** --
GPS noise value (in meters) to remove redundant sample
points. Use -1 to disable noise reduction. The default value
accounts for 95% of point variation (+ or -5 meters). The
default value is '5.0'.
* **num_segments** --
Maximum number of potentially matching road segments for each
sample point. For the *markov_chain* solver, the default is
3. The default value is '3'.
* **search_radius** --
Maximum search radius used when snapping sample points onto
potentially matching surrounding segments. The default value
corresponds to approximately 100 meters. The default value
is '0.001'.
* **chain_width** --
For the *markov_chain* solver only. Length of the sample
points lookahead window within the Markov kernel; the larger
the number, the more accurate the solution. The default
value is '9'.
* **source** --
Optional WKT starting point from input parameter
*sample_points* for the solver. The default behavior for the
endpoint is to use time to determine the starting point. The
default value is 'POINT NULL'.
* **destination** --
Optional WKT ending point from input parameter
*sample_points* for the solver. The default behavior for the
endpoint is to use time to determine the destination point.
The default value is 'POINT NULL'.
* **partial_loading** --
For the *match_supply_demand* solver only. When false
(non-default), trucks do not off-load at the demand (store)
side if the remainder is less than the store's need.
Allowed values are:
* **true** --
Partial off-loading at multiple store (demand) locations
* **false** --
No partial off-loading allowed if supply is less than the
store's demand.
The default value is 'true'.
* **max_combinations** --
For the *match_supply_demand* solver only. This is the cutoff
for the number of generated combinations for sequencing the
demand locations - can increase this up to 2M. The default
value is '10000'.
* **max_supply_combinations** --
For the *match_supply_demand* solver only. This is the cutoff
for the number of generated combinations for sequencing the
supply locations if/when 'permute_supplies' is true. The
default value is '10000'.
* **left_turn_penalty** --
This will add an additional weight over the edges labelled as
'left turn' if the 'add_turn' option parameter of the
:meth:`GPUdb.create_graph` was invoked at graph creation.
The default value is '0.0'.
* **right_turn_penalty** --
This will add an additional weight over the edges labelled as
'right turn' if the 'add_turn' option parameter of the
:meth:`GPUdb.create_graph` was invoked at graph creation.
The default value is '0.0'.
* **intersection_penalty** --
This will add an additional weight over the edges labelled as
'intersection' if the 'add_turn' option parameter of the
:meth:`GPUdb.create_graph` was invoked at graph creation.
The default value is '0.0'.
* **sharp_turn_penalty** --
This will add an additional weight over the edges labelled as
'sharp turn' or 'u-turn' if the 'add_turn' option parameter
of the :meth:`GPUdb.create_graph` was invoked at graph
creation. The default value is '0.0'.
* **aggregated_output** --
For the *match_supply_demand* solver only. When it is true
(default), each record in the output table shows a particular
truck's scheduled cumulative round trip path
(MULTILINESTRING) and the corresponding aggregated cost.
Otherwise, each record shows a single scheduled truck route
(LINESTRING) towards a particular demand location (store id)
with its corresponding cost. The default value is 'true'.
* **output_tracks** --
For the *match_supply_demand* solver only. When it is true
(non-default), the output will be in tracks format for all
the round trips of each truck in which the timestamps are
populated directly from the edge weights starting from their
originating depots. The default value is 'false'.
* **max_trip_cost** --
For the *match_supply_demand* and *match_pickup_dropoff*
solvers only. If this constraint is greater than zero
(default) then the trucks/rides will skip travelling from one
demand/pick location to another if the cost between them is
greater than this number (distance or time). Zero (default)
value means no check is performed. The default value is
'0.0'.
* **filter_folding_paths** --
For the *markov_chain* solver only. When true (non-default),
the paths per sequence combination are checked for folding
over patterns, which can significantly increase the execution
time depending on the chain width and the number of gps
samples.
Allowed values are:
* **true** --
Filter out the folded paths.
* **false** --
Do not filter out the folded paths
The default value is 'false'.
* **unit_unloading_cost** --
For the *match_supply_demand* solver only. The unit cost per
load amount to be delivered. If this value is greater than
zero (default) then the additional cost of this unit load
multiplied by the total dropped load will be added over to
the trip cost to the demand location. The default value is
'0.0'.
* **max_num_threads** --
For the *markov_chain* solver only. If specified (greater
than zero), the maximum number of threads will not be greater
than the specified value. It can be lower due to the memory
and the number of cores available. Default value of zero allows
the algorithm to set the maximal number of threads within
these constraints. The default value is '0'.
* **service_limit** --
For the *match_supply_demand* solver only. If specified
(greater than zero), any supply actor's total service cost
(distance or time) will be limited by the specified value
including multiple rounds (if set). The default value is
'0.0'.
* **enable_reuse** --
For the *match_supply_demand* solver only. If specified
(true), all supply actors can be scheduled for second rounds
from their originating depots.
Allowed values are:
* **true** --
Allows reusing supply actors (trucks, e.g.) for scheduling
again.
* **false** --
Supply actors are scheduled only once from their depots.
The default value is 'false'.
* **max_stops** --
For the *match_supply_demand* solver only. If specified
(greater than zero), a supply actor (truck) can at most have
this many stops (demand locations) in one round trip.
Otherwise, it is unlimited. If 'enable_truck_reuse' is on,
this condition will be applied separately at each round trip
use of the same truck. The default value is '0'.
* **service_radius** --
For the *match_supply_demand* and *match_pickup_dropoff*
solvers only. If specified (greater than zero), it filters
the demands/picks outside this radius centered around the
supply actor/ride's originating location (distance or time).
The default value is '0.0'.
* **permute_supplies** --
For the *match_supply_demand* solver only. If specified
(true), supply side actors are permuted for the demand
combinations during msdo optimization - note that this option
increases optimization time significantly - use of
'max_combinations' option is recommended to prevent
prohibitively long runs.
Allowed values are:
* **true** --
Generates sequences over supply side permutations if total
supply is less than twice the total demand
* **false** --
Permutations are not performed, rather a specific order of
supplies based on capacity is computed
The default value is 'true'.
* **batch_tsm_mode** --
For the *match_supply_demand* solver only. When enabled, the
number of visits on each demand location by a single salesman
at each trip is limited to one (1); otherwise there is no
bound.
Allowed values are:
* **true** --
Sets only one visit per demand location by a salesman (tsm
mode)
* **false** --
No preset limit (usual msdo mode)
The default value is 'false'.
* **round_trip** --
For the *match_supply_demand* solver only. When enabled, the
supply will have to return back to the origination location.
Allowed values are:
* **true** --
The optimization is done for trips in round trip manner
always returning to originating locations
* **false** --
Supplies do not have to come back to their originating
locations in their routes. The routes are considered
finished at the final dropoff.
The default value is 'true'.
* **num_cycles** --
For the *match_clusters* solver only. Terminates the cluster
exchange iterations across 2-step-cycles (outer loop) when
quality does not improve during iterations. The default
value is '10'.
* **num_loops_per_cycle** --
For the *match_clusters* solver only. Terminates the cluster
exchanges within the first step iterations of a cycle (inner
loop) unless convergence is reached. The default value is
'10'.
* **num_output_clusters** --
For the *match_clusters* solver only. Limits the output to
the top 'num_output_clusters' clusters based on density.
Default value of zero outputs all clusters. The default
value is '0'.
* **max_num_clusters** --
For the *match_clusters* solver only. If set (value greater
than zero), it terminates when the number of clusters goes
below this number. The default value is '0'.
* **cluster_quality_metric** --
For the *match_clusters* solver only. The quality metric for
Louvain modularity optimization solver.
Allowed values are:
* **girvan** --
Uses the Newman Girvan quality metric for cluster solver
* **spectral** --
Applies recursive spectral bisection (RSB) partitioning
solver
The default value is 'girvan'.
* **restricted_type** --
For the *match_supply_demand* solver only. Optimization is
performed by restricting routes labeled by
'MSDO_ODDEVEN_RESTRICTED' only for this supply actor (truck)
type.
Allowed values are:
* **odd** --
Applies odd/even rule restrictions to odd tagged vehicles.
* **even** --
Applies odd/even rule restrictions to even tagged vehicles.
* **none** --
Does not apply odd/even rule restrictions to any vehicles.
The default value is 'none'.
* **server_id** --
Indicates which graph server(s) to send the request to.
Default is to send to the server, amongst those containing
the corresponding graph, that has the most computational
bandwidth. The default value is ''.
* **inverse_solve** --
For the *match_batch_solves* solver only. Solves
source-destination pairs using inverse shortest path solver.
Allowed values are:
* **true** --
Solves using inverse shortest path solver.
* **false** --
Solves using direct shortest path solver.
The default value is 'false'.
* **min_loop_level** --
For the *match_loops* solver only. Finds closed loops around
each node deducible not less than this minimal hop (level)
deep. The default value is '0'.
* **max_loop_level** --
For the *match_loops* solver only. Finds closed loops around
each node deducible not more than this maximal hop (level)
deep. The default value is '5'.
* **search_limit** --
For the *match_loops* solver only. Searches within this limit
of nodes per vertex to detect loops. The value zero means
there is no limit. The default value is '10000'.
* **output_batch_size** --
For the *match_loops* solver only. Uses this value as the
batch size of the number of loops in flushing (inserting) to
the output table. The default value is '1000'.
* **charging_capacity** --
For the *match_charging_stations* solver only. This is the
maximum ev-charging capacity of a vehicle (distance in meters
or time in seconds depending on the unit of the graph
weights). The default value is '300000.0'.
* **charging_candidates** --
For the *match_charging_stations* solver only. The solver
searches for this many stations closest to each
base charging location found by capacity. The default value
is '10'.
* **charging_penalty** --
For the *match_charging_stations* solver only. This is the
penalty for full charging. The default value is '30000.0'.
* **max_hops** --
For the *match_similarity* solver only. Searches within this
maximum hops for source and target node pairs to compute the
Jaccard scores. The default value is '3'.
* **traversal_node_limit** --
For the *match_similarity* solver only. Limits the traversal
depth if it reaches this many nodes. The default
value is '1000'.
* **paired_similarity** --
For the *match_similarity* solver only. If true, it computes
Jaccard score between each pair, otherwise it will compute
Jaccard from the intersection set between the source and
target nodes.
Allowed values are:
* true
* false
The default value is 'true'.
* **force_undirected** --
For the *match_pattern* solver only. If set to true, pattern
matching will treat both the pattern and the graph as undirected.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
result (bool)
Indicates a successful solution.
match_score (float)
The mean square error calculation representing the map matching
score. Values closer to zero are better.
info (dict of str to str)
Additional information.
"""
assert isinstance( graph_name, (basestring)), "match_graph(): Argument 'graph_name' must be (one) of type(s) '(basestring)'; given %s" % type( graph_name ).__name__
sample_points = sample_points if isinstance( sample_points, list ) else ( [] if (sample_points is None) else [ sample_points ] )
assert isinstance( solve_method, (basestring)), "match_graph(): Argument 'solve_method' must be (one) of type(s) '(basestring)'; given %s" % type( solve_method ).__name__
assert isinstance( solution_table, (basestring)), "match_graph(): Argument 'solution_table' must be (one) of type(s) '(basestring)'; given %s" % type( solution_table ).__name__
assert isinstance( options, (dict)), "match_graph(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['graph_name'] = graph_name
obj['sample_points'] = sample_points
obj['solve_method'] = solve_method
obj['solution_table'] = solution_table
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/match/graph', obj, convert_to_attr_dict = True )
return response
# end match_graph
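# Illustrative usage sketch (not part of the generated API): map-match GPS
# samples against a road network graph with the default markov_chain solver,
# assuming `db` is a connected GPUdb handle and the graph, table, and column
# names are placeholders.
#
#     response = db.match_graph(
#         graph_name = 'road_network',
#         sample_points = [ 'gps_samples.x AS SAMPLE_X',
#                           'gps_samples.y AS SAMPLE_Y',
#                           'gps_samples.ts AS SAMPLE_TIME' ],
#         solve_method = 'markov_chain',
#         solution_table = 'ki_home.matched_route',
#         options = { 'chain_width': '9' } )
#     print( response['match_score'] )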
# begin merge_records
def merge_records( self, table_name = None, source_table_names = None,
field_maps = None, options = {} ):
"""Create a new empty result table (specified by input parameter
*table_name*),
and insert all records from source tables
(specified by input parameter *source_table_names*) based on the field
mapping
information (specified by input parameter *field_maps*).
For merge records details and examples, see
`Merge Records <../../../../concepts/merge_records/>`__. For
limitations, see
`Merge Records Limitations and Cautions
<../../../../concepts/merge_records/#limitations-and-cautions>`__.
The field map (specified by input parameter *field_maps*) holds the
user-specified maps
of target table column names to source table columns. The array of
input parameter *field_maps* must match one-to-one with the input
parameter *source_table_names*,
e.g., there's a map present in input parameter *field_maps* for each
table listed in
input parameter *source_table_names*.
Parameters:
table_name (str)
The name of the new result table for the records to be merged
into, in [schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
NOT be an existing table.
source_table_names (list of str)
The list of names of source tables to get the records from,
each in [schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be existing table names. The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
field_maps (list of dicts of str to str)
Contains a list of source/target column mappings, one mapping
for each source table listed in input parameter
*source_table_names* being merged into the target table
specified by input parameter *table_name*. Each mapping
contains the target column names (as keys) that the data in the
mapped source columns or column `expressions
<../../../../concepts/expressions/>`__ (as values) will be
merged into. All of the source columns being merged into a
given target column must match in type, as that type will
determine the type of the new target column. The user can
provide a single element (which will be automatically promoted
to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*table_name*. If *persist* is *false*, then this is always
allowed even if the caller does not have permission to create
tables. The generated name is returned in
*qualified_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
merged table as part of input parameter *table_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created merged
table specified by input parameter *table_name*.
* **is_replicated** --
Indicates the `distribution scheme
<../../../../concepts/tables/#distribution>`__ for the data
of the merged table specified in input parameter
*table_name*. If true, the table will be `replicated
<../../../../concepts/tables/#replication>`__. If false, the
table will be `randomly sharded
<../../../../concepts/tables/#random-sharding>`__.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the merged
table specified in input parameter *table_name*.
* **persist** --
If *true*, then the table specified in input parameter
*table_name* will be persisted and will not expire unless a
*ttl* is specified. If *false*, then the table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'true'.
* **chunk_size** --
Indicates the number of records per chunk to be used for the
merged table specified in input parameter *table_name*.
* **view_id** --
The view this result table is part of. The default value is ''.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_table_name** --
The fully qualified name of the result table (i.e. including
the schema)
"""
assert isinstance( table_name, (basestring)), "merge_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
source_table_names = source_table_names if isinstance( source_table_names, list ) else ( [] if (source_table_names is None) else [ source_table_names ] )
field_maps = field_maps if isinstance( field_maps, list ) else ( [] if (field_maps is None) else [ field_maps ] )
assert isinstance( options, (dict)), "merge_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['source_table_names'] = source_table_names
obj['field_maps'] = field_maps
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/merge/records', obj, convert_to_attr_dict = True )
return response
# end merge_records
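# Illustrative usage sketch (not part of the generated API): merge two source
# tables into a new result table, mapping each source column or expression
# onto a target column, assuming `db` is a connected GPUdb handle and all
# table/column names are placeholders.
#
#     response = db.merge_records(
#         table_name = 'ki_home.all_vehicles',
#         source_table_names = [ 'ki_home.cars', 'ki_home.trucks' ],
#         field_maps = [ { 'id': 'car_id',   'wheels': '4' },
#                        { 'id': 'truck_id', 'wheels': 'axles * 2' } ],
#         options = { 'persist': 'true' } )
#     print( response['info'].get( 'qualified_table_name' ) )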
# begin modify_graph
def modify_graph( self, graph_name = None, nodes = None, edges = None, weights =
None, restrictions = None, options = {} ):
"""Update an existing graph network using given nodes, edges, weights,
restrictions, and options.
IMPORTANT: It's highly recommended that you review the
`Network Graphs & Solvers
<../../../../graph_solver/network_graph_solver/>`__
concepts documentation, and
`Graph REST Tutorial <../../../../guides/graph_rest_guide/>`__
before using this endpoint.
Parameters:
graph_name (str)
Name of the graph resource to modify.
nodes (list of str)
Nodes with which to update existing input parameter *nodes* in
graph specified by input parameter *graph_name*. Review `Nodes
<../../../../graph_solver/network_graph_solver/#nodes>`__ for
more information. Nodes must be specified using `identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as `combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS NODE_ID', expressions, e.g.,
'ST_MAKEPOINT(column1, column2) AS NODE_WKTPOINT', or raw
values, e.g., '{9, 10, 11} AS NODE_ID'. If using raw values in
an identifier combination, the number of values specified must
match across the combination. Identifier combination(s) do not
have to match the method used to create the graph, e.g., if
column names were specified to create the graph, expressions or
raw values could also be used to modify the graph. The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
edges (list of str)
Edges with which to update existing input parameter *edges* in
graph specified by input parameter *graph_name*. Review `Edges
<../../../../graph_solver/network_graph_solver/#edges>`__ for
more information. Edges must be specified using `identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as `combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS EDGE_ID', expressions, e.g., 'SUBSTR(column,
1, 6) AS EDGE_NODE1_NAME', or raw values, e.g., "{'family',
'coworker'} AS EDGE_LABEL". If using raw values in an
identifier combination, the number of values specified must
match across the combination. Identifier combination(s) do not
have to match the method used to create the graph, e.g., if
column names were specified to create the graph, expressions or
raw values could also be used to modify the graph. The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
weights (list of str)
Weights with which to update existing input parameter *weights*
in graph specified by input parameter *graph_name*. Review
`Weights
<../../../../graph_solver/network_graph_solver/#graph-weights>`__
for more information. Weights must be specified using
`identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as `combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS WEIGHTS_EDGE_ID', expressions, e.g.,
'ST_LENGTH(wkt) AS WEIGHTS_VALUESPECIFIED', or raw values,
e.g., '{4, 15} AS WEIGHTS_VALUESPECIFIED'. If using raw values
in an identifier combination, the number of values specified
must match across the combination. Identifier combination(s) do
not have to match the method used to create the graph, e.g., if
column names were specified to create the graph, expressions or
raw values could also be used to modify the graph. The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
restrictions (list of str)
Restrictions with which to update existing input parameter
*restrictions* in graph specified by input parameter
*graph_name*. Review `Restrictions
<../../../../graph_solver/network_graph_solver/#graph-restrictions>`__
for more information. Restrictions must be specified using
`identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as `combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS RESTRICTIONS_EDGE_ID', expressions, e.g.,
'column/2 AS RESTRICTIONS_VALUECOMPARED', or raw values, e.g.,
'{0, 0, 0, 1} AS RESTRICTIONS_ONOFFCOMPARED'. If using raw
values in an identifier combination, the number of values
specified must match across the combination. Identifier
combination(s) do not have to match the method used to create
the graph, e.g., if column names were specified to create the
graph, expressions or raw values could also be used to modify
the graph. The user can provide a single element (which will
be automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **restriction_threshold_value** --
Value-based restriction comparison. Any node or edge with a
RESTRICTIONS_VALUECOMPARED value greater than the
*restriction_threshold_value* will not be included in the
graph.
* **export_create_results** --
If set to *true*, returns the graph topology in the response
as arrays.
Allowed values are:
* true
* false
The default value is 'false'.
* **enable_graph_draw** --
If set to *true*, adds an 'EDGE_WKTLINE' column identifier to
the specified *graph_table* so the graph can be viewed via
WMS; for social and non-geospatial graphs, the 'EDGE_WKTLINE'
column identifier will be populated with spatial coordinates
derived from a flattening layout algorithm so the graph can
still be viewed.
Allowed values are:
* true
* false
The default value is 'false'.
* **save_persist** --
If set to *true*, the graph will be saved in the persist
directory (see the `config reference <../../../../config/>`__
for more information). If set to *false*, the graph will be
removed when the graph server is shutdown.
Allowed values are:
* true
* false
The default value is 'false'.
* **add_table_monitor** --
Adds a table monitor to every table used in the creation of
the graph; this table monitor will trigger the graph to
update dynamically upon inserts to the source table(s). Note
that upon database restart, if *save_persist* is also set to
*true*, the graph will be fully reconstructed and the table
monitors will be reattached. For more details on table
monitors, see :meth:`GPUdb.create_table_monitor`.
Allowed values are:
* true
* false
The default value is 'false'.
* **graph_table** --
If specified, the created graph is also created as a table
with the given name, in [schema_name.]table_name format,
using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
This table will have the following identifier columns:
'EDGE_ID', 'EDGE_NODE1_ID', 'EDGE_NODE2_ID'. If left blank,
no table is created. The default value is ''.
* **remove_label_only** --
When RESTRICTIONS on labeled entities are requested, if set to
true, this will NOT delete the entity but only the label
associated with the entity. Otherwise (default), it will delete
the label AND the entity.
Allowed values are:
* true
* false
The default value is 'false'.
* **add_turns** --
Adds dummy 'pillowed' edges around intersection nodes where
there are more than three edges so that additional weight
penalties can be imposed by the solve endpoints (this
increases the total number of edges).
Allowed values are:
* true
* false
The default value is 'false'.
* **turn_angle** --
A value in degrees that modifies the thresholds for attributing
right, left, sharp turns, and intersections. It is the
vertical deviation angle from the incoming edge to the
intersection node. The larger the value, the larger the
threshold for sharp turns and intersections; the smaller the
value, the larger the threshold for right and left turns; 0 <
turn_angle < 90. The default value is '60'.
* **use_rtree** --
Use a range tree structure to accelerate and improve the
accuracy of snapping, especially to edges.
Allowed values are:
* true
* false
The default value is 'true'.
* **label_delimiter** --
If provided the label string will be split according to this
delimiter and each sub-string will be applied as a separate
label onto the specified edge. The default value is ''.
* **allow_multiple_edges** --
Multigraph choice; allowing multiple edges with the same node
pairs if set to true, otherwise, new edges with existing same
node pairs will not be inserted.
Allowed values are:
* true
* false
The default value is 'true'.
Returns:
A dict with the following entries--
result (bool)
Indicates a successful modification on all servers.
num_nodes (long)
Total number of nodes in the graph.
num_edges (long)
Total number of edges in the graph.
edges_ids (list of longs)
Edges given as pairs of node indices. Only populated if
*export_create_results* is set to *true*.
info (dict of str to str)
Additional information.
"""
assert isinstance( graph_name, (basestring)), "modify_graph(): Argument 'graph_name' must be (one) of type(s) '(basestring)'; given %s" % type( graph_name ).__name__
nodes = nodes if isinstance( nodes, list ) else ( [] if (nodes is None) else [ nodes ] )
edges = edges if isinstance( edges, list ) else ( [] if (edges is None) else [ edges ] )
weights = weights if isinstance( weights, list ) else ( [] if (weights is None) else [ weights ] )
restrictions = restrictions if isinstance( restrictions, list ) else ( [] if (restrictions is None) else [ restrictions ] )
assert isinstance( options, (dict)), "modify_graph(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['graph_name'] = graph_name
obj['nodes'] = nodes
obj['edges'] = edges
obj['weights'] = weights
obj['restrictions'] = restrictions
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/modify/graph', obj, convert_to_attr_dict = True )
return response
# end modify_graph
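# Illustrative usage sketch (not part of the generated API): add new edges and
# weights to an existing graph, assuming `db` is a connected GPUdb handle and
# the graph, table, and column names are placeholders.
#
#     response = db.modify_graph(
#         graph_name = 'road_network',
#         nodes = [],
#         edges = [ 'new_roads.id AS EDGE_ID',
#                   'new_roads.wkt AS EDGE_WKTLINE' ],
#         weights = [ 'new_roads.id AS WEIGHTS_EDGE_ID',
#                     'ST_LENGTH(new_roads.wkt) AS WEIGHTS_VALUESPECIFIED' ],
#         restrictions = [],
#         options = { 'save_persist': 'true' } )
#     print( response['num_edges'] )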
# begin query_graph
def query_graph( self, graph_name = None, queries = None, restrictions = [],
adjacency_table = '', rings = 1, options = {} ):
"""Employs a topological query on a network graph generated a-priori by
:meth:`GPUdb.create_graph` and returns a list of adjacent edge(s) or
node(s),
also known as an adjacency list, depending on what's been provided to
the
endpoint; providing edges will return nodes and providing nodes will
return
edges.
To determine the node(s) or edge(s) adjacent to a value from a given
column,
provide a list of values to input parameter *queries*. This field can
be populated with
column values from any table as long as the type is supported by the
given
identifier. See
`Query Identifiers
<../../../../graph_solver/network_graph_solver/#query-identifiers>`__
for more information.
To return the adjacency list in the response, leave input parameter
*adjacency_table*
empty.
IMPORTANT: It's highly recommended that you review the
`Network Graphs & Solvers
<../../../../graph_solver/network_graph_solver/>`__
concepts documentation, the
`Graph REST Tutorial <../../../../guides/graph_rest_guide/>`__,
and/or some
`/query/graph examples <../../../../guide-tags/graph---query>`__
before using this endpoint.
Parameters:
graph_name (str)
Name of the graph resource to query.
queries (list of str)
Nodes or edges to be queried specified using `query identifiers
<../../../../graph_solver/network_graph_solver/#query-identifiers>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS QUERY_NODE_ID', raw values, e.g., '{0, 2} AS
QUERY_NODE_ID', or expressions, e.g., 'ST_MAKEPOINT(table.x,
table.y) AS QUERY_NODE_WKTPOINT'. Multiple values can be
provided as long as the same identifier is used for all values.
If using raw values in an identifier combination, the number of
values specified must match across the combination. The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
restrictions (list of str)
Additional restrictions to apply to the nodes/edges of an
existing graph. Restrictions must be specified using
`identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as `combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS RESTRICTIONS_EDGE_ID', expressions, e.g.,
'column/2 AS RESTRICTIONS_VALUECOMPARED', or raw values, e.g.,
'{0, 0, 0, 1} AS RESTRICTIONS_ONOFFCOMPARED'. If using raw
values in an identifier combination, the number of values
specified must match across the combination. The default value
is an empty list ( [] ). The user can provide a single element
(which will be automatically promoted to a list internally) or
a list.
adjacency_table (str)
Name of the table to store the resulting adjacencies, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. If
left blank, the query results are instead returned in the
response. If the 'QUERY_TARGET_NODE_LABEL' `query identifier
<../../../../graph_solver/network_graph_solver/#query-identifiers>`__
is used in input parameter *queries*, then two additional
columns will be available: 'PATH_ID' and 'RING_ID'. See `Using
Labels
<../../../../graph_solver/network_graph_solver/#using-labels>`__
for more information. The default value is ''.
rings (int)
Sets the number of rings around the node to query for
adjacency, with '1' being the edges directly attached to the
queried node. Also known as number of hops. For example, if it
is set to '2', the edge(s) directly attached to the queried
node(s) will be returned; in addition, the edge(s) attached to
the node(s) attached to the initial ring of edge(s) surrounding
the queried node(s) will be returned. If the value is set to
'0', any nodes that meet the criteria in input parameter
*queries* and input parameter *restrictions* will be returned.
This parameter is only applicable when querying nodes. The
default value is 1.
options (dict of str to str)
Additional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **force_undirected** --
If set to *true*, all inbound edges and outbound edges
relative to the node will be returned. If set to *false*,
only outbound edges relative to the node will be returned.
This parameter is only applicable if the queried graph input
parameter *graph_name* is directed and when querying nodes.
Consult `Directed Graphs
<../../../../graph_solver/network_graph_solver/#directed-graphs>`__
for more details.
Allowed values are:
* true
* false
The default value is 'false'.
* **limit** --
When specified (>0), limits the number of query results. The
size of the nodes table will be limited by the *limit* value.
The default value is '0'.
* **output_wkt_path** --
If true then concatenated wkt line segments will be added as
the WKT column of the adjacency table.
Allowed values are:
* true
* false
The default value is 'false'.
* **and_labels** --
If set to *true*, the result of the query has entities that
satisfy all of the target labels, instead of any.
Allowed values are:
* true
* false
The default value is 'false'.
* **server_id** --
Indicates which graph server(s) to send the request to.
Default is to send to the server, amongst those containing
the corresponding graph, that has the most computational
bandwidth.
* **output_charn_length** --
When specified (>0 and <=256), limits the character length on
the output tables for string-based nodes. The default length
is 64. The default value is '64'.
* **find_common_labels** --
If set to true, for many-to-many queries or multi-level
traversals, it lists the common labels between the source and
target nodes and edge labels in each path. Otherwise (zero
rings), it'll list all labels of the node(s) queried.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
result (bool)
Indicates a successful query.
adjacency_list_int_array (list of longs)
The adjacency entity integer ID: either edge IDs per node
requested (if using QUERY_EDGE_ID or QUERY_NODE1_ID and
QUERY_NODE2_ID in the input) or two node IDs per edge requested
(if using QUERY_NODE_ID in the input).
adjacency_list_string_array (list of str)
The adjacency entity string ID: either edge IDs per node
requested (if using QUERY_EDGE_NAME or QUERY_NODE1_NAME and
QUERY_NODE2_NAME in the input) or two node IDs per edge
requested (if using QUERY_NODE_NAME in the input).
adjacency_list_wkt_array (list of str)
The adjacency entity WKTPOINT or WKTLINE ID: either edge IDs
per node requested (if using QUERY_EDGE_WKTLINE or
QUERY_NODE1_WKTPOINT and QUERY_NODE2_WKTPOINT in the input) or
two node IDs per edge requested (if using QUERY_NODE_WKTPOINT
in the input).
info (dict of str to str)
Additional information.
"""
assert isinstance( graph_name, (basestring)), "query_graph(): Argument 'graph_name' must be (one) of type(s) '(basestring)'; given %s" % type( graph_name ).__name__
queries = queries if isinstance( queries, list ) else ( [] if (queries is None) else [ queries ] )
restrictions = restrictions if isinstance( restrictions, list ) else ( [] if (restrictions is None) else [ restrictions ] )
assert isinstance( adjacency_table, (basestring)), "query_graph(): Argument 'adjacency_table' must be (one) of type(s) '(basestring)'; given %s" % type( adjacency_table ).__name__
assert isinstance( rings, (int, long, float)), "query_graph(): Argument 'rings' must be (one) of type(s) '(int, long, float)'; given %s" % type( rings ).__name__
assert isinstance( options, (dict)), "query_graph(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['graph_name'] = graph_name
obj['queries'] = queries
obj['restrictions'] = restrictions
obj['adjacency_table'] = adjacency_table
obj['rings'] = rings
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/query/graph', obj, convert_to_attr_dict = True )
return response
# end query_graph
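# Illustrative usage sketch (not part of the generated API): query the edges
# within two rings of a set of node IDs and return the adjacency list in the
# response, assuming `db` is a connected GPUdb handle and the graph name and
# node IDs are placeholders.
#
#     response = db.query_graph(
#         graph_name = 'road_network',
#         queries = [ '{5, 7} AS QUERY_NODE_ID' ],
#         restrictions = [],
#         adjacency_table = '',   # empty: results come back in the response
#         rings = 2 )
#     print( response['adjacency_list_int_array'] )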
# begin repartition_graph
def repartition_graph( self, graph_name = None, options = {} ):
"""Rebalances an existing partitioned graph.
IMPORTANT: It's highly recommended that you review the
`Network Graphs & Solvers
<../../../../graph_solver/network_graph_solver/>`__
concepts documentation, the
`Graph REST Tutorial <../../../../guides/graph_rest_guide/>`__,
and/or some `graph examples <../../../../guide-tags/graph/>`__ before
using this endpoint.
Parameters:
graph_name (str)
Name of the graph resource to rebalance.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **new_graph_name** --
If a non-empty value is specified, the original graph will be
kept
(non-default behaviour) and a new balanced graph will be
created under this given name. When the
value is empty (default), the generated 'balanced' graph will
replace the original 'unbalanced'
graph under the same graph name. The default value is ''.
* **source_node** --
The distributed shortest path solve is run from this source
node to
all the nodes in the graph to create balanced partitions using
the iso-distance levels of the
solution. The source node is selected by the rebalance
algorithm automatically (default case when
the value is an empty string). Otherwise, the user specified
node is used as the source. The default value is ''.
* **sql_request_avro_json** --
The default value is ''.
Returns:
A dict with the following entries--
result (bool)
Indicates a successful rebalancing on all servers.
info (dict of str to str)
Additional information.
"""
assert isinstance( graph_name, (basestring)), "repartition_graph(): Argument 'graph_name' must be (one) of type(s) '(basestring)'; given %s" % type( graph_name ).__name__
assert isinstance( options, (dict)), "repartition_graph(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['graph_name'] = graph_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/repartition/graph', obj, convert_to_attr_dict = True )
return response
# end repartition_graph
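# Illustrative usage sketch (not part of the generated API): rebalance a
# partitioned graph into a new graph while keeping the original, assuming
# `db` is a connected GPUdb handle and the graph names are placeholders.
#
#     response = db.repartition_graph(
#         graph_name = 'road_network',
#         options = { 'new_graph_name': 'road_network_balanced' } )
#     print( response['result'] )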
# begin reserve_resource
def reserve_resource( self, component = None, name = None, action = None,
bytes_requested = 0, owner_id = 0, options = {} ):
assert isinstance( component, (basestring)), "reserve_resource(): Argument 'component' must be (one) of type(s) '(basestring)'; given %s" % type( component ).__name__
assert isinstance( name, (basestring)), "reserve_resource(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( action, (basestring)), "reserve_resource(): Argument 'action' must be (one) of type(s) '(basestring)'; given %s" % type( action ).__name__
assert isinstance( bytes_requested, (int, long, float)), "reserve_resource(): Argument 'bytes_requested' must be (one) of type(s) '(int, long, float)'; given %s" % type( bytes_requested ).__name__
assert isinstance( owner_id, (int, long, float)), "reserve_resource(): Argument 'owner_id' must be (one) of type(s) '(int, long, float)'; given %s" % type( owner_id ).__name__
assert isinstance( options, (dict)), "reserve_resource(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['component'] = component
obj['name'] = name
obj['action'] = action
obj['bytes_requested'] = bytes_requested
obj['owner_id'] = owner_id
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/reserve/resource', obj, convert_to_attr_dict = True )
return response
# end reserve_resource
# begin revoke_permission
def revoke_permission( self, principal = '', object = None, object_type = None,
permission = None, options = {} ):
"""Revoke user or role the specified permission on the specified object.
Parameters:
principal (str)
Name of the user or role for which the permission is being
revoked. Must be an existing user or role. The default value
is ''.
object (str)
Name of the object from which the permission is being revoked.
It is recommended to use a fully-qualified name when possible.
object_type (str)
The type of object on which the permission is being revoked.
Allowed values are:
* **context** --
Context
* **credential** --
Credential
* **datasink** --
Data Sink
* **datasource** --
Data Source
* **directory** --
KIFS File Directory
* **graph** --
A Graph object
* **proc** --
UDF Procedure
* **schema** --
Schema
* **sql_proc** --
SQL Procedure
* **system** --
System-level access
* **table** --
Database Table
* **table_monitor** --
Table monitor
permission (str)
Permission being revoked.
Allowed values are:
* **admin** --
Full read/write and administrative access on the object.
* **connect** --
Connect access on the given data source or data sink.
* **delete** --
Delete rows from tables.
* **execute** --
Ability to Execute the Procedure object.
* **insert** --
Insert access to tables.
* **read** --
Ability to read, list and use the object.
* **update** --
Update access to the table.
* **user_admin** --
Access to administer users and roles that do not have
system_admin permission.
* **write** --
Access to write, change and delete objects.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **columns** --
Revoke table security from these columns, comma-separated.
The default value is ''.
Returns:
A dict with the following entries--
principal (str)
Value of input parameter *principal*.
object (str)
Value of input parameter *object*.
object_type (str)
Value of input parameter *object_type*.
permission (str)
Value of input parameter *permission*.
info (dict of str to str)
Additional information.
"""
assert isinstance( principal, (basestring)), "revoke_permission(): Argument 'principal' must be (one) of type(s) '(basestring)'; given %s" % type( principal ).__name__
assert isinstance( object, (basestring)), "revoke_permission(): Argument 'object' must be (one) of type(s) '(basestring)'; given %s" % type( object ).__name__
assert isinstance( object_type, (basestring)), "revoke_permission(): Argument 'object_type' must be (one) of type(s) '(basestring)'; given %s" % type( object_type ).__name__
assert isinstance( permission, (basestring)), "revoke_permission(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( options, (dict)), "revoke_permission(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['principal'] = principal
obj['object'] = object
obj['object_type'] = object_type
obj['permission'] = permission
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/revoke/permission', obj, convert_to_attr_dict = True )
return response
# end revoke_permission
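# Illustrative usage (hypothetical names; assumes `db` is an established
# GPUdb connection and the user and table already exist):
#
#     response = db.revoke_permission(
#         principal   = 'analyst',
#         object      = 'ki_home.sales',
#         object_type = 'table',
#         permission  = 'read'
#     )
#     print( response.permission, response.info )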
# begin revoke_permission_credential
def revoke_permission_credential( self, name = None, permission = None,
credential_name = None, options = {} ):
"""Revokes a `credential-level permission
<../../../../security/sec_concepts/#security-concepts-permissions-credential>`__
from a user or role.
Parameters:
name (str)
Name of the user or role from which the permission will be
revoked. Must be an existing user or role.
permission (str)
Permission to revoke from the user or role.
Allowed values are:
* **credential_admin** --
Full read/write and administrative access on the credential.
* **credential_read** --
Ability to read and use the credential.
credential_name (str)
Name of the credential on which the permission will be revoked.
Must be an existing credential, or an empty string to revoke
access on all credentials.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
credential_name (str)
Value of input parameter *credential_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "revoke_permission_credential(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "revoke_permission_credential(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( credential_name, (basestring)), "revoke_permission_credential(): Argument 'credential_name' must be (one) of type(s) '(basestring)'; given %s" % type( credential_name ).__name__
assert isinstance( options, (dict)), "revoke_permission_credential(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['credential_name'] = credential_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/revoke/permission/credential', obj, convert_to_attr_dict = True )
return response
# end revoke_permission_credential
# begin revoke_permission_datasource
def revoke_permission_datasource( self, name = None, permission = None,
datasource_name = None, options = {} ):
"""Revokes a `data source <../../../../concepts/data_sources/>`__
permission from a user or role.
Parameters:
name (str)
Name of the user or role from which the permission will be
revoked. Must be an existing user or role.
permission (str)
Permission to revoke from the user or role
Allowed values are:
* **admin** --
Admin access on the given data source
* **connect** --
Connect access on the given data source
datasource_name (str)
Name of the data source on which the permission will be
revoked. Must be an existing data source, or an empty string to
revoke permission from all data sources.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
datasource_name (str)
Value of input parameter *datasource_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "revoke_permission_datasource(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "revoke_permission_datasource(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( datasource_name, (basestring)), "revoke_permission_datasource(): Argument 'datasource_name' must be (one) of type(s) '(basestring)'; given %s" % type( datasource_name ).__name__
assert isinstance( options, (dict)), "revoke_permission_datasource(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['datasource_name'] = datasource_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/revoke/permission/datasource', obj, convert_to_attr_dict = True )
return response
# end revoke_permission_datasource
# begin revoke_permission_directory
def revoke_permission_directory( self, name = None, permission = None,
directory_name = None, options = {} ):
"""Revokes a `KiFS <../../../../tools/kifs/>`__ directory-level permission
from a user or role.
Parameters:
name (str)
Name of the user or role from which the permission will be
revoked. Must be an existing user or role.
permission (str)
Permission to revoke from the user or role.
Allowed values are:
* **directory_read** --
For files in the directory, access to list files, download
files, or use files in server side functions
* **directory_write** --
Access to upload files to, or delete files from, the
directory. A user or role with write access automatically has
read access
directory_name (str)
Name of the KiFS directory on which the permission will be
revoked
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
directory_name (str)
Value of input parameter *directory_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "revoke_permission_directory(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "revoke_permission_directory(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( directory_name, (basestring)), "revoke_permission_directory(): Argument 'directory_name' must be (one) of type(s) '(basestring)'; given %s" % type( directory_name ).__name__
assert isinstance( options, (dict)), "revoke_permission_directory(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['directory_name'] = directory_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/revoke/permission/directory', obj, convert_to_attr_dict = True )
return response
# end revoke_permission_directory
# begin revoke_permission_proc
def revoke_permission_proc( self, name = None, permission = None, proc_name =
None, options = {} ):
"""Revokes a proc-level permission from a user or role.
Parameters:
name (str)
Name of the user or role from which the permission will be
revoked. Must be an existing user or role.
permission (str)
Permission to revoke from the user or role.
Allowed values are:
* **proc_admin** --
Admin access to the proc.
* **proc_execute** --
Execute access to the proc.
proc_name (str)
Name of the proc on which the permission will be revoked. Must
be an existing proc, or an empty string to revoke the permission
on all procs.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
proc_name (str)
Value of input parameter *proc_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "revoke_permission_proc(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "revoke_permission_proc(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( proc_name, (basestring)), "revoke_permission_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__
assert isinstance( options, (dict)), "revoke_permission_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['proc_name'] = proc_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/revoke/permission/proc', obj, convert_to_attr_dict = True )
return response
# end revoke_permission_proc
# begin revoke_permission_system
def revoke_permission_system( self, name = None, permission = None, options = {}
):
"""Revokes a system-level permission from a user or role.
Parameters:
name (str)
Name of the user or role from which the permission will be
revoked. Must be an existing user or role.
permission (str)
Permission to revoke from the user or role.
Allowed values are:
* **system_admin** --
Full access to all data and system functions.
* **system_user_admin** --
Access to administer users and roles that do not have
system_admin permission.
* **system_write** --
Read and write access to all tables.
* **system_read** --
Read-only access to all tables.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "revoke_permission_system(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "revoke_permission_system(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( options, (dict)), "revoke_permission_system(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/revoke/permission/system', obj, convert_to_attr_dict = True )
return response
# end revoke_permission_system
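# Illustrative usage (hypothetical user name; assumes `db` is an
# established GPUdb connection):
#
#     response = db.revoke_permission_system(
#         name       = 'analyst',
#         permission = 'system_write'
#     )
#     print( response.info )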
# begin revoke_permission_table
def revoke_permission_table( self, name = None, permission = None, table_name =
None, options = {} ):
"""Revokes a table-level permission from a user or role.
Parameters:
name (str)
Name of the user or role from which the permission will be
revoked. Must be an existing user or role.
permission (str)
Permission to revoke from the user or role.
Allowed values are:
* **table_admin** --
Full read/write and administrative access to the table.
* **table_insert** --
Insert access to the table.
* **table_update** --
Update access to the table.
* **table_delete** --
Delete access to the table.
* **table_read** --
Read access to the table.
table_name (str)
Name of the table on which the permission will be revoked, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table, view or schema.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **columns** --
Revoke security from these columns, comma-separated. The
default value is ''.
Returns:
A dict with the following entries--
name (str)
Value of input parameter *name*.
permission (str)
Value of input parameter *permission*.
table_name (str)
Value of input parameter *table_name*.
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "revoke_permission_table(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( permission, (basestring)), "revoke_permission_table(): Argument 'permission' must be (one) of type(s) '(basestring)'; given %s" % type( permission ).__name__
assert isinstance( table_name, (basestring)), "revoke_permission_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( options, (dict)), "revoke_permission_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['permission'] = permission
obj['table_name'] = table_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/revoke/permission/table', obj, convert_to_attr_dict = True )
return response
# end revoke_permission_table
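# Illustrative usage (hypothetical names; assumes `db` is an established
# GPUdb connection and the user and table already exist):
#
#     response = db.revoke_permission_table(
#         name       = 'analyst',
#         permission = 'table_read',
#         table_name = 'ki_home.sales'
#     )
#     print( response.table_name, response.info )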
# begin revoke_role
def revoke_role( self, role = None, member = None, options = {} ):
"""Revokes membership in a role from a user or role.
Parameters:
role (str)
Name of the role in which membership will be revoked. Must be
an existing role.
member (str)
Name of the user or role that will be revoked membership in
input parameter *role*. Must be an existing user or role.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
role (str)
Value of input parameter *role*.
member (str)
Value of input parameter *member*.
info (dict of str to str)
Additional information.
"""
assert isinstance( role, (basestring)), "revoke_role(): Argument 'role' must be (one) of type(s) '(basestring)'; given %s" % type( role ).__name__
assert isinstance( member, (basestring)), "revoke_role(): Argument 'member' must be (one) of type(s) '(basestring)'; given %s" % type( member ).__name__
assert isinstance( options, (dict)), "revoke_role(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['role'] = role
obj['member'] = member
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/revoke/role', obj, convert_to_attr_dict = True )
return response
# end revoke_role
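# Illustrative usage (hypothetical role and user names; assumes `db` is
# an established GPUdb connection):
#
#     response = db.revoke_role( role = 'analysts', member = 'jsmith' )
#     print( response.role, response.member )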
# begin show_container_registry
def show_container_registry( self, registry_name = None, options = {} ):
assert isinstance( registry_name, (basestring)), "show_container_registry(): Argument 'registry_name' must be (one) of type(s) '(basestring)'; given %s" % type( registry_name ).__name__
assert isinstance( options, (dict)), "show_container_registry(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['registry_name'] = registry_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/container/registry', obj, convert_to_attr_dict = True )
return response
# end show_container_registry
# begin show_credential
def show_credential( self, credential_name = None, options = {} ):
"""Shows information about a specified `credential
<../../../../concepts/credentials/>`__ or all credentials.
Parameters:
credential_name (str)
Name of the credential on which to retrieve information. The
name must refer to a currently existing credential. If '*' is
specified, information about all credentials will be returned.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
credential_names (list of str)
A list of all credential names.
credential_types (list of str)
A list of each credential's type.
credential_identities (list of str)
A list of each credential's identity.
credentials (list of str)
A list of each credential's create_credential_request JSON
encoded structure.
additional_info (list of dicts of str to str)
Additional information about the respective credential in
output parameter *credential_names*.
info (dict of str to str)
Additional information.
"""
assert isinstance( credential_name, (basestring)), "show_credential(): Argument 'credential_name' must be (one) of type(s) '(basestring)'; given %s" % type( credential_name ).__name__
assert isinstance( options, (dict)), "show_credential(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['credential_name'] = credential_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/credential', obj, convert_to_attr_dict = True )
return response
# end show_credential
# begin show_datasink
def show_datasink( self, name = None, options = {} ):
"""Shows information about a specified `data sink
<../../../../concepts/data_sinks/>`__ or all data sinks.
Parameters:
name (str)
Name of the data sink for which to retrieve information. The
name must refer to a currently existing data sink. If '*' is
specified, information about all data sinks will be returned.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
datasink_names (list of str)
The data sink names.
destination_types (list of str)
The destination type of the data sinks named in output
parameter *datasink_names*.
additional_info (list of dicts of str to str)
Additional information about the respective data sinks in
output parameter *datasink_names*.
Allowed values are:
* @INNER_STRUCTURE
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "show_datasink(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( options, (dict)), "show_datasink(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/datasink', obj, convert_to_attr_dict = True )
return response
# end show_datasink
# begin show_datasource
def show_datasource( self, name = None, options = {} ):
"""Shows information about a specified `data source
<../../../../concepts/data_sources/>`__ or all data sources.
Parameters:
name (str)
Name of the data source for which to retrieve information. The
name must refer to a currently existing data source. If '*' is
specified, information about all data sources will be returned.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
datasource_names (list of str)
The data source names.
storage_provider_types (list of str)
The storage provider type of the data sources named in output
parameter *datasource_names*.
Allowed values are:
* **hdfs** --
Apache Hadoop Distributed File System
* **s3** --
Amazon S3 bucket
additional_info (list of dicts of str to str)
Additional information about the respective data sources in
output parameter *datasource_names*.
Allowed values are:
* @INNER_STRUCTURE
info (dict of str to str)
Additional information.
"""
assert isinstance( name, (basestring)), "show_datasource(): Argument 'name' must be (one) of type(s) '(basestring)'; given %s" % type( name ).__name__
assert isinstance( options, (dict)), "show_datasource(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['name'] = name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/datasource', obj, convert_to_attr_dict = True )
return response
# end show_datasource
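# Illustrative usage (assumes `db` is an established GPUdb connection;
# '*' returns information about all data sources):
#
#     response = db.show_datasource( name = '*' )
#     for ds_name, provider in zip( response.datasource_names,
#                                   response.storage_provider_types ):
#         print( ds_name, provider )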
# begin show_directories
def show_directories( self, directory_name = '', options = {} ):
"""Shows information about directories in `KiFS
<../../../../tools/kifs/>`__. Can be used to show a single directory,
or all directories.
Parameters:
directory_name (str)
The KiFS directory name to show. If empty, shows all
directories. The default value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
directories (list of str)
KiFS directory names
users (list of str)
User that created each directory for the respective directories
in output parameter *directories*
creation_times (list of longs)
The creation time for each directory in milliseconds since
epoch, for the respective directories in output parameter
*directories*
data_usages (list of longs)
The data usage of each directory in bytes, for the respective
directories in output parameter *directories*
data_limits (list of longs)
The data limit for each directory in bytes, for the respective
directories in output parameter *directories*
permissions (list of str)
Highest level of permission the calling user has for the
respective directories in output parameter *directories*.
Will be empty if no permissions. If a user has been granted
both read and write permissions, 'directory_write' will be
listed.
info (dict of str to str)
Additional information.
"""
assert isinstance( directory_name, (basestring)), "show_directories(): Argument 'directory_name' must be (one) of type(s) '(basestring)'; given %s" % type( directory_name ).__name__
assert isinstance( options, (dict)), "show_directories(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['directory_name'] = directory_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/directories', obj, convert_to_attr_dict = True )
return response
# end show_directories
# begin show_environment
def show_environment( self, environment_name = '', options = {} ):
"""Shows information about a specified `user-defined function
<../../../../concepts/udf/>`__ (UDF) environment or all environments.
Returns detailed information about existing environments.
Parameters:
environment_name (str)
Name of the environment on which to retrieve information. The
name must refer to a currently existing environment. If '*' or
an empty value is specified, information about all environments
will be returned. The default value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_not_exists** --
If *true* and if the environment specified in input parameter
*environment_name* does not exist, no error is returned. If
*false* and if the environment specified in input parameter
*environment_name* does not exist, then an error is returned.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
environment_names (list of str)
A list of the environment names.
packages (list of lists of str)
Information about the installed packages in the respective
environments in output parameter *environment_names*.
info (dict of str to str)
Additional information.
"""
assert isinstance( environment_name, (basestring)), "show_environment(): Argument 'environment_name' must be (one) of type(s) '(basestring)'; given %s" % type( environment_name ).__name__
assert isinstance( options, (dict)), "show_environment(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['environment_name'] = environment_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/environment', obj, convert_to_attr_dict = True )
return response
# end show_environment
# begin show_files
def show_files( self, paths = None, options = {} ):
"""Shows information about files in `KiFS <../../../../tools/kifs/>`__.
Can be used for individual files, or to show all files in a given
directory.
Parameters:
paths (list of str)
File paths to show. Each path can be a KiFS directory name, or
a full path to a KiFS file. File paths may contain wildcard
characters after the KiFS directory delimiter.
Accepted wildcard characters are asterisk (*) to represent any
string of zero or more characters, and question mark (?) to
indicate a single character. The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
file_names (list of str)
A listing of files in the paths specified
sizes (list of longs)
Size of each file, in bytes
users (list of str)
User that created the file
creation_times (list of longs)
Creation time for each file, in milliseconds since epoch
info (dict of str to str)
Additional information.
Allowed keys are:
* **multipart_uploads** --
JSON-encoded information about multipart uploads in progress
"""
paths = paths if isinstance( paths, list ) else ( [] if (paths is None) else [ paths ] )
assert isinstance( options, (dict)), "show_files(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['paths'] = paths
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/files', obj, convert_to_attr_dict = True )
return response
# end show_files
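# Illustrative usage (hypothetical KiFS directory name; assumes `db` is
# an established GPUdb connection):
#
#     response = db.show_files( paths = [ 'my_directory/*.csv' ] )
#     for file_name, size in zip( response.file_names, response.sizes ):
#         print( file_name, size )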
# begin show_functions
def show_functions( self, names = None, options = {} ):
names = names if isinstance( names, list ) else ( [] if (names is None) else [ names ] )
assert isinstance( options, (dict)), "show_functions(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['names'] = names
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/functions', obj, convert_to_attr_dict = True )
return response
# end show_functions
# begin show_graph
def show_graph( self, graph_name = '', options = {} ):
"""Shows information and characteristics of graphs that exist on the graph
server.
Parameters:
graph_name (str)
Name of the graph on which to retrieve information. If left as
the default value, information about all graphs is returned.
The default value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **show_original_request** --
If set to *true*, the request that was originally used to
create the graph is also returned as JSON.
Allowed values are:
* true
* false
The default value is 'true'.
* **server_id** --
Indicates which graph server(s) to send the request to.
The default is to send the request to all servers and return information about all of them.
Returns:
A dict with the following entries--
result (bool)
Indicates a success. This call will fail if the graph specified
in the request does not exist.
load (list of ints)
A percentage approximating the current computational load on
the server.
memory (list of longs)
Available memory.
graph_names (list of str)
Name(s) of the graph(s).
graph_server_ids (list of ints)
ID(s) of the graph server(s) hosting the respective graph(s).
graph_owner_user_names (list of str)
Owner of the graph(s) and associated solution table(s).
graph_owner_resource_groups (list of str)
Owner of the resource groups(s) of the graph(s).
directed (list of bools)
Whether or not the edges of the graph have directions
(bi-directional edges can still exist in directed graphs).
Consult `Directed Graphs
<../../../../graph_solver/network_graph_solver/#directed-graphs>`__
for more details.
num_nodes (list of longs)
Total number of nodes in the graph.
num_edges (list of longs)
Total number of edges in the graph.
num_bytes (list of longs)
Memory this graph uses in bytes.
resource_capacity (list of longs)
Memory this graph uses in bytes.
is_persisted (list of bools)
Shows whether or not the graph is persisted (saved and loaded
on launch).
is_partitioned (list of bools)
Indicates whether the graph data is distributed across all available
servers.
is_sync_db (list of bools)
Shows whether or not the graph is linked to the original tables
that created it, and will potentially be re-created instead of
loaded from persist on launch.
has_insert_table_monitor (list of bools)
Shows whether or not the graph has an insert table monitor
attached to it.
original_request (list of str)
The original client request used to create the graph (before
any expression evaluation or separator processing).
info (dict of str to str)
Additional information.
"""
assert isinstance( graph_name, (basestring)), "show_graph(): Argument 'graph_name' must be (one) of type(s) '(basestring)'; given %s" % type( graph_name ).__name__
assert isinstance( options, (dict)), "show_graph(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['graph_name'] = graph_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/graph', obj, convert_to_attr_dict = True )
return response
# end show_graph
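# Illustrative usage (assumes `db` is an established GPUdb connection;
# the default empty graph name returns information about all graphs):
#
#     response = db.show_graph( graph_name = '' )
#     for graph, nodes, edges in zip( response.graph_names,
#                                     response.num_nodes,
#                                     response.num_edges ):
#         print( graph, nodes, edges )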
# begin show_graph_grammar
def show_graph_grammar( self, options = {} ):
assert isinstance( options, (dict)), "show_graph_grammar(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/graph/grammar', obj, convert_to_attr_dict = True )
return response
# end show_graph_grammar
# begin show_model
def show_model( self, model_names = None, options = {} ):
model_names = model_names if isinstance( model_names, list ) else ( [] if (model_names is None) else [ model_names ] )
assert isinstance( options, (dict)), "show_model(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['model_names'] = model_names
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/model', obj, convert_to_attr_dict = True )
return response
# end show_model
# begin show_proc
def show_proc( self, proc_name = '', options = {} ):
"""Shows information about a proc.
Parameters:
proc_name (str)
Name of the proc to show information about. If specified, must
be the name of a currently existing proc. If not specified,
information about all procs will be returned. The default
value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **include_files** --
If set to *true*, the files that make up the proc will be
returned. If set to *false*, the files will not be returned.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
proc_names (list of str)
The proc names.
execution_modes (list of str)
The execution modes of the procs named in output parameter
*proc_names*.
Allowed values are:
* @INNER_STRUCTURE
files (list of dicts of str to str)
Maps of the files that make up the procs named in output
parameter *proc_names*.
commands (list of str)
The commands (excluding arguments) that will be invoked when
the procs named in output parameter *proc_names* are executed.
args (list of lists of str)
Arrays of command-line arguments that will be passed to the
procs named in output parameter *proc_names* when executed.
options (list of dicts of str to str)
The optional parameters for the procs named in output parameter
*proc_names*.
info (dict of str to str)
Additional information.
"""
assert isinstance( proc_name, (basestring)), "show_proc(): Argument 'proc_name' must be (one) of type(s) '(basestring)'; given %s" % type( proc_name ).__name__
assert isinstance( options, (dict)), "show_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['proc_name'] = proc_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/proc', obj, convert_to_attr_dict = True )
return response
# end show_proc
# begin show_proc_status
def show_proc_status( self, run_id = '', options = {} ):
"""Shows the statuses of running or completed proc instances. Results are
grouped by run ID (as returned from :meth:`GPUdb.execute_proc`) and
data segment ID (each invocation of the proc command on a data segment
is assigned a data segment ID).
Parameters:
run_id (str)
The run ID of a specific proc instance for which the status
will be returned. If a proc with a matching run ID is not
found, the response will be empty. If not specified, the
statuses of all executed proc instances will be returned. The
default value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **clear_complete** --
If set to *true*, if a proc instance has completed (either
successfully or unsuccessfully) then its status will be
cleared and no longer returned in subsequent calls.
Allowed values are:
* true
* false
The default value is 'false'.
* **run_tag** --
If input parameter *run_id* is specified, return the status
for a proc instance that has a matching run ID and a matching
run tag that was provided to :meth:`GPUdb.execute_proc`. If
input parameter *run_id* is not specified, return statuses
for all proc instances where a matching run tag was provided
to :meth:`GPUdb.execute_proc`. The default value is ''.
Returns:
A dict with the following entries--
proc_names (dict of str to str)
The proc names corresponding to the returned run IDs.
params (dict of str to dicts of str to str)
The string params passed to :meth:`GPUdb.execute_proc` for the
returned run IDs.
bin_params (dict of str to dicts of str to str)
The binary params passed to :meth:`GPUdb.execute_proc` for the
returned run IDs.
input_table_names (dict of str to lists of str)
The input table names passed to :meth:`GPUdb.execute_proc` for
the returned run IDs.
input_column_names (dict of str to dicts of str to lists of str)
The input column names passed to :meth:`GPUdb.execute_proc` for
the returned run IDs, supplemented with the column names for
input tables not included in the input column name map.
output_table_names (dict of str to lists of str)
The output table names passed to :meth:`GPUdb.execute_proc` for
the returned run IDs.
options (dict of str to dicts of str to str)
The optional parameters passed to :meth:`GPUdb.execute_proc`
for the returned run IDs.
overall_statuses (dict of str to str)
Overall statuses for the returned run IDs. Note that these are
rollups and individual statuses may differ between data
segments for the same run ID; see output parameter *statuses*
and output parameter *messages* for statuses from individual
data segments.
Allowed values are:
* **running** --
The proc instance is currently running.
* **complete** --
The proc instance completed with no errors.
* **killed** --
The proc instance was killed before completion.
* **error** --
The proc instance failed with an error.
* **none** --
The proc instance does not have a status, i.e. it has not yet
run.
statuses (dict of str to dicts of str to str)
Statuses for the returned run IDs, grouped by data segment ID.
messages (dict of str to dicts of str to str)
Messages containing additional status information for the
returned run IDs, grouped by data segment ID.
results (dict of str to dicts of str to dicts of str to str)
String results for the returned run IDs, grouped by data
segment ID.
bin_results (dict of str to dicts of str to dicts of str to str)
Binary results for the returned run IDs, grouped by data
segment ID.
output (dict of str to dicts of str to dicts of str to lists of str)
Output lines for the returned run IDs, grouped by data segment
ID.
timings (dict of str to dicts of str to dicts of str to longs)
Timing information for the returned run IDs, grouped by data
segment ID.
info (dict of str to str)
Additional information.
"""
assert isinstance( run_id, (basestring)), "show_proc_status(): Argument 'run_id' must be (one) of type(s) '(basestring)'; given %s" % type( run_id ).__name__
assert isinstance( options, (dict)), "show_proc_status(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['run_id'] = run_id
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/proc/status', obj, convert_to_attr_dict = True )
return response
# end show_proc_status
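# Illustrative usage (assumes `db` is an established GPUdb connection;
# an empty run ID returns the statuses of all executed proc instances):
#
#     response = db.show_proc_status( run_id = '' )
#     for run_id, status in response.overall_statuses.items():
#         print( run_id, status )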
# begin show_resource_objects
def show_resource_objects( self, options = {} ):
"""Returns information about the internal sub-components (tiered objects)
which use resources of the system. The request can either return
results from actively used objects (default) or it can be used to
query the status of the objects of a given list of tables.
Returns detailed information about the requested resource objects.
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **tiers** --
Comma-separated list of tiers to query, leave blank for all
tiers.
* **expression** --
An expression to filter the returned objects. Expression is
limited to the following operators:
=,!=,<,<=,>,>=,+,-,*,AND,OR,LIKE. For details see
`Expressions <../../../../concepts/expressions/>`__. To use a
more complex expression, query the
ki_catalog.ki_tiered_objects table directly.
* **order_by** --
Single column to be sorted by as well as the sort direction,
e.g., 'size asc'.
Allowed values are:
* size
* id
* priority
* tier
* evictable
* owner_resource_group
* **limit** --
An integer indicating the maximum number of results to be
returned, per rank, or (-1) to indicate that the maximum
number of results allowed by the server
should be returned. The number of records returned will
never exceed the server's own limit,
defined by the `max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server
configuration. The default value is '100'.
* **table_names** --
Comma-separated list of tables to restrict the results to.
Use '*' to show all tables.
Returns:
A dict with the following entries--
rank_objects (dict of str to str)
Tier usage across ranks. Layout is:
response.rank_usage[rank_number][resource_group_name] =
group_usage (as stringified json)
info (dict of str to str)
Additional information.
"""
assert isinstance( options, (dict)), "show_resource_objects(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/resource/objects', obj, convert_to_attr_dict = True )
return response
# end show_resource_objects
# begin show_resource_statistics
def show_resource_statistics( self, options = {} ):
"""Requests various statistics for storage/memory tiers and resource
groups.
Returns statistics on a per-rank basis.
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
statistics_map (dict of str to str)
Map of resource statistics
info (dict of str to str)
Additional information.
"""
assert isinstance( options, (dict)), "show_resource_statistics(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/resource/statistics', obj, convert_to_attr_dict = True )
return response
# end show_resource_statistics
# begin show_resource_groups
def show_resource_groups( self, names = None, options = {} ):
"""Requests resource group properties.
Returns detailed information about the requested resource groups.
Parameters:
names (list of str)
List of names of groups to be shown. A single entry with an
empty string returns all groups. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **show_default_values** --
If *true* include values of fields that are based on the
default resource group.
Allowed values are:
* true
* false
The default value is 'true'.
* **show_default_group** --
If *true* include the default and system resource groups in
the response. This value defaults to false if an explicit
list of group names is provided, and true otherwise.
Allowed values are:
* true
* false
The default value is 'true'.
* **show_tier_usage** --
If *true* include the resource group usage on the worker
ranks in the response.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
groups (list of dicts of str to str)
Map of resource group information.
rank_usage (dict of str to str)
Tier usage across ranks. Layout is:
response.rank_usage[rank_number][resource_group_name] =
group_usage (as stringified json)
info (dict of str to str)
Additional information.
"""
names = names if isinstance( names, list ) else ( [] if (names is None) else [ names ] )
assert isinstance( options, (dict)), "show_resource_groups(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['names'] = names
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/resourcegroups', obj, convert_to_attr_dict = True )
return response
# end show_resource_groups
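# Illustrative usage (assumes `db` is an established GPUdb connection;
# a single empty string in `names` returns all resource groups):
#
#     response = db.show_resource_groups(
#         names   = [ '' ],
#         options = { 'show_tier_usage': 'true' }
#     )
#     for group in response.groups:
#         print( group )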
# begin show_schema
def show_schema( self, schema_name = None, options = {} ):
"""Retrieves information about a `schema
<../../../../concepts/schemas/>`__ (or all schemas), as specified in
input parameter *schema_name*.
Parameters:
schema_name (str)
Name of the schema for which to retrieve the information. If
blank, then info for all schemas is returned.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_not_exists** --
If *false* will return an error if the provided input
parameter *schema_name* does not exist. If *true* then it
will return an empty result if the provided input parameter
*schema_name* does not exist.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
schema_name (str)
Value of input parameter *schema_name*.
schema_names (list of str)
A list of all schema names for which information is returned
schema_tables (list of lists of str)
An array of arrays containing a list of tables in each of the
respective output parameter *schema_names*.
additional_info (list of dicts of str to str)
Additional information about the respective tables in output
parameter *schema_names*.
info (dict of str to str)
Additional information.
"""
assert isinstance( schema_name, (basestring)), "show_schema(): Argument 'schema_name' must be (one) of type(s) '(basestring)'; given %s" % type( schema_name ).__name__
assert isinstance( options, (dict)), "show_schema(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['schema_name'] = schema_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/schema', obj, convert_to_attr_dict = True )
return response
# end show_schema
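# Illustrative usage (hypothetical schema name; assumes `db` is an
# established GPUdb connection):
#
#     response = db.show_schema( schema_name = 'ki_home' )
#     print( response.schema_names, response.schema_tables )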
# begin show_security
def show_security( self, names = None, options = {} ):
"""Shows security information relating to users and/or roles. If the
caller is not a system administrator, only information relating to the
caller and their roles is returned.
Parameters:
names (list of str)
A list of names of users and/or roles about which security
information is requested. If none are provided, information
about all users and roles will be returned. The user can
provide a single element (which will be automatically promoted
to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **show_current_user** --
If *true*, returns only security information for the current
user.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
types (dict of str to str)
Map of user/role name to the type of that user/role.
Allowed values are:
* **internal_user** --
A user whose credentials are managed by the database system.
* **external_user** --
A user whose credentials are managed by an external LDAP.
* **role** --
A role.
roles (dict of str to lists of str)
Map of user/role name to a list of names of roles of which that
user/role is a member.
permissions (dict of str to lists of dicts of str to str)
Map of user/role name to a list of permissions directly granted
to that user/role.
resource_groups (dict of str to str)
Map of user name to resource group name.
info (dict of str to str)
Additional information.
"""
names = names if isinstance( names, list ) else ( [] if (names is None) else [ names ] )
assert isinstance( options, (dict)), "show_security(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['names'] = names
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/security', obj, convert_to_attr_dict = True )
return response
# end show_security
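# Illustrative usage (assumes `db` is an established GPUdb connection;
# an empty list returns information about all users and roles visible
# to the caller):
#
#     response = db.show_security( names = [] )
#     for principal, principal_type in response.types.items():
#         print( principal, principal_type )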
# begin show_sql_proc
def show_sql_proc( self, procedure_name = '', options = {} ):
"""Shows information about SQL procedures, including the full definition
of each requested procedure.
Parameters:
procedure_name (str)
Name of the procedure for which to retrieve the information. If
blank, then information about all procedures is returned. The
default value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_not_exists** --
If *true*, no error will be returned if the requested
procedure does not exist. If *false*, an error will be
returned if the requested procedure does not exist.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
procedure_names (list of str)
A list of the names of the requested procedures.
procedure_definitions (list of str)
A list of the definitions for the requested procedures.
additional_info (list of dicts of str to str)
Additional information about the respective tables in the
requested procedures.
Allowed values are:
* @INNER_STRUCTURE
info (dict of str to str)
Additional information.
"""
assert isinstance( procedure_name, (basestring)), "show_sql_proc(): Argument 'procedure_name' must be (one) of type(s) '(basestring)'; given %s" % type( procedure_name ).__name__
assert isinstance( options, (dict)), "show_sql_proc(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['procedure_name'] = procedure_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/sql/proc', obj, convert_to_attr_dict = True )
return response
# end show_sql_proc
# begin show_statistics
def show_statistics( self, table_names = None, options = {} ):
"""Retrieves the collected column statistics for the specified table(s).
Parameters:
table_names (list of str)
Names of tables whose metadata will be fetched, each in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. All
provided tables must exist, or an error is returned. The
user can provide a single element (which will be automatically
promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
table_names (list of str)
Value of input parameter *table_names*.
stastistics_map (list of lists of dicts of str to str)
A list of maps containing the column statistics of each table in
input parameter *table_names*.
info (dict of str to str)
Additional information.
"""
table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
assert isinstance( options, (dict)), "show_statistics(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_names'] = table_names
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/statistics', obj, convert_to_attr_dict = True )
return response
# end show_statistics
# begin show_system_properties
def show_system_properties( self, options = {} ):
"""Returns server configuration and version related information to the
caller. The admin tool uses it to present server related information to
the user.
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **properties** --
A list of comma separated names of properties requested. If
not specified, all properties will be returned.
Returns:
A dict with the following entries--
property_map (dict of str to str)
A map of server configuration parameters and version
information.
Allowed keys are:
* **conf.enable_worker_http_servers** --
Boolean value indicating whether the system is configured for
multi-head ingestion.
Allowed values are:
* **TRUE** --
Indicates that the system is configured for multi-head
ingestion.
* **FALSE** --
Indicates that the system is NOT configured for multi-head
ingestion.
* **conf.worker_http_server_ips** --
Semicolon (';') separated string of IP addresses of all the
ingestion-enabled worker heads of the system.
* **conf.worker_http_server_ports** --
Semicolon (';') separated string of the port numbers of all
the ingestion-enabled worker ranks of the system.
* **conf.hm_http_port** --
The host manager port number (an integer value).
* **conf.enable_ha** --
Flag indicating whether high availability (HA) is set up (a
boolean value).
* **conf.ha_ring_head_nodes** --
A comma-separated string of high availability (HA) ring node
URLs. If HA is not set up, then an empty string.
info (dict of str to str)
Additional information.
"""
assert isinstance( options, (dict)), "show_system_properties(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/system/properties', obj, convert_to_attr_dict = True )
return response
# end show_system_properties
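# Illustrative usage (assumes `db` is an established GPUdb connection;
# the property names follow the keys documented above):
#
#     response = db.show_system_properties(
#         options = { 'properties': 'conf.enable_ha,conf.hm_http_port' }
#     )
#     print( response.property_map )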
# begin show_system_status
def show_system_status( self, options = {} ):
"""Provides server configuration and health related status to the caller.
The admin tool uses it to present server related information to the
user.
Parameters:
options (dict of str to str)
Optional parameters, currently unused. The default value is an
empty dict ( {} ).
Returns:
A dict with the following entries--
status_map (dict of str to str)
A map of server configuration and health related status.
info (dict of str to str)
Additional information.
"""
assert isinstance( options, (dict)), "show_system_status(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/system/status', obj, convert_to_attr_dict = True )
return response
# end show_system_status
# begin show_system_timing
def show_system_timing( self, options = {} ):
"""Returns the last 100 database requests along with the request timing
and internal job id. The admin tool uses it to present request timing
information to the user.
Parameters:
options (dict of str to str)
Optional parameters, currently unused. The default value is an
empty dict ( {} ).
Returns:
A dict with the following entries--
endpoints (list of str)
List of recently called endpoints, most recent first.
time_in_ms (list of floats)
List of time (in ms) of the recent requests.
jobIds (list of str)
List of the internal job ids for the recent requests.
info (dict of str to str)
Additional information.
"""
assert isinstance( options, (dict)), "show_system_timing(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/system/timing', obj, convert_to_attr_dict = True )
return response
# end show_system_timing
# begin show_table
def show_table( self, table_name = None, options = {} ):
"""Retrieves detailed information about a table, view, or schema,
specified in input parameter *table_name*. If the supplied input
parameter *table_name* is a schema, the call can return information
about either the schema itself or the tables and views it contains.
If input parameter *table_name* is empty, information about all
schemas will be returned.
If the option *get_sizes* is set to *true*, then the number of records
in each table is returned (in output parameter *sizes* and output
parameter *full_sizes*), along with the total number of objects across
all requested tables (in output parameter *total_size* and output
parameter *total_full_size*).
For a schema, setting the *show_children* option to *false* returns
only information about the schema itself; setting *show_children* to
*true* returns a list of tables and views contained in the schema,
along with their corresponding detail.
To retrieve a list of every table, view, and schema in the database,
set input parameter *table_name* to '*' and *show_children* to *true*.
When doing this, the returned output parameter *total_size* and output
parameter *total_full_size* will not include the sizes of non-base
tables (e.g., filters, views, joins, etc.).
Parameters:
table_name (str)
Name of the table for which to retrieve the information, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. If
blank, then returns information about all tables and views.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **force_synchronous** --
If *true* then the table sizes will wait for read lock before
returning.
Allowed values are:
* true
* false
The default value is 'true'.
* **get_sizes** --
If *true* then the number of records in each table, along
with a cumulative count, will be returned; blank, otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **get_cached_sizes** --
If *true* then the number of records in each table, along
with a cumulative count, will be returned; blank, otherwise.
This version will return the sizes cached at rank 0, which
may be stale if there is a multihead insert occurring.
Allowed values are:
* true
* false
The default value is 'false'.
* **show_children** --
If input parameter *table_name* is a schema, then *true* will
return information about the tables and views in the schema,
and *false* will return information about the schema itself.
If input parameter *table_name* is a table or view,
*show_children* must be *false*. If input parameter
*table_name* is empty, then *show_children* must be *true*.
Allowed values are:
* true
* false
The default value is 'true'.
* **no_error_if_not_exists** --
If *false* will return an error if the provided input
parameter *table_name* does not exist. If *true* then it will
return an empty result.
Allowed values are:
* true
* false
The default value is 'false'.
* **get_column_info** --
If *true* then column info (memory usage, etc) will be
returned.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
table_name (str)
Value of input parameter *table_name*.
table_names (list of str)
If input parameter *table_name* is a table or view, then the
single element of the array is input parameter *table_name*.
If input parameter *table_name* is a schema and *show_children*
is set to *true*,
then this array is populated with the names of all tables and
views in the given schema;
if *show_children* is *false*,
then this array will only include the schema name itself.
If input parameter *table_name* is an empty string, then the
array contains the names of all tables in the user's default
schema.
table_descriptions (list of lists of str)
List of descriptions for the respective tables in output
parameter *table_names*.
Allowed values are:
* COLLECTION
* JOIN
* LOGICAL_EXTERNAL_TABLE
* LOGICAL_VIEW
* MATERIALIZED_EXTERNAL_TABLE
* MATERIALIZED_VIEW
* MATERIALIZED_VIEW_MEMBER
* MATERIALIZED_VIEW_UNDER_CONSTRUCTION
* REPLICATED
* RESULT_TABLE
* SCHEMA
* VIEW
type_ids (list of str)
Type ids of the respective tables in output parameter
*table_names*.
type_schemas (list of str)
Type schemas of the respective tables in output parameter
*table_names*.
type_labels (list of str)
Type labels of the respective tables in output parameter
*table_names*.
properties (list of dicts of str to lists of str)
Property maps of the respective tables in output parameter
*table_names*.
additional_info (list of dicts of str to str)
Additional information about the respective tables in output
parameter *table_names*.
Allowed values are:
* @INNER_STRUCTURE
sizes (list of longs)
If *get_sizes* is *true*, an array containing the number of
records of each corresponding table in output parameter
*table_names*. Otherwise, an empty array.
full_sizes (list of longs)
If *get_sizes* is *true*, an array containing the number of
records of each corresponding table in output parameter
*table_names* (same values as output parameter *sizes*).
Otherwise, an empty array.
join_sizes (list of floats)
If *get_sizes* is *true*, an array containing the number of
unfiltered records in the cross product of the sub-tables of
each corresponding join-table in output parameter
*table_names*. For simple tables, this number will be the same
as output parameter *sizes*. For join-tables, this value gives
the number of joined-table rows that must be processed by any
aggregate functions operating on the table. Otherwise, (if
*get_sizes* is *false*), an empty array.
total_size (long)
If *get_sizes* is *true*, the sum of the elements of output
parameter *sizes*. Otherwise, -1.
total_full_size (long)
If *get_sizes* is *true*, the sum of the elements of output
parameter *full_sizes* (same value as output parameter
*total_size*). Otherwise, -1.
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "show_table(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( options, (dict)), "show_table(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/table', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
# Create record types for the returned types and save them
for __type_info in zip( response.type_ids, response.type_labels, response.type_schemas, response.properties ):
# Create a type only if it is not collection or a materialized view
# under construction (which returns an empty string for the schema)
if ( (__type_info[ 1 ] != "<collection>") and (__type_info[2] != "") ):
record_type = RecordType.from_type_schema( __type_info[ 1 ], __type_info[ 2 ], __type_info[ 3 ] )
self.save_known_type( __type_info[ 0 ], record_type )
# end loop
return response
# end show_table
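# Illustrative usage sketch (not part of the generated API): fetching row
# counts via show_table().  Assumes an already-connected GPUdb handle `db`;
# the table name 'ki_home.stocks' is hypothetical.
#
#     resp = db.show_table( 'ki_home.stocks', options = { 'get_sizes': 'true' } )
#     if resp.is_ok():
#         for name, size in zip( resp.table_names, resp.sizes ):
#             print( name, size )
#         print( 'total records:', resp.total_size )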
# begin show_table_metadata
# end show_table_metadata
# begin show_table_monitors
def show_table_monitors( self, monitor_ids = None, options = {} ):
"""Show table monitors and their properties. Table monitors are created
using :meth:`GPUdb.create_table_monitor`.
Returns detailed information about existing table monitors.
Parameters:
monitor_ids (list of str)
List of monitors to be shown. An empty list or a single entry
with an empty string returns all table monitors. The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
monitor_ids (list of str)
List of monitor IDs.
table_names (list of str)
List of source tables being monitored for the respective output
parameter *monitor_ids*.
events (list of str)
List of notification events for the respective output parameter
*monitor_ids*.
increasing_columns (list of str)
List of columns used on the respective tables in output
parameter *table_names* that will increase for new records.
filter_expressions (list of str)
List of filter expressions used on the respective tables in
output parameter *table_names* to limit records for
notifications.
refresh_method (list of str)
List of refresh methods used on the respective tables in output
parameter *table_names*.
refresh_period (list of str)
List of refresh periods used on the respective tables in output
parameter *table_names*.
refresh_start_time (list of str)
List of refresh start times used on the respective tables in
output parameter *table_names*.
datasink_names (list of str)
List of datasink names for the respective output parameter
*monitor_ids* if one is defined.
additional_info (list of dicts of str to str)
Additional information about the respective monitors in output
parameter *monitor_ids*.
Allowed values are:
* @INNER_STRUCTURE
info (dict of str to str)
Additional information.
"""
monitor_ids = monitor_ids if isinstance( monitor_ids, list ) else ( [] if (monitor_ids is None) else [ monitor_ids ] )
assert isinstance( options, (dict)), "show_table_monitors(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['monitor_ids'] = monitor_ids
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/tablemonitors', obj, convert_to_attr_dict = True )
return response
# end show_table_monitors
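# Illustrative usage sketch: listing all table monitors with
# show_table_monitors().  An empty list returns every monitor; `db` is an
# assumed, already-connected GPUdb handle.
#
#     resp = db.show_table_monitors( [] )
#     if resp.is_ok():
#         for mid, tbl, event in zip( resp.monitor_ids, resp.table_names, resp.events ):
#             print( mid, tbl, event )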
# begin show_tables_by_type
def show_tables_by_type( self, type_id = None, label = None, options = {} ):
"""Gets names of the tables whose type matches the given criteria. Each
table has a particular type. This type comprises the schema and
properties of the table and sometimes a type label. This function
allows a look up of the existing tables based on full or partial type
information. The operation is synchronous.
Parameters:
type_id (str)
Type id returned by a call to :meth:`GPUdb.create_type`.
label (str)
Optional user supplied label which can be used instead of the
type_id to retrieve all tables with the given label.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
table_names (list of str)
List of tables matching the input criteria.
info (dict of str to str)
Additional information.
"""
assert isinstance( type_id, (basestring)), "show_tables_by_type(): Argument 'type_id' must be (one) of type(s) '(basestring)'; given %s" % type( type_id ).__name__
assert isinstance( label, (basestring)), "show_tables_by_type(): Argument 'label' must be (one) of type(s) '(basestring)'; given %s" % type( label ).__name__
assert isinstance( options, (dict)), "show_tables_by_type(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['type_id'] = type_id
obj['label'] = label
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/tables/bytype', obj, convert_to_attr_dict = True )
return response
# end show_tables_by_type
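# Illustrative usage sketch: looking up tables by type with
# show_tables_by_type().  `my_type_id` is assumed to be a type id previously
# returned by create_type(); an empty label applies no label filter.
#
#     resp = db.show_tables_by_type( my_type_id, '' )
#     if resp.is_ok():
#         print( resp.table_names )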
# begin show_triggers
def show_triggers( self, trigger_ids = None, options = {} ):
"""Retrieves information regarding the specified triggers or all existing
triggers currently active.
Parameters:
trigger_ids (list of str)
List of IDs of the triggers whose information is to be
retrieved. An empty list means information will be retrieved on
all active triggers. The user can provide a single element
(which will be automatically promoted to a list internally) or
a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
trigger_map (dict of str to dicts of str to str)
This dictionary contains (key, value) pairs of (trigger ID,
information map/dictionary) where the key is a Unicode string
representing a Trigger ID. The value is another embedded
dictionary containing (key, value) pairs where the keys consist
of 'table_name', 'type' and the parameter names relating to the
trigger type, e.g. *nai*, *min*, *max*. The values are unicode
strings (numeric values are also converted to strings)
representing the value of the respective parameter. If a
trigger is associated with multiple tables, then the string
value for *table_name* contains a comma separated list of table
names.
info (dict of str to str)
Additional information.
"""
trigger_ids = trigger_ids if isinstance( trigger_ids, list ) else ( [] if (trigger_ids is None) else [ trigger_ids ] )
assert isinstance( options, (dict)), "show_triggers(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['trigger_ids'] = trigger_ids
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/triggers', obj, convert_to_attr_dict = True )
return response
# end show_triggers
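# Illustrative usage sketch: retrieving all active triggers with
# show_triggers().  An empty list returns information on every active trigger;
# the returned trigger_map is assumed to behave like a standard dict here.
#
#     resp = db.show_triggers( [] )
#     if resp.is_ok():
#         for trigger_id, trigger_info in resp.trigger_map.items():
#             print( trigger_id, trigger_info )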
# begin show_types
def show_types( self, type_id = None, label = None, options = {} ):
"""Retrieves information for the specified data type ID or type label. For
all data types that match the input criteria, the database returns the
type ID, the type schema, the label (if available), and the type's
column properties.
Parameters:
type_id (str)
Type Id returned in response to a call to
:meth:`GPUdb.create_type`.
label (str)
Optional string that was supplied by the user in a call to
:meth:`GPUdb.create_type`.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_join_types** --
When set to 'true', no join types will be included.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
type_ids (list of str)
type_schemas (list of str)
labels (list of str)
properties (list of dicts of str to lists of str)
info (dict of str to str)
Additional information.
"""
assert isinstance( type_id, (basestring)), "show_types(): Argument 'type_id' must be (one) of type(s) '(basestring)'; given %s" % type( type_id ).__name__
assert isinstance( label, (basestring)), "show_types(): Argument 'label' must be (one) of type(s) '(basestring)'; given %s" % type( label ).__name__
assert isinstance( options, (dict)), "show_types(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['type_id'] = type_id
obj['label'] = label
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/types', obj, convert_to_attr_dict = True )
if not response.is_ok():
return response
# Create record types for the returned types and save them
for __type_info in zip( response.type_ids, response.labels, response.type_schemas, response.properties ):
# Create a type only if it is not collection or a materialized view
# under construction (which returns an empty string for the schema)
if ( (__type_info[ 1 ] != "<collection>") and (__type_info[2] != "") ):
record_type = RecordType.from_type_schema( __type_info[ 1 ], __type_info[ 2 ], __type_info[ 3 ] )
self.save_known_type( __type_info[ 0 ], record_type )
# end loop
return response
# end show_types
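# Illustrative usage sketch: inspecting a known type with show_types().
# `my_type_id` is assumed to come from an earlier create_type() call; an
# empty label matches types regardless of label.
#
#     resp = db.show_types( my_type_id, '' )
#     if resp.is_ok():
#         for type_id, type_schema in zip( resp.type_ids, resp.type_schemas ):
#             print( type_id, type_schema )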
# begin show_video
def show_video( self, paths = None, options = {} ):
"""Retrieves information about rendered videos.
Parameters:
paths (list of str)
The fully-qualified `KiFS <../../../../tools/kifs/>`__ paths
for the videos to show. If empty, shows all videos. The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
creation_times (list of str)
Creation time for each video as an ISO-8601 datetime.
elapsed_render_time_seconds (list of longs)
The elapsed time spent rendering each video in seconds.
job_ids (list of longs)
The job id of the rendering process, for each video that is
still being rendered.
paths (list of str)
KIFS path to each video.
rendered_bytes (list of longs)
The number of bytes emitted by the encoder for each video.
rendered_frames (list of longs)
The number of frames rendered for each video.
rendered_percents (list of longs)
Percent completion of each video's rendering process (0-100)
requests (list of str)
JSON-string reflecting each video's creation parameters.
status (list of str)
The status of the last rendered frame for each video. Either
OK or Error with a message indicating the nature of the error.
ttls (list of longs)
The remaining `TTL <../../../../concepts/ttl/>`__, in minutes,
before the respective video expires (-1 if it will never
expire).
info (dict of str to str)
Additional information.
"""
paths = paths if isinstance( paths, list ) else ( [] if (paths is None) else [ paths ] )
assert isinstance( options, (dict)), "show_video(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['paths'] = paths
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/show/video', obj, convert_to_attr_dict = True )
return response
# end show_video
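# Illustrative usage sketch: checking rendering progress for all videos with
# show_video().  An empty `paths` list shows every video; `db` is an assumed
# connected GPUdb handle.
#
#     resp = db.show_video( [] )
#     if resp.is_ok():
#         for path, pct, status in zip( resp.paths, resp.rendered_percents, resp.status ):
#             print( path, pct, status )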
# begin solve_graph
def solve_graph( self, graph_name = None, weights_on_edges = [], restrictions =
[], solver_type = 'SHORTEST_PATH', source_nodes = [],
destination_nodes = [], solution_table = 'graph_solutions',
options = {} ):
"""Solves an existing graph for a type of problem (e.g., shortest path,
page rank, travelling salesman, etc.) using source nodes, destination
nodes, and
additional, optional weights and restrictions.
IMPORTANT: It's highly recommended that you review the
`Network Graphs & Solvers
<../../../../graph_solver/network_graph_solver/>`__
concepts documentation, the
`Graph REST Tutorial <../../../../guides/graph_rest_guide/>`__,
and/or some
`/solve/graph examples <../../../../guide-tags/graph---solve>`__
before using this endpoint.
Parameters:
graph_name (str)
Name of the graph resource to solve.
weights_on_edges (list of str)
Additional weights to apply to the edges of an existing
graph. Weights must be specified using
`identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as
`combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS WEIGHTS_EDGE_ID', expressions, e.g.,
'ST_LENGTH(wkt) AS WEIGHTS_VALUESPECIFIED', or constant values,
e.g.,
'{4, 15, 2} AS WEIGHTS_VALUESPECIFIED'. Any provided weights
will be added
(in the case of 'WEIGHTS_VALUESPECIFIED') to or multiplied with
(in the case of 'WEIGHTS_FACTORSPECIFIED') the existing
weight(s). If using
constant values in an identifier combination, the number of
values specified
must match across the combination. The default value is an
empty list ( [] ). The user can provide a single element
(which will be automatically promoted to a list internally) or
a list.
restrictions (list of str)
Additional restrictions to apply to the nodes/edges of an
existing graph. Restrictions must be specified using
`identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as
`combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS RESTRICTIONS_EDGE_ID', expressions, e.g.,
'column/2 AS RESTRICTIONS_VALUECOMPARED', or constant values,
e.g.,
'{0, 0, 0, 1} AS RESTRICTIONS_ONOFFCOMPARED'. If using constant
values in an
identifier combination, the number of values specified must
match across the
combination. If remove_previous_restrictions option is set
to true, any
provided restrictions will replace the existing restrictions.
Otherwise, any provided
restrictions will be added (in the case of
'RESTRICTIONS_VALUECOMPARED') to or
replaced (in the case of 'RESTRICTIONS_ONOFFCOMPARED'). The
default value is an empty list ( [] ). The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
solver_type (str)
The type of solver to use for the graph.
Allowed values are:
* **SHORTEST_PATH** --
Solves for the optimal (shortest) path based on weights and
restrictions from one source to destination nodes. Also
known as the Dijkstra solver.
* **PAGE_RANK** --
Solves for the probability of each destination node being
visited based on the links of the graph topology. Weights are
not required to use this solver.
* **PROBABILITY_RANK** --
Solves for the transitional probability (Hidden Markov) for
each node based on the weights (probability assigned over
given edges).
* **CENTRALITY** --
Solves for the degree of a node, depicting how many pairs of
individuals would have to go through the node to reach
one another in the minimum number of hops. Also known as
betweenness.
* **MULTIPLE_ROUTING** --
Solves for finding the minimum cost cumulative path for a
round-trip starting from the given source and visiting each
given destination node once then returning to the source.
Also known as the travelling salesman problem.
* **INVERSE_SHORTEST_PATH** --
Solves for finding the optimal path cost for each destination
node to route to the source node. Also known as inverse
Dijkstra or the service man routing problem.
* **BACKHAUL_ROUTING** --
Solves for optimal routes that connect remote asset nodes to
the fixed (backbone) asset nodes.
* **ALLPATHS** --
Solves for paths that would give costs between the max and min
solution radii - make sure to limit the results with the
'max_solution_targets' option. The min cost should be >= the
shortest_path cost.
* **STATS_ALL** --
Solves for graph statistics such as graph diameter, longest
pairs, vertex valences, topology numbers, average and max
cluster sizes, etc.
* **CLOSENESS** --
Solves for the centrality closeness score per node as the sum
of the inverse shortest path costs to all nodes in the graph.
The default value is 'SHORTEST_PATH'.
source_nodes (list of str)
It can be one of the nodal identifiers - e.g., 'NODE_WKTPOINT' -
for source nodes. For *BACKHAUL_ROUTING*, this list depicts the
fixed assets. The default value is an empty list ( [] ). The
user can provide a single element (which will be automatically
promoted to a list internally) or a list.
destination_nodes (list of str)
It can be one of the nodal identifiers - e.g., 'NODE_WKTPOINT' -
for destination (target) nodes. For *BACKHAUL_ROUTING*, this
list depicts the remote assets. The default value is an empty
list ( [] ). The user can provide a single element (which will
be automatically promoted to a list internally) or a list.
solution_table (str)
Name of the table to store the solution, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. The
default value is 'graph_solutions'.
options (dict of str to str)
Additional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **max_solution_radius** --
For *ALLPATHS*, *SHORTEST_PATH* and *INVERSE_SHORTEST_PATH*
solvers only. Sets the maximum solution cost radius, which
ignores the input parameter *destination_nodes* list and
instead outputs the nodes within the radius sorted by
ascending cost. If set to '0.0', the setting is ignored. The
default value is '0.0'.
* **min_solution_radius** --
For *ALLPATHS*, *SHORTEST_PATH* and *INVERSE_SHORTEST_PATH*
solvers only. Applicable only when *max_solution_radius* is
set. Sets the minimum solution cost radius, which ignores the
input parameter *destination_nodes* list and instead outputs
the nodes within the radius sorted by ascending cost. If set
to '0.0', the setting is ignored. The default value is
'0.0'.
* **max_solution_targets** --
For *ALLPATHS*, *SHORTEST_PATH* and *INVERSE_SHORTEST_PATH*
solvers only. Sets the maximum number of solution targets,
which ignores the input parameter *destination_nodes* list
and instead outputs no more than n number of nodes sorted by
ascending cost where n is equal to the setting value. If set
to 0, the setting is ignored. The default value is '1000'.
* **uniform_weights** --
When specified, assigns the given value to all the edges in
the graph. Note that weights provided in input parameter
*weights_on_edges* will override this value.
* **left_turn_penalty** --
This will add an additional weight over the edges labelled as
'left turn' if the 'add_turn' option parameter of the
:meth:`GPUdb.create_graph` was invoked at graph creation.
The default value is '0.0'.
* **right_turn_penalty** --
This will add an additional weight over the edges labelled as
'right turn' if the 'add_turn' option parameter of the
:meth:`GPUdb.create_graph` was invoked at graph creation.
The default value is '0.0'.
* **intersection_penalty** --
This will add an additional weight over the edges labelled as
'intersection' if the 'add_turn' option parameter of the
:meth:`GPUdb.create_graph` was invoked at graph creation.
The default value is '0.0'.
* **sharp_turn_penalty** --
This will add an additional weight over the edges labelled as
'sharp turn' or 'u-turn' if the 'add_turn' option parameter
of the :meth:`GPUdb.create_graph` was invoked at graph
creation. The default value is '0.0'.
* **num_best_paths** --
For *MULTIPLE_ROUTING* solvers only; sets the number of
shortest paths computed from each node. This is the heuristic
criterion. Default value of zero allows the number to be
computed automatically by the solver. The user may want to
override this parameter to speed-up the solver. The default
value is '0'.
* **max_num_combinations** --
For *MULTIPLE_ROUTING* solvers only; sets the cap on the
combinatorial sequences generated. If the default value of
two million is overridden to a lesser value, it can
potentially speed up the solver. The default value is
'2000000'.
* **output_edge_path** --
If true then concatenated edge ids will be added as the EDGE
path column of the solution table for each source and target
pair in shortest path solves.
Allowed values are:
* true
* false
The default value is 'false'.
* **output_wkt_path** --
If true then concatenated wkt line segments will be added as
the Wktroute column of the solution table for each source and
target pair in shortest path solves.
Allowed values are:
* true
* false
The default value is 'true'.
* **server_id** --
Indicates which graph server(s) to send the request to.
Default is to send to the server, amongst those containing
the corresponding graph, that has the most computational
bandwidth. For the SHORTEST_PATH solver type, the input is split
amongst the servers containing the corresponding graph.
* **convergence_limit** --
For *PAGE_RANK* solvers only; Maximum percent relative
threshold on the pagerank scores of each node between
consecutive iterations to satisfy convergence. Default value
is 1 (one) percent. The default value is '1.0'.
* **max_iterations** --
For *PAGE_RANK* solvers only; Maximum number of pagerank
iterations for satisfying convergence. Default value is 100.
The default value is '100'.
* **max_runs** --
For all *CENTRALITY* solvers only; Sets the maximum number of
shortest path runs; maximum possible value is the number of
nodes in the graph. Default value of 0 enables this value to
be auto computed by the solver. The default value is '0'.
* **output_clusters** --
For *STATS_ALL* solvers only; the cluster index for each node
will be inserted as an additional column in the output.
Allowed values are:
* **true** --
An additional column 'CLUSTER' will be added for each node
* **false** --
No extra cluster info per node will be available in the
output
The default value is 'false'.
* **solve_heuristic** --
Specify heuristic search criterion only for the geo graphs
and shortest path solves towards a single target.
Allowed values are:
* **astar** --
Employs A-STAR heuristics to speed up the shortest path
traversal
* **none** --
No heuristics are applied
The default value is 'none'.
* **astar_radius** --
For path solvers only when 'solve_heuristic' option is
'astar'. The shortest path traversal front includes nodes
only within this radius (kilometers) as it moves towards the
target location. The default value is '70'.
Returns:
A dict with the following entries--
result (bool)
Indicates a successful solution on all servers.
result_per_destination_node (list of floats)
Cost or Pagerank (based on solver type) for each destination
node requested. Only populated if 'export_solve_results' option
is set to true.
info (dict of str to str)
Additional information.
"""
assert isinstance( graph_name, (basestring)), "solve_graph(): Argument 'graph_name' must be (one) of type(s) '(basestring)'; given %s" % type( graph_name ).__name__
weights_on_edges = weights_on_edges if isinstance( weights_on_edges, list ) else ( [] if (weights_on_edges is None) else [ weights_on_edges ] )
restrictions = restrictions if isinstance( restrictions, list ) else ( [] if (restrictions is None) else [ restrictions ] )
assert isinstance( solver_type, (basestring)), "solve_graph(): Argument 'solver_type' must be (one) of type(s) '(basestring)'; given %s" % type( solver_type ).__name__
source_nodes = source_nodes if isinstance( source_nodes, list ) else ( [] if (source_nodes is None) else [ source_nodes ] )
destination_nodes = destination_nodes if isinstance( destination_nodes, list ) else ( [] if (destination_nodes is None) else [ destination_nodes ] )
assert isinstance( solution_table, (basestring)), "solve_graph(): Argument 'solution_table' must be (one) of type(s) '(basestring)'; given %s" % type( solution_table ).__name__
assert isinstance( options, (dict)), "solve_graph(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['graph_name'] = graph_name
obj['weights_on_edges'] = weights_on_edges
obj['restrictions'] = restrictions
obj['solver_type'] = solver_type
obj['source_nodes'] = source_nodes
obj['destination_nodes'] = destination_nodes
obj['solution_table'] = solution_table
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/solve/graph', obj, convert_to_attr_dict = True )
return response
# end solve_graph
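# Illustrative usage sketch: a SHORTEST_PATH solve against a hypothetical
# graph 'my_road_graph', writing the result into 'ki_home.my_routes'.  The
# weight expression and the WKT source/destination values are placeholders.
#
#     resp = db.solve_graph(
#         graph_name        = 'my_road_graph',
#         weights_on_edges  = [ 'ST_LENGTH(wkt) AS WEIGHTS_VALUESPECIFIED' ],
#         solver_type       = 'SHORTEST_PATH',
#         source_nodes      = [ "{'POINT(-73.98 40.75)'} AS NODE_WKTPOINT" ],
#         destination_nodes = [ "{'POINT(-73.95 40.78)'} AS NODE_WKTPOINT" ],
#         solution_table    = 'ki_home.my_routes',
#         options           = { 'output_wkt_path': 'true' } )
#     if resp.is_ok():
#         print( resp.result )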
# begin update_records
def update_records( self, table_name = None, expressions = None, new_values_maps
= None, records_to_insert = [], records_to_insert_str =
[], record_encoding = 'binary', options = {},
record_type = None ):
"""Runs multiple predicate-based updates in a single call. With the
list of given expressions, any matching record's column values will be
updated
as provided in input parameter *new_values_maps*. There is also an
optional 'upsert'
capability where if a particular predicate doesn't match any existing
record,
then a new record can be inserted.
Note that this operation can only be run on an original table and not
on a
result view.
This operation can update primary key values. By default only
'pure primary key' predicates are allowed when updating primary key
values. If
the primary key for a table is the column 'attr1', then the operation
will only
accept predicates of the form: "attr1 == 'foo'" if the attr1 column is
being
updated. For a composite primary key (e.g. columns 'attr1' and
'attr2') then
this operation will only accept predicates of the form:
"(attr1 == 'foo') and (attr2 == 'bar')". Meaning, all primary key
columns
must appear in an equality predicate in the expressions. Furthermore
each
'pure primary key' predicate must be unique within a given request.
These
restrictions can be removed by utilizing some available options through
input parameter *options*.
The *update_on_existing_pk* option specifies the record primary key
collision
policy for tables with a `primary key
<../../../../concepts/tables/#primary-keys>`__, while
*ignore_existing_pk* specifies the record primary key collision
error-suppression policy when those collisions result in the update
being rejected. Both are
ignored on tables with no primary key.
Parameters:
table_name (str)
Name of table to be updated, in [schema_name.]table_name
format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be a currently
existing table and not a view.
expressions (list of str)
A list of the actual predicates, one for each update; format
should follow the guidelines :meth:`here <GPUdb.filter>`.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
new_values_maps (list of dicts of str to str and/or None)
List of new values for the matching records. Each element is a
map with
(key, value) pairs where the keys are the names of the columns
whose values are to be updated; the
values are the new values. The number of elements in the list
should match the length of input parameter *expressions*.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
records_to_insert (list of str)
An *optional* list of new binary-avro encoded records to
insert, one for each
update. If one of input parameter *expressions* does not yield
a matching record to be updated, then the
corresponding element from this list will be added to the
table. The default value is an empty list ( [] ). The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
records_to_insert_str (list of str)
An optional list of JSON encoded objects to insert, one for
each update, to be added if the particular update did not match
any objects. The default value is an empty list ( [] ). The
user can provide a single element (which will be automatically
promoted to a list internally) or a list.
record_encoding (str)
Identifies which of input parameter *records_to_insert* and
input parameter *records_to_insert_str* should be used.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **global_expression** --
An optional global expression to reduce the search space of
the predicates listed in input parameter *expressions*. The
default value is ''.
* **bypass_safety_checks** --
When set to *true*,
all predicates are available for primary key updates. Keep
in mind that it is possible to destroy
data in this case, since a single predicate may match
multiple objects (potentially all records
of a table), and then updating all of those records to have
the same primary key will, due to the
primary key uniqueness constraints, effectively delete all
but one of those updated records.
Allowed values are:
* true
* false
The default value is 'false'.
* **update_on_existing_pk** --
Specifies the record collision policy for updating a table
with a
`primary key <../../../../concepts/tables/#primary-keys>`__.
There are two ways that a record collision can
occur.
The first is an "update collision", which happens when the
update changes the value of the updated
record's primary key, and that new primary key already exists
as the primary key of another record
in the table.
The second is an "insert collision", which occurs when a
given filter in input parameter *expressions*
finds no records to update, and the alternate insert record
given in input parameter *records_to_insert* (or
input parameter *records_to_insert_str*) contains a primary
key matching that of an existing record in the
table.
If *update_on_existing_pk* is set to
*true*, "update collisions" will result in the
existing collided-into record being removed and the record
updated with the values specified in
input parameter *new_values_maps* taking its place; "insert
collisions" will result in the collided-into
record being updated with the values in input parameter
*records_to_insert*/input parameter *records_to_insert_str*
(if given).
If set to *false*, the existing collided-into
record will remain unchanged, while the update will be
rejected and the error handled as determined
by *ignore_existing_pk*. If the specified table does not
have a primary key,
then this option has no effect.
Allowed values are:
* **true** --
Overwrite the collided-into record when updating a
record's primary key or inserting an alternate record
causes a primary key collision between the
record being updated/inserted and another existing record
in the table
* **false** --
Reject updates which cause primary key collisions
between the record being updated/inserted and an existing
record in the table
The default value is 'false'.
* **ignore_existing_pk** --
Specifies the record collision error-suppression policy for
updating a table with a `primary key
<../../../../concepts/tables/#primary-keys>`__, only used
when primary
key record collisions are rejected (*update_on_existing_pk*
is
*false*). If set to
*true*, any record update that is rejected for
resulting in a primary key collision with an existing table
record will be ignored with no error
generated. If *false*, the rejection of any update
for resulting in a primary key collision will cause an error
to be reported. If the specified table
does not have a primary key or if *update_on_existing_pk* is
*true*, then this option has no effect.
Allowed values are:
* **true** --
Ignore updates that result in primary key collisions with
existing records
* **false** --
Treat as errors any updates that result in primary key
collisions with existing records
The default value is 'false'.
* **update_partition** --
Force qualifying records to be deleted and reinserted so
their partition membership will be reevaluated.
Allowed values are:
* true
* false
The default value is 'false'.
* **truncate_strings** --
If set to *true*, any strings which are too long for their
charN string fields will be truncated to fit.
Allowed values are:
* true
* false
The default value is 'false'.
* **use_expressions_in_new_values_maps** --
When set to *true*,
all new values in input parameter *new_values_maps* are
considered as expression values. When set to
*false*, all new values in
input parameter *new_values_maps* are considered as
constants. NOTE: When
*true*, string constants will need
to be quoted to avoid being evaluated as expressions.
Allowed values are:
* true
* false
The default value is 'false'.
* **record_id** --
ID of a single record to be updated (returned in the call to
:meth:`GPUdb.insert_records` or
:meth:`GPUdb.get_records_from_collection`).
record_type (RecordType)
A :class:`RecordType` object using which the binary data will
be encoded. If None, then it is assumed that the data is
already encoded, and no further encoding will occur. Default
is None.
Returns:
A dict with the following entries--
count_updated (long)
Total number of records updated.
counts_updated (list of longs)
Total number of records updated per predicate in input
parameter *expressions*.
count_inserted (long)
Total number of records inserted (due to expressions not
matching any existing records).
counts_inserted (list of longs)
Total number of records inserted per predicate in input
parameter *expressions* (will be either 0 or 1 for each
expression).
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "update_records(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
expressions = expressions if isinstance( expressions, list ) else ( [] if (expressions is None) else [ expressions ] )
new_values_maps = new_values_maps if isinstance( new_values_maps, list ) else ( [] if (new_values_maps is None) else [ new_values_maps ] )
records_to_insert = records_to_insert if isinstance( records_to_insert, list ) else ( [] if (records_to_insert is None) else [ records_to_insert ] )
records_to_insert_str = records_to_insert_str if isinstance( records_to_insert_str, list ) else ( [] if (records_to_insert_str is None) else [ records_to_insert_str ] )
assert isinstance( record_encoding, (basestring)), "update_records(): Argument 'record_encoding' must be (one) of type(s) '(basestring)'; given %s" % type( record_encoding ).__name__
assert isinstance( options, (dict)), "update_records(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
assert ( (record_type == None) or isinstance(record_type, RecordType) ), "update_records: Argument 'record_type' must be either RecordType or None; given %s" % type( record_type ).__name__
obj = {}
obj['table_name'] = table_name
obj['expressions'] = expressions
obj['new_values_maps'] = new_values_maps
obj['records_to_insert_str'] = records_to_insert_str
obj['record_encoding'] = record_encoding
obj['options'] = self.__sanitize_dicts( options )
if (record_encoding == 'binary'):
# Convert the objects to proper Records
use_object_array, data = _Util.convert_binary_data_to_cext_records( self, table_name, records_to_insert, record_type )
if use_object_array:
# First tuple element must be a RecordType or a Schema from the c-extension
obj['records_to_insert'] = (data[0].type, data) if data else ()
else: # use avro-encoded bytes for the data
obj['records_to_insert'] = data
else:
use_object_array = False
obj['records_to_insert'] = []
# end if
if use_object_array:
response = self.__submit_request( '/update/records', obj, convert_to_attr_dict = True, get_req_cext = True )
else:
response = self.__submit_request( '/update/records', obj, convert_to_attr_dict = True )
return response
# end update_records
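# Illustrative usage sketch: a predicate-based update with update_records()
# on a hypothetical table 'ki_home.stocks'.  With
# use_expressions_in_new_values_maps left at its default, the new values are
# treated as constants.
#
#     resp = db.update_records(
#         table_name      = 'ki_home.stocks',
#         expressions     = [ "symbol = 'ABC'" ],
#         new_values_maps = [ { 'price': '42.5' } ] )
#     if resp.is_ok():
#         print( resp.count_updated, resp.counts_updated )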
# begin update_records_by_series
def update_records_by_series( self, table_name = None, world_table_name = None,
view_name = '', reserved = [], options = {} ):
"""Updates the view specified by input parameter *table_name* to include
full
series (track) information from the input parameter *world_table_name*
for the series
(tracks) present in the input parameter *view_name*.
Parameters:
table_name (str)
Name of the view on which the update operation will be
performed, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing view.
world_table_name (str)
Name of the table containing the complete series (track)
information, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
view_name (str)
Name of the view containing the series (tracks) which have to
be updated, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. The
default value is ''.
reserved (list of str)
The default value is an empty list ( [] ). The user can
provide a single element (which will be automatically promoted
to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
count (int)
info (dict of str to str)
Additional information.
"""
assert isinstance( table_name, (basestring)), "update_records_by_series(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( world_table_name, (basestring)), "update_records_by_series(): Argument 'world_table_name' must be (one) of type(s) '(basestring)'; given %s" % type( world_table_name ).__name__
assert isinstance( view_name, (basestring)), "update_records_by_series(): Argument 'view_name' must be (one) of type(s) '(basestring)'; given %s" % type( view_name ).__name__
reserved = reserved if isinstance( reserved, list ) else ( [] if (reserved is None) else [ reserved ] )
assert isinstance( options, (dict)), "update_records_by_series(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['world_table_name'] = world_table_name
obj['view_name'] = view_name
obj['reserved'] = reserved
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/update/records/byseries', obj, convert_to_attr_dict = True )
return response
# end update_records_by_series
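# Illustrative usage sketch: pulling full series (track) data into a view
# with update_records_by_series().  All table/view names are hypothetical.
#
#     resp = db.update_records_by_series(
#         table_name       = 'ki_home.filtered_tracks',
#         world_table_name = 'ki_home.all_tracks' )
#     if resp.is_ok():
#         print( resp.count )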
# begin upload_files
def upload_files( self, file_names = None, file_data = None, options = {} ):
"""Uploads one or more files to `KiFS <../../../../tools/kifs/>`__. There
are
two methods for uploading files: load files in their entirety, or load
files in
parts. The latter is recommended for files of approximately 60 MB or
larger.
To upload files in their entirety, populate input parameter
*file_names* with the file
names to upload into on KiFS, and their respective byte content in
input parameter *file_data*.
Multiple steps are involved when uploading in multiple parts. Only one
file at a
time can be uploaded in this manner. A user-provided UUID is utilized
to tie all
the upload steps together for a given file. To upload a file in
multiple parts:
1. Provide the file name in input parameter *file_names*, the UUID in
the *multipart_upload_uuid* key in input parameter *options*, and
a *multipart_operation* value of
*init*.
2. Upload one or more parts by providing the file name, the part data
in input parameter *file_data*, the UUID, a *multipart_operation*
value of *upload_part*, and
the part number in the *multipart_upload_part_number*.
The part numbers must start at 1 and increase incrementally.
Parts may not be uploaded out of order.
3. Complete the upload by providing the file name, the UUID, and a
*multipart_operation* value of
*complete*.
Multipart uploads in progress may be canceled by providing the file
name, the
UUID, and a *multipart_operation* value of
*cancel*. If a new upload is
initialized with a different UUID for an existing upload in progress,
the
pre-existing upload is automatically canceled in favor of the new
upload.
The multipart upload must be completed for the file to be usable in
KiFS.
Information about multipart uploads in progress is available in
:meth:`GPUdb.show_files`.
File data may be pre-encoded using base64 encoding. This should be
indicated
using the *file_encoding* option, and is recommended when
using JSON serialization.
Each file path must reside in a top-level KiFS directory, i.e. one of
the
directories listed in :meth:`GPUdb.show_directories`. The user must
have write
permission on the directory. Nested directories are permitted in file
name
paths. Directories are delineated with the directory separator of '/'.
For
example, given the file path '/a/b/c/d.txt', 'a' must be a KiFS
directory.
These characters are allowed in file name paths: letters, numbers,
spaces, the
path delimiter of '/', and the characters: '.' '-' ':' '[' ']' '(' ')'
'#' '='.
Parameters:
file_names (list of str)
An array of full file name paths to be used for the files
uploaded to KiFS. File names may have any number of nested
directories in their
paths, but the top-level directory must be an existing KiFS
directory. Each file
must reside in or under a top-level directory. A full file name
path cannot be
larger than 1024 characters. The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
file_data (list of str)
File data for the files being uploaded, for the respective
files in input parameter *file_names*. The user can provide
a single element (which will be automatically promoted to a
list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **file_encoding** --
Encoding that has been applied to the uploaded
file data. When using JSON serialization it is recommended to
utilize
*base64*. The caller is responsible
for encoding the data provided in this payload.
Allowed values are:
* **base64** --
Specifies that the file data being uploaded has been base64
encoded.
* **none** --
The uploaded file data has not been encoded.
The default value is 'none'.
* **multipart_operation** --
Multipart upload operation to perform.
Allowed values are:
* **none** --
Default, indicates this is not a multipart upload
* **init** --
Initialize a multipart file upload
* **upload_part** --
Uploads a part of the specified multipart file upload
* **complete** --
Complete the specified multipart file upload
* **cancel** --
Cancel the specified multipart file upload
The default value is 'none'.
* **multipart_upload_uuid** --
UUID to uniquely identify a multipart upload
* **multipart_upload_part_number** --
Incremental part number for each part in a
multipart upload. Part numbers start at 1, increment by 1,
and must be uploaded
sequentially
* **delete_if_exists** --
If *true*,
any existing files specified in input parameter *file_names*
will be deleted prior to start of upload.
Otherwise the file is replaced once the upload completes.
If the file was deleted beforehand, rollback of the original
file is no longer possible should the upload be cancelled,
aborted, or fail.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
A dict with the following entries--
info (dict of str to str)
Additional information.
"""
file_names = file_names if isinstance( file_names, list ) else ( [] if (file_names is None) else [ file_names ] )
file_data = file_data if isinstance( file_data, list ) else ( [] if (file_data is None) else [ file_data ] )
assert isinstance( options, (dict)), "upload_files(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['file_names'] = file_names
obj['file_data'] = file_data
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/upload/files', obj, convert_to_attr_dict = True )
return response
# end upload_files
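# Illustrative usage sketch: a whole-file upload to KiFS with upload_files().
# Assumes 'my_dir' is an existing top-level KiFS directory (see
# show_directories()); the local file name is a placeholder.  For large files,
# the multipart_operation / multipart_upload_uuid /
# multipart_upload_part_number options described above drive the multipart
# flow instead.
#
#     with open( 'report.csv', 'rb' ) as f:
#         data = f.read()
#     resp = db.upload_files( [ 'my_dir/report.csv' ], [ data ],
#                             options = { 'file_encoding': 'none' } )
#     if resp.is_ok():
#         print( resp.info )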
# begin upload_files_fromurl
def upload_files_fromurl( self, file_names = None, urls = None, options = {} ):
"""Uploads one or more files to `KiFS <../../../../tools/kifs/>`__.
Each file path must reside in a top-level KiFS directory, i.e. one of
the
directories listed in :meth:`GPUdb.show_directories`. The user must
have write
permission on the directory. Nested directories are permitted in file
name
paths. Directories are delineated with the directory separator of '/'.
For
example, given the file path '/a/b/c/d.txt', 'a' must be a KiFS
directory.
These characters are allowed in file name paths: letters, numbers,
spaces, the
path delimiter of '/', and the characters: '.' '-' ':' '[' ']' '(' ')'
'#' '='.
Parameters:
file_names (list of str)
An array of full file name paths to be used for the files
uploaded to KiFS. File names may have any number of nested
directories in their
paths, but the top-level directory must be an existing KiFS
directory. Each file
must reside in or under a top-level directory. A full file name
path cannot be
larger than 1024 characters. The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
urls (list of str)
List of URLs to upload, for each respective file in input
parameter *file_names*. The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
A dict with the following entries--
successful_file_names (list of str)
List of input parameter *file_names* that were successfully
uploaded.
successful_urls (list of str)
List of input parameter *urls* that were successfully uploaded.
info (dict of str to str)
Additional information.
"""
file_names = file_names if isinstance( file_names, list ) else ( [] if (file_names is None) else [ file_names ] )
urls = urls if isinstance( urls, list ) else ( [] if (urls is None) else [ urls ] )
assert isinstance( options, (dict)), "upload_files_fromurl(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['file_names'] = file_names
obj['urls'] = urls
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/upload/files/fromurl', obj, convert_to_attr_dict = True )
return response
# end upload_files_fromurl
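# Illustrative usage sketch: pulling a remote file into KiFS with
# upload_files_fromurl().  The KiFS path and URL below are placeholders.
#
#     resp = db.upload_files_fromurl( [ 'my_dir/data.csv' ],
#                                     [ 'https://example.com/data.csv' ] )
#     if resp.is_ok():
#         print( resp.successful_file_names, resp.successful_urls )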
# begin visualize_get_feature_info
def visualize_get_feature_info( self, table_names = None, x_column_names = None,
y_column_names = None, geometry_column_names
= None, query_column_names = None,
projection = None, min_x = None, max_x =
None, min_y = None, max_y = None, width =
None, height = None, x = None, y = None,
radius = None, limit = None, encoding =
None, options = {} ):
table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
x_column_names = x_column_names if isinstance( x_column_names, list ) else ( [] if (x_column_names is None) else [ x_column_names ] )
y_column_names = y_column_names if isinstance( y_column_names, list ) else ( [] if (y_column_names is None) else [ y_column_names ] )
geometry_column_names = geometry_column_names if isinstance( geometry_column_names, list ) else ( [] if (geometry_column_names is None) else [ geometry_column_names ] )
query_column_names = query_column_names if isinstance( query_column_names, list ) else ( [] if (query_column_names is None) else [ query_column_names ] )
assert isinstance( projection, (basestring)), "visualize_get_feature_info(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__
assert isinstance( min_x, (int, long, float)), "visualize_get_feature_info(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
assert isinstance( max_x, (int, long, float)), "visualize_get_feature_info(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
assert isinstance( min_y, (int, long, float)), "visualize_get_feature_info(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
assert isinstance( max_y, (int, long, float)), "visualize_get_feature_info(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
assert isinstance( width, (int, long, float)), "visualize_get_feature_info(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__
assert isinstance( height, (int, long, float)), "visualize_get_feature_info(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__
assert isinstance( x, (int, long, float)), "visualize_get_feature_info(): Argument 'x' must be (one) of type(s) '(int, long, float)'; given %s" % type( x ).__name__
assert isinstance( y, (int, long, float)), "visualize_get_feature_info(): Argument 'y' must be (one) of type(s) '(int, long, float)'; given %s" % type( y ).__name__
assert isinstance( radius, (int, long, float)), "visualize_get_feature_info(): Argument 'radius' must be (one) of type(s) '(int, long, float)'; given %s" % type( radius ).__name__
assert isinstance( limit, (int, long, float)), "visualize_get_feature_info(): Argument 'limit' must be (one) of type(s) '(int, long, float)'; given %s" % type( limit ).__name__
assert isinstance( encoding, (basestring)), "visualize_get_feature_info(): Argument 'encoding' must be (one) of type(s) '(basestring)'; given %s" % type( encoding ).__name__
assert isinstance( options, (dict)), "visualize_get_feature_info(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_names'] = table_names
obj['x_column_names'] = x_column_names
obj['y_column_names'] = y_column_names
obj['geometry_column_names'] = geometry_column_names
obj['query_column_names'] = query_column_names
obj['projection'] = projection
obj['min_x'] = min_x
obj['max_x'] = max_x
obj['min_y'] = min_y
obj['max_y'] = max_y
obj['width'] = width
obj['height'] = height
obj['x'] = x
obj['y'] = y
obj['radius'] = radius
obj['limit'] = limit
obj['encoding'] = encoding
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/visualize/getfeatureinfo', obj, convert_to_attr_dict = True )
return response
# end visualize_get_feature_info
# begin visualize_image
def visualize_image( self, table_names = None, world_table_names = None,
x_column_name = None, y_column_name = None,
symbol_column_name = None, geometry_column_name = None,
track_ids = None, min_x = None, max_x = None, min_y =
None, max_y = None, width = None, height = None,
projection = 'PLATE_CARREE', bg_color = None,
style_options = None, options = {} ):
table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
world_table_names = world_table_names if isinstance( world_table_names, list ) else ( [] if (world_table_names is None) else [ world_table_names ] )
assert isinstance( x_column_name, (basestring)), "visualize_image(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
assert isinstance( y_column_name, (basestring)), "visualize_image(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
assert isinstance( symbol_column_name, (basestring)), "visualize_image(): Argument 'symbol_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( symbol_column_name ).__name__
assert isinstance( geometry_column_name, (basestring)), "visualize_image(): Argument 'geometry_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( geometry_column_name ).__name__
track_ids = track_ids if isinstance( track_ids, list ) else ( [] if (track_ids is None) else [ track_ids ] )
assert isinstance( min_x, (int, long, float)), "visualize_image(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
assert isinstance( max_x, (int, long, float)), "visualize_image(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
assert isinstance( min_y, (int, long, float)), "visualize_image(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
assert isinstance( max_y, (int, long, float)), "visualize_image(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
assert isinstance( width, (int, long, float)), "visualize_image(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__
assert isinstance( height, (int, long, float)), "visualize_image(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__
assert isinstance( projection, (basestring)), "visualize_image(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__
assert isinstance( bg_color, (int, long, float)), "visualize_image(): Argument 'bg_color' must be (one) of type(s) '(int, long, float)'; given %s" % type( bg_color ).__name__
assert isinstance( style_options, (dict)), "visualize_image(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__
assert isinstance( options, (dict)), "visualize_image(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_names'] = table_names
obj['world_table_names'] = world_table_names
obj['x_column_name'] = x_column_name
obj['y_column_name'] = y_column_name
obj['symbol_column_name'] = symbol_column_name
obj['geometry_column_name'] = geometry_column_name
obj['track_ids'] = track_ids
obj['min_x'] = min_x
obj['max_x'] = max_x
obj['min_y'] = min_y
obj['max_y'] = max_y
obj['width'] = width
obj['height'] = height
obj['projection'] = projection
obj['bg_color'] = bg_color
obj['style_options'] = self.__sanitize_dicts( style_options )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/visualize/image', obj, convert_to_attr_dict = True )
return response
# end visualize_image
# begin visualize_image_chart
def visualize_image_chart( self, table_name = None, x_column_names = None,
y_column_names = None, min_x = None, max_x =
None, min_y = None, max_y = None, width = None,
height = None, bg_color = None, style_options =
None, options = {} ):
"""Scatter plot is the only plot type currently supported. A non-numeric
column can be specified as x or y column and jitters can be added to
them to avoid excessive overlapping. All color values must be in the
format RRGGBB or AARRGGBB (to specify the alpha value).
The image is contained in the output parameter *image_data* field.
Parameters:
table_name (str)
Name of the table containing the data to be drawn as a chart,
in [schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
x_column_names (list of str)
Names of the columns containing the data mapped to the x axis
of a chart. The user can provide a single element (which
will be automatically promoted to a list internally) or a list.
y_column_names (list of str)
Names of the columns containing the data mapped to the y axis
of a chart. The user can provide a single element (which
will be automatically promoted to a list internally) or a list.
min_x (float)
Lower bound for the x column values. For non-numeric x column,
each x column item is mapped to an integral value starting from
0.
max_x (float)
Upper bound for the x column values. For non-numeric x column,
each x column item is mapped to an integral value starting from
0.
min_y (float)
Lower bound for the y column values. For non-numeric y column,
each y column item is mapped to an integral value starting from
0.
max_y (float)
Upper bound for the y column values. For non-numeric y column,
each y column item is mapped to an integral value starting from
0.
width (int)
Width of the generated image in pixels.
height (int)
Height of the generated image in pixels.
bg_color (str)
Background color of the generated image.
style_options (dict of str to lists of str)
Rendering style options for a chart.
Allowed keys are:
* **pointcolor** --
The color of points in the plot represented as a hexadecimal
number. The default value is '0000FF'.
* **pointsize** --
The size of points in the plot represented as number of
pixels. The default value is '3'.
* **pointshape** --
The shape of points in the plot.
Allowed values are:
* none
* circle
* square
* diamond
* hollowcircle
* hollowsquare
* hollowdiamond
The default value is 'square'.
* **cb_pointcolors** --
Point color class break information consisting of three
entries: class-break attribute, class-break values/ranges,
and point color values. This option overrides the pointcolor
option if both are provided. Class-break ranges are
represented in the form of "min:max". Class-break
values/ranges and point color values are separated by
cb_delimiter, e.g. {"price", "20:30;30:40;40:50",
"0xFF0000;0x00FF00;0x0000FF"}.
* **cb_pointsizes** --
Point size class break information consisting of three
entries: class-break attribute, class-break values/ranges,
and point size values. This option overrides the pointsize
option if both are provided. Class-break ranges are
represented in the form of "min:max". Class-break
values/ranges and point size values are separated by
cb_delimiter, e.g. {"states", "NY;TX;CA", "3;5;7"}.
* **cb_pointshapes** --
Point shape class break information consisting of three
entries: class-break attribute, class-break values/ranges,
and point shape names. This option overrides the pointshape
option if both are provided. Class-break ranges are
represented in the form of "min:max". Class-break
values/ranges and point shape names are separated by
cb_delimiter, e.g. {"states", "NY;TX;CA",
"circle;square;diamond"}.
* **cb_delimiter** --
A character or string which separates per-class values in a
class-break style option string. The default value is ';'.
* **x_order_by** --
An expression or aggregate expression by which non-numeric x
column values are sorted, e.g. "avg(price) descending".
* **y_order_by** --
An expression or aggregate expression by which non-numeric y
column values are sorted, e.g. "avg(price)", which defaults
to "avg(price) ascending".
* **scale_type_x** --
Type of x axis scale.
Allowed values are:
* **none** --
No scale is applied to the x axis.
* **log** --
A base-10 log scale is applied to the x axis.
The default value is 'none'.
* **scale_type_y** --
Type of y axis scale.
Allowed values are:
* **none** --
No scale is applied to the y axis.
* **log** --
A base-10 log scale is applied to the y axis.
The default value is 'none'.
* **min_max_scaled** --
If this option is set to "false", this endpoint expects the
request's min/max values to be unscaled; they will be
scaled according to scale_type_x or scale_type_y for the
response. If this option is set to "true", this endpoint
expects the request's min/max values to already be scaled
according to scale_type_x/scale_type_y, and the response's
min/max values will be equal to the request's min/max values.
The default value is 'false'.
* **jitter_x** --
Amplitude of horizontal jitter applied to non-numeric x
column values. The default value is '0.0'.
* **jitter_y** --
Amplitude of vertical jitter applied to non-numeric y column
values. The default value is '0.0'.
* **plot_all** --
If this option is set to "true", all non-numeric column
values are plotted ignoring min_x, max_x, min_y and max_y
parameters. The default value is 'false'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **image_encoding** --
Encoding to be applied to the output image. When using JSON
serialization it is recommended to specify this as *base64*.
Allowed values are:
* **base64** --
Apply base64 encoding to the output image.
* **none** --
Do not apply any additional encoding to the output image.
The default value is 'none'.
Returns:
A dict with the following entries--
min_x (float)
Lower bound for the x column values as provided in input
parameter *min_x* or calculated for non-numeric columns when
plot_all option is used.
max_x (float)
Upper bound for the x column values as provided in input
parameter *max_x* or calculated for non-numeric columns when
plot_all option is used.
min_y (float)
Lower bound for the y column values as provided in input
parameter *min_y* or calculated for non-numeric columns when
plot_all option is used.
max_y (float)
Upper bound for the y column values as provided in input
parameter *max_y* or calculated for non-numeric columns when
plot_all option is used.
width (int)
Width of the image as provided in input parameter *width*.
height (int)
Height of the image as provided in input parameter *height*.
bg_color (str)
Background color of the image as provided in input parameter
*bg_color*.
image_data (bytes)
The generated image data.
axes_info (dict of str to lists of str)
Information returned for drawing labels for the axes associated
with non-numeric columns.
Allowed keys are:
* **sorted_x_values** --
Sorted non-numeric x column value list for drawing x axis
label.
* **location_x** --
X axis label positions of sorted_x_values in pixel
coordinates.
* **sorted_y_values** --
Sorted non-numeric y column value list for drawing y axis
label.
* **location_y** --
Y axis label positions of sorted_y_values in pixel
coordinates.
info (dict of str to str)
Additional information.
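Example (an illustrative sketch only; the connection settings, table
name, and column names below are hypothetical)::
    import gpudb
    db = gpudb.GPUdb( host = "http://127.0.0.1:9191" )
    # Render a simple scatter chart, requesting base64-encoded output
    rsp = db.visualize_image_chart( table_name = "demo.stocks",
                                    x_column_names = [ "sector" ],
                                    y_column_names = [ "price" ],
                                    min_x = 0, max_x = 10,
                                    min_y = 0, max_y = 500,
                                    width = 800, height = 600,
                                    bg_color = "FFFFFF",
                                    style_options = { "pointcolor": "0000FF",
                                                      "pointsize": "3" },
                                    options = { "image_encoding": "base64" } )
    image_data = rsp[ "image_data" ]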
"""
assert isinstance( table_name, (basestring)), "visualize_image_chart(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
x_column_names = x_column_names if isinstance( x_column_names, list ) else ( [] if (x_column_names is None) else [ x_column_names ] )
y_column_names = y_column_names if isinstance( y_column_names, list ) else ( [] if (y_column_names is None) else [ y_column_names ] )
assert isinstance( min_x, (int, long, float)), "visualize_image_chart(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
assert isinstance( max_x, (int, long, float)), "visualize_image_chart(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
assert isinstance( min_y, (int, long, float)), "visualize_image_chart(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
assert isinstance( max_y, (int, long, float)), "visualize_image_chart(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
assert isinstance( width, (int, long, float)), "visualize_image_chart(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__
assert isinstance( height, (int, long, float)), "visualize_image_chart(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__
assert isinstance( bg_color, (basestring)), "visualize_image_chart(): Argument 'bg_color' must be (one) of type(s) '(basestring)'; given %s" % type( bg_color ).__name__
assert isinstance( style_options, (dict)), "visualize_image_chart(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__
assert isinstance( options, (dict)), "visualize_image_chart(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['x_column_names'] = x_column_names
obj['y_column_names'] = y_column_names
obj['min_x'] = min_x
obj['max_x'] = max_x
obj['min_y'] = min_y
obj['max_y'] = max_y
obj['width'] = width
obj['height'] = height
obj['bg_color'] = bg_color
obj['style_options'] = self.__sanitize_dicts( style_options )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/visualize/image/chart', obj, convert_to_attr_dict = True )
return response
# end visualize_image_chart
# begin visualize_image_classbreak
def visualize_image_classbreak( self, table_names = None, world_table_names =
None, x_column_name = None, y_column_name =
None, symbol_column_name = None,
geometry_column_name = None, track_ids =
None, cb_attr = None, cb_vals = None,
cb_pointcolor_attr = None,
cb_pointcolor_vals = None,
cb_pointalpha_attr = None,
cb_pointalpha_vals = None, cb_pointsize_attr
= None, cb_pointsize_vals = None,
cb_pointshape_attr = None,
cb_pointshape_vals = None, min_x = None,
max_x = None, min_y = None, max_y = None,
width = None, height = None, projection =
'PLATE_CARREE', bg_color = None,
style_options = None, options = {},
cb_transparency_vec = None ):
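"""Generate a rasterized image of the given tables with class-break
(per-class point color/alpha/size/shape) styling by submitting a
request to the '/visualize/image/classbreak' endpoint; the response
is returned as an attribute dict.
"""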
table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
world_table_names = world_table_names if isinstance( world_table_names, list ) else ( [] if (world_table_names is None) else [ world_table_names ] )
assert isinstance( x_column_name, (basestring)), "visualize_image_classbreak(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
assert isinstance( y_column_name, (basestring)), "visualize_image_classbreak(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
assert isinstance( symbol_column_name, (basestring)), "visualize_image_classbreak(): Argument 'symbol_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( symbol_column_name ).__name__
assert isinstance( geometry_column_name, (basestring)), "visualize_image_classbreak(): Argument 'geometry_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( geometry_column_name ).__name__
track_ids = track_ids if isinstance( track_ids, list ) else ( [] if (track_ids is None) else [ track_ids ] )
assert isinstance( cb_attr, (basestring)), "visualize_image_classbreak(): Argument 'cb_attr' must be (one) of type(s) '(basestring)'; given %s" % type( cb_attr ).__name__
cb_vals = cb_vals if isinstance( cb_vals, list ) else ( [] if (cb_vals is None) else [ cb_vals ] )
assert isinstance( cb_pointcolor_attr, (basestring)), "visualize_image_classbreak(): Argument 'cb_pointcolor_attr' must be (one) of type(s) '(basestring)'; given %s" % type( cb_pointcolor_attr ).__name__
cb_pointcolor_vals = cb_pointcolor_vals if isinstance( cb_pointcolor_vals, list ) else ( [] if (cb_pointcolor_vals is None) else [ cb_pointcolor_vals ] )
assert isinstance( cb_pointalpha_attr, (basestring)), "visualize_image_classbreak(): Argument 'cb_pointalpha_attr' must be (one) of type(s) '(basestring)'; given %s" % type( cb_pointalpha_attr ).__name__
cb_pointalpha_vals = cb_pointalpha_vals if isinstance( cb_pointalpha_vals, list ) else ( [] if (cb_pointalpha_vals is None) else [ cb_pointalpha_vals ] )
assert isinstance( cb_pointsize_attr, (basestring)), "visualize_image_classbreak(): Argument 'cb_pointsize_attr' must be (one) of type(s) '(basestring)'; given %s" % type( cb_pointsize_attr ).__name__
cb_pointsize_vals = cb_pointsize_vals if isinstance( cb_pointsize_vals, list ) else ( [] if (cb_pointsize_vals is None) else [ cb_pointsize_vals ] )
assert isinstance( cb_pointshape_attr, (basestring)), "visualize_image_classbreak(): Argument 'cb_pointshape_attr' must be (one) of type(s) '(basestring)'; given %s" % type( cb_pointshape_attr ).__name__
cb_pointshape_vals = cb_pointshape_vals if isinstance( cb_pointshape_vals, list ) else ( [] if (cb_pointshape_vals is None) else [ cb_pointshape_vals ] )
assert isinstance( min_x, (int, long, float)), "visualize_image_classbreak(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
assert isinstance( max_x, (int, long, float)), "visualize_image_classbreak(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
assert isinstance( min_y, (int, long, float)), "visualize_image_classbreak(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
assert isinstance( max_y, (int, long, float)), "visualize_image_classbreak(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
assert isinstance( width, (int, long, float)), "visualize_image_classbreak(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__
assert isinstance( height, (int, long, float)), "visualize_image_classbreak(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__
assert isinstance( projection, (basestring)), "visualize_image_classbreak(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__
assert isinstance( bg_color, (int, long, float)), "visualize_image_classbreak(): Argument 'bg_color' must be (one) of type(s) '(int, long, float)'; given %s" % type( bg_color ).__name__
assert isinstance( style_options, (dict)), "visualize_image_classbreak(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__
assert isinstance( options, (dict)), "visualize_image_classbreak(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
cb_transparency_vec = cb_transparency_vec if isinstance( cb_transparency_vec, list ) else ( [] if (cb_transparency_vec is None) else [ cb_transparency_vec ] )
obj = {}
obj['table_names'] = table_names
obj['world_table_names'] = world_table_names
obj['x_column_name'] = x_column_name
obj['y_column_name'] = y_column_name
obj['symbol_column_name'] = symbol_column_name
obj['geometry_column_name'] = geometry_column_name
obj['track_ids'] = track_ids
obj['cb_attr'] = cb_attr
obj['cb_vals'] = cb_vals
obj['cb_pointcolor_attr'] = cb_pointcolor_attr
obj['cb_pointcolor_vals'] = cb_pointcolor_vals
obj['cb_pointalpha_attr'] = cb_pointalpha_attr
obj['cb_pointalpha_vals'] = cb_pointalpha_vals
obj['cb_pointsize_attr'] = cb_pointsize_attr
obj['cb_pointsize_vals'] = cb_pointsize_vals
obj['cb_pointshape_attr'] = cb_pointshape_attr
obj['cb_pointshape_vals'] = cb_pointshape_vals
obj['min_x'] = min_x
obj['max_x'] = max_x
obj['min_y'] = min_y
obj['max_y'] = max_y
obj['width'] = width
obj['height'] = height
obj['projection'] = projection
obj['bg_color'] = bg_color
obj['style_options'] = self.__sanitize_dicts( style_options )
obj['options'] = self.__sanitize_dicts( options )
obj['cb_transparency_vec'] = cb_transparency_vec
response = self.__submit_request( '/visualize/image/classbreak', obj, convert_to_attr_dict = True )
return response
# end visualize_image_classbreak
# begin visualize_image_contour
def visualize_image_contour( self, table_names = None, x_column_name = None,
y_column_name = None, value_column_name = None,
min_x = None, max_x = None, min_y = None, max_y
= None, width = None, height = None, projection
= 'PLATE_CARREE', style_options = None, options
= {} ):
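"""Generate a contour image of the given value column over the x/y
columns of the given tables by submitting a request to the
'/visualize/image/contour' endpoint; the response is returned as an
attribute dict.
"""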
table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
assert isinstance( x_column_name, (basestring)), "visualize_image_contour(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
assert isinstance( y_column_name, (basestring)), "visualize_image_contour(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
assert isinstance( value_column_name, (basestring)), "visualize_image_contour(): Argument 'value_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( value_column_name ).__name__
assert isinstance( min_x, (int, long, float)), "visualize_image_contour(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
assert isinstance( max_x, (int, long, float)), "visualize_image_contour(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
assert isinstance( min_y, (int, long, float)), "visualize_image_contour(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
assert isinstance( max_y, (int, long, float)), "visualize_image_contour(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
assert isinstance( width, (int, long, float)), "visualize_image_contour(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__
assert isinstance( height, (int, long, float)), "visualize_image_contour(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__
assert isinstance( projection, (basestring)), "visualize_image_contour(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__
assert isinstance( style_options, (dict)), "visualize_image_contour(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__
assert isinstance( options, (dict)), "visualize_image_contour(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_names'] = table_names
obj['x_column_name'] = x_column_name
obj['y_column_name'] = y_column_name
obj['value_column_name'] = value_column_name
obj['min_x'] = min_x
obj['max_x'] = max_x
obj['min_y'] = min_y
obj['max_y'] = max_y
obj['width'] = width
obj['height'] = height
obj['projection'] = projection
obj['style_options'] = self.__sanitize_dicts( style_options )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/visualize/image/contour', obj, convert_to_attr_dict = True )
return response
# end visualize_image_contour
# begin visualize_image_heatmap
def visualize_image_heatmap( self, table_names = None, x_column_name = None,
y_column_name = None, value_column_name = None,
geometry_column_name = None, min_x = None,
max_x = None, min_y = None, max_y = None, width
= None, height = None, projection =
'PLATE_CARREE', style_options = None, options =
{} ):
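"""Generate a heatmap image of the given value column over the x/y
(or geometry) columns of the given tables by submitting a request to
the '/visualize/image/heatmap' endpoint; the response is returned as
an attribute dict.
"""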
table_names = table_names if isinstance( table_names, list ) else ( [] if (table_names is None) else [ table_names ] )
assert isinstance( x_column_name, (basestring)), "visualize_image_heatmap(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
assert isinstance( y_column_name, (basestring)), "visualize_image_heatmap(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
assert isinstance( value_column_name, (basestring)), "visualize_image_heatmap(): Argument 'value_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( value_column_name ).__name__
assert isinstance( geometry_column_name, (basestring)), "visualize_image_heatmap(): Argument 'geometry_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( geometry_column_name ).__name__
assert isinstance( min_x, (int, long, float)), "visualize_image_heatmap(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
assert isinstance( max_x, (int, long, float)), "visualize_image_heatmap(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
assert isinstance( min_y, (int, long, float)), "visualize_image_heatmap(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
assert isinstance( max_y, (int, long, float)), "visualize_image_heatmap(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
assert isinstance( width, (int, long, float)), "visualize_image_heatmap(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__
assert isinstance( height, (int, long, float)), "visualize_image_heatmap(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__
assert isinstance( projection, (basestring)), "visualize_image_heatmap(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__
assert isinstance( style_options, (dict)), "visualize_image_heatmap(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__
assert isinstance( options, (dict)), "visualize_image_heatmap(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_names'] = table_names
obj['x_column_name'] = x_column_name
obj['y_column_name'] = y_column_name
obj['value_column_name'] = value_column_name
obj['geometry_column_name'] = geometry_column_name
obj['min_x'] = min_x
obj['max_x'] = max_x
obj['min_y'] = min_y
obj['max_y'] = max_y
obj['width'] = width
obj['height'] = height
obj['projection'] = projection
obj['style_options'] = self.__sanitize_dicts( style_options )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/visualize/image/heatmap', obj, convert_to_attr_dict = True )
return response
# end visualize_image_heatmap
# begin visualize_image_labels
def visualize_image_labels( self, table_name = None, x_column_name = None,
y_column_name = None, x_offset = '', y_offset =
'', text_string = None, font = '', text_color =
'', text_angle = '', text_scale = '', draw_box =
'', draw_leader = '', line_width = '',
line_color = '', fill_color = '',
leader_x_column_name = '', leader_y_column_name
= '', filter = '', min_x = None, max_x = None,
min_y = None, max_y = None, width = None, height
= None, projection = 'PLATE_CARREE', options =
{} ):
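"""Render text labels for the records of the given table as an image
by submitting a request to the '/visualize/image/labels' endpoint;
the response is returned as an attribute dict.
"""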
assert isinstance( table_name, (basestring)), "visualize_image_labels(): Argument 'table_name' must be (one) of type(s) '(basestring)'; given %s" % type( table_name ).__name__
assert isinstance( x_column_name, (basestring)), "visualize_image_labels(): Argument 'x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( x_column_name ).__name__
assert isinstance( y_column_name, (basestring)), "visualize_image_labels(): Argument 'y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( y_column_name ).__name__
assert isinstance( x_offset, (basestring)), "visualize_image_labels(): Argument 'x_offset' must be (one) of type(s) '(basestring)'; given %s" % type( x_offset ).__name__
assert isinstance( y_offset, (basestring)), "visualize_image_labels(): Argument 'y_offset' must be (one) of type(s) '(basestring)'; given %s" % type( y_offset ).__name__
assert isinstance( text_string, (basestring)), "visualize_image_labels(): Argument 'text_string' must be (one) of type(s) '(basestring)'; given %s" % type( text_string ).__name__
assert isinstance( font, (basestring)), "visualize_image_labels(): Argument 'font' must be (one) of type(s) '(basestring)'; given %s" % type( font ).__name__
assert isinstance( text_color, (basestring)), "visualize_image_labels(): Argument 'text_color' must be (one) of type(s) '(basestring)'; given %s" % type( text_color ).__name__
assert isinstance( text_angle, (basestring)), "visualize_image_labels(): Argument 'text_angle' must be (one) of type(s) '(basestring)'; given %s" % type( text_angle ).__name__
assert isinstance( text_scale, (basestring)), "visualize_image_labels(): Argument 'text_scale' must be (one) of type(s) '(basestring)'; given %s" % type( text_scale ).__name__
assert isinstance( draw_box, (basestring)), "visualize_image_labels(): Argument 'draw_box' must be (one) of type(s) '(basestring)'; given %s" % type( draw_box ).__name__
assert isinstance( draw_leader, (basestring)), "visualize_image_labels(): Argument 'draw_leader' must be (one) of type(s) '(basestring)'; given %s" % type( draw_leader ).__name__
assert isinstance( line_width, (basestring)), "visualize_image_labels(): Argument 'line_width' must be (one) of type(s) '(basestring)'; given %s" % type( line_width ).__name__
assert isinstance( line_color, (basestring)), "visualize_image_labels(): Argument 'line_color' must be (one) of type(s) '(basestring)'; given %s" % type( line_color ).__name__
assert isinstance( fill_color, (basestring)), "visualize_image_labels(): Argument 'fill_color' must be (one) of type(s) '(basestring)'; given %s" % type( fill_color ).__name__
assert isinstance( leader_x_column_name, (basestring)), "visualize_image_labels(): Argument 'leader_x_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( leader_x_column_name ).__name__
assert isinstance( leader_y_column_name, (basestring)), "visualize_image_labels(): Argument 'leader_y_column_name' must be (one) of type(s) '(basestring)'; given %s" % type( leader_y_column_name ).__name__
assert isinstance( filter, (basestring)), "visualize_image_labels(): Argument 'filter' must be (one) of type(s) '(basestring)'; given %s" % type( filter ).__name__
assert isinstance( min_x, (int, long, float)), "visualize_image_labels(): Argument 'min_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_x ).__name__
assert isinstance( max_x, (int, long, float)), "visualize_image_labels(): Argument 'max_x' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_x ).__name__
assert isinstance( min_y, (int, long, float)), "visualize_image_labels(): Argument 'min_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( min_y ).__name__
assert isinstance( max_y, (int, long, float)), "visualize_image_labels(): Argument 'max_y' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_y ).__name__
assert isinstance( width, (int, long, float)), "visualize_image_labels(): Argument 'width' must be (one) of type(s) '(int, long, float)'; given %s" % type( width ).__name__
assert isinstance( height, (int, long, float)), "visualize_image_labels(): Argument 'height' must be (one) of type(s) '(int, long, float)'; given %s" % type( height ).__name__
assert isinstance( projection, (basestring)), "visualize_image_labels(): Argument 'projection' must be (one) of type(s) '(basestring)'; given %s" % type( projection ).__name__
assert isinstance( options, (dict)), "visualize_image_labels(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['table_name'] = table_name
obj['x_column_name'] = x_column_name
obj['y_column_name'] = y_column_name
obj['x_offset'] = x_offset
obj['y_offset'] = y_offset
obj['text_string'] = text_string
obj['font'] = font
obj['text_color'] = text_color
obj['text_angle'] = text_angle
obj['text_scale'] = text_scale
obj['draw_box'] = draw_box
obj['draw_leader'] = draw_leader
obj['line_width'] = line_width
obj['line_color'] = line_color
obj['fill_color'] = fill_color
obj['leader_x_column_name'] = leader_x_column_name
obj['leader_y_column_name'] = leader_y_column_name
obj['filter'] = filter
obj['min_x'] = min_x
obj['max_x'] = max_x
obj['min_y'] = min_y
obj['max_y'] = max_y
obj['width'] = width
obj['height'] = height
obj['projection'] = projection
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/visualize/image/labels', obj, convert_to_attr_dict = True )
return response
# end visualize_image_labels
# begin visualize_isochrone
def visualize_isochrone( self, graph_name = None, source_node = None,
max_solution_radius = '-1.0', weights_on_edges =
[], restrictions = [], num_levels = '1',
generate_image = True, levels_table = '',
style_options = None, solve_options = {},
contour_options = {}, options = {} ):
"""Generate an image containing isolines for travel results using an
existing graph. Isolines represent curves of equal cost, with cost
typically
referring to the time or distance assigned as the weights of the
underlying
graph. See
`Network Graphs & Solvers
<../../../../graph_solver/network_graph_solver/>`__
for more information on graphs.
Parameters:
graph_name (str)
Name of the graph on which the isochrone is to be computed.
source_node (str)
Starting vertex on the underlying graph from/to which the
isochrones are created.
max_solution_radius (float)
Extent of the search radius around input parameter
*source_node*. Set to '-1.0' for unrestricted search radius.
The default value is -1.0.
weights_on_edges (list of str)
Additional weights to apply to the edges of an existing graph.
Weights must be specified using `identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as `combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS WEIGHTS_EDGE_ID', or expressions, e.g.,
'ST_LENGTH(wkt) AS WEIGHTS_VALUESPECIFIED'. Any provided
weights will be added (in the case of 'WEIGHTS_VALUESPECIFIED')
to or multiplied with (in the case of
'WEIGHTS_FACTORSPECIFIED') the existing weight(s). The default
value is an empty list ( [] ). The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
restrictions (list of str)
Additional restrictions to apply to the nodes/edges of an
existing graph. Restrictions must be specified using
`identifiers
<../../../../graph_solver/network_graph_solver/#identifiers>`__;
identifiers are grouped as `combinations
<../../../../graph_solver/network_graph_solver/#id-combos>`__.
Identifiers can be used with existing column names, e.g.,
'table.column AS RESTRICTIONS_EDGE_ID', or expressions, e.g.,
'column/2 AS RESTRICTIONS_VALUECOMPARED'. If
*remove_previous_restrictions* is set to *true*, any provided
restrictions will replace the existing restrictions. If
*remove_previous_restrictions* is set to *false*, any provided
restrictions will be added (in the case of
'RESTRICTIONS_VALUECOMPARED') to or replaced (in the case of
'RESTRICTIONS_ONOFFCOMPARED'). The default value is an empty
list ( [] ). The user can provide a single element (which will
be automatically promoted to a list internally) or a list.
num_levels (int)
Number of equally-separated isochrones to compute. The default
value is 1.
generate_image (bool)
If set to *true*, generates a PNG image of the isochrones in
the response.
Allowed values are:
* true
* false
The default value is True.
levels_table (str)
Name of the table to output the isochrones to, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. The
table will contain levels and their corresponding WKT geometry.
If no value is provided, the table is not generated. The
default value is ''.
style_options (dict of str to str)
Various style related options of the isochrone image.
Allowed keys are:
* **line_size** --
The width of the contour lines in pixels. The default value
is '3'.
* **color** --
Color of generated isolines. All color values must be in the
format RRGGBB or AARRGGBB (to specify the alpha value). If
alpha is specified and flooded contours are enabled, it will
be used as the transparency of the latter. The default
value is 'FF696969'.
* **bg_color** --
When input parameter *generate_image* is set to *true*,
background color of the generated image. All color values
must be in the format RRGGBB or AARRGGBB (to specify the
alpha value). The default value is '00000000'.
* **text_color** --
When *add_labels* is set to *true*, color for the labels. All
color values must be in the format RRGGBB or AARRGGBB (to
specify the alpha value). The default value is 'FF000000'.
* **colormap** --
Colormap for contours or fill-in regions when applicable. All
color values must be in the format RRGGBB or AARRGGBB (to
specify the alpha value).
Allowed values are:
* jet
* accent
* afmhot
* autumn
* binary
* blues
* bone
* brbg
* brg
* bugn
* bupu
* bwr
* cmrmap
* cool
* coolwarm
* copper
* cubehelix
* dark2
* flag
* gist_earth
* gist_gray
* gist_heat
* gist_ncar
* gist_rainbow
* gist_stern
* gist_yarg
* gnbu
* gnuplot2
* gnuplot
* gray
* greens
* greys
* hot
* hsv
* inferno
* magma
* nipy_spectral
* ocean
* oranges
* orrd
* paired
* pastel1
* pastel2
* pink
* piyg
* plasma
* prgn
* prism
* pubu
* pubugn
* puor
* purd
* purples
* rainbow
* rdbu
* rdgy
* rdpu
* rdylbu
* rdylgn
* reds
* seismic
* set1
* set2
* set3
* spectral
* spring
* summer
* terrain
* viridis
* winter
* wistia
* ylgn
* ylgnbu
* ylorbr
* ylorrd
The default value is 'jet'.
solve_options (dict of str to str)
Solver specific parameters. The default value is an empty dict
( {} ).
Allowed keys are:
* **remove_previous_restrictions** --
Ignore the restrictions applied to the graph during the
creation stage and only use the restrictions specified in
this request if set to *true*.
Allowed values are:
* true
* false
The default value is 'false'.
* **restriction_threshold_value** --
Value-based restriction comparison. Any node or edge with a
'RESTRICTIONS_VALUECOMPARED' value greater than the
*restriction_threshold_value* will not be included in the
solution.
* **uniform_weights** --
When specified, assigns the given value to all the edges in
the graph. Note that weights provided in input parameter
*weights_on_edges* will override this value.
contour_options (dict of str to str)
Solver specific parameters. The default value is an empty dict
( {} ).
Allowed keys are:
* **projection** --
Spatial Reference System (i.e. EPSG Code).
Allowed values are:
* 3857
* 102100
* 900913
* EPSG:4326
* PLATE_CARREE
* EPSG:900913
* EPSG:102100
* EPSG:3857
* WEB_MERCATOR
The default value is 'PLATE_CARREE'.
* **width** --
When input parameter *generate_image* is set to *true*, width
of the generated image. The default value is '512'.
* **height** --
When input parameter *generate_image* is set to *true*,
height of the generated image. If the default value is used,
the *height* is set to the value resulting from multiplying
the aspect ratio by the *width*. The default value is '-1'.
* **search_radius** --
When interpolating the graph solution to generate the
isochrone, neighborhood of influence of sample data (in
percent of the image/grid). The default value is '20'.
* **grid_size** --
When interpolating the graph solution to generate the
isochrone, number of subdivisions along the x axis when
building the grid (the y is computed using the aspect ratio
of the output image). The default value is '100'.
* **color_isolines** --
Color each isoline according to the colormap; otherwise, use
the foreground color.
Allowed values are:
* true
* false
The default value is 'true'.
* **add_labels** --
If set to *true*, add labels to the isolines.
Allowed values are:
* true
* false
The default value is 'false'.
* **labels_font_size** --
When *add_labels* is set to *true*, size of the font (in
pixels) to use for labels. The default value is '12'.
* **labels_font_family** --
When *add_labels* is set to *true*, font name to be used when
adding labels. The default value is 'arial'.
* **labels_search_window** --
When *add_labels* is set to *true*, a search window is used
to rate the local quality of each isoline. Smooth,
continuous, long stretches with relatively flat angles are
favored. The provided value is multiplied by the
*labels_font_size* to calculate the final window size. The
default value is '4'.
* **labels_intralevel_separation** --
When *add_labels* is set to *true*, this value determines the
distance (in multiples of the *labels_font_size*) to use when
separating labels of different values. The default value is
'4'.
* **labels_interlevel_separation** --
When *add_labels* is set to *true*, this value determines the
distance (in percent of the total window size) to use when
separating labels of the same value. The default value is
'20'.
* **labels_max_angle** --
When *add_labels* is set to *true*, maximum angle (in
degrees) from the vertical to use when adding labels. The
default value is '60'.
options (dict of str to str)
Additional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **solve_table** --
Name of the table to host intermediate solve results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
This table will contain the position and cost for each vertex
in the graph. If the default value is used, a temporary table
is created and deleted once the solution is calculated. The
default value is ''.
* **is_replicated** --
If set to *true*, replicate the *solve_table*.
Allowed values are:
* true
* false
The default value is 'true'.
* **data_min_x** --
Lower bound for the x values. If not provided, it will be
computed from the bounds of the input data.
* **data_max_x** --
Upper bound for the x values. If not provided, it will be
computed from the bounds of the input data.
* **data_min_y** --
Lower bound for the y values. If not provided, it will be
computed from the bounds of the input data.
* **data_max_y** --
Upper bound for the y values. If not provided, it will be
computed from the bounds of the input data.
* **concavity_level** --
Factor to qualify the concavity of the isochrone curves. The
lower the value, the more convex (with '0' being completely
convex and '1' being the most concave). The default value is
'0.5'.
* **use_priority_queue_solvers** --
Sets the solver methods explicitly if true.
Allowed values are:
* **true** --
Uses the solvers scheduled for 'shortest_path' and
'inverse_shortest_path' based on solve_direction
* **false** --
Uses the solvers 'priority_queue' and
'inverse_priority_queue' based on solve_direction
The default value is 'false'.
* **solve_direction** --
Specify whether we are going to the source node, or starting
from it.
Allowed values are:
* **from_source** --
Shortest path to get to the source (inverse Dijkstra)
* **to_source** --
Shortest path to source (Dijkstra)
The default value is 'from_source'.
Returns:
A dict with the following entries--
width (int)
Width of the image as provided in *width*.
height (int)
Height of the image as provided in *height*.
bg_color (long)
Background color of the image as provided in *bg_color*.
image_data (bytes)
Generated contour image data.
info (dict of str to str)
Additional information.
solve_info (dict of str to str)
Additional information.
contour_info (dict of str to str)
Additional information.
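Example (an illustrative sketch only; ``db`` is assumed to be a
connected :class:`.GPUdb` handle, and the graph name, source node,
and option values below are hypothetical)::
    rsp = db.visualize_isochrone( graph_name = "road_network",
                                  source_node = "POINT(-73.98 40.75)",
                                  max_solution_radius = -1.0,
                                  num_levels = 5,
                                  generate_image = True,
                                  style_options = { "color": "FF696969",
                                                    "colormap": "jet" },
                                  contour_options = { "projection": "EPSG:3857",
                                                      "width": "512" },
                                  options = { "solve_direction": "from_source" } )
    image_data = rsp[ "image_data" ]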
"""
assert isinstance( graph_name, (basestring)), "visualize_isochrone(): Argument 'graph_name' must be (one) of type(s) '(basestring)'; given %s" % type( graph_name ).__name__
assert isinstance( source_node, (basestring)), "visualize_isochrone(): Argument 'source_node' must be (one) of type(s) '(basestring)'; given %s" % type( source_node ).__name__
assert isinstance( max_solution_radius, (int, long, float)), "visualize_isochrone(): Argument 'max_solution_radius' must be (one) of type(s) '(int, long, float)'; given %s" % type( max_solution_radius ).__name__
weights_on_edges = weights_on_edges if isinstance( weights_on_edges, list ) else ( [] if (weights_on_edges is None) else [ weights_on_edges ] )
restrictions = restrictions if isinstance( restrictions, list ) else ( [] if (restrictions is None) else [ restrictions ] )
assert isinstance( num_levels, (int, long, float)), "visualize_isochrone(): Argument 'num_levels' must be (one) of type(s) '(int, long, float)'; given %s" % type( num_levels ).__name__
assert isinstance( generate_image, (bool)), "visualize_isochrone(): Argument 'generate_image' must be (one) of type(s) '(bool)'; given %s" % type( generate_image ).__name__
assert isinstance( levels_table, (basestring)), "visualize_isochrone(): Argument 'levels_table' must be (one) of type(s) '(basestring)'; given %s" % type( levels_table ).__name__
assert isinstance( style_options, (dict)), "visualize_isochrone(): Argument 'style_options' must be (one) of type(s) '(dict)'; given %s" % type( style_options ).__name__
assert isinstance( solve_options, (dict)), "visualize_isochrone(): Argument 'solve_options' must be (one) of type(s) '(dict)'; given %s" % type( solve_options ).__name__
assert isinstance( contour_options, (dict)), "visualize_isochrone(): Argument 'contour_options' must be (one) of type(s) '(dict)'; given %s" % type( contour_options ).__name__
assert isinstance( options, (dict)), "visualize_isochrone(): Argument 'options' must be (one) of type(s) '(dict)'; given %s" % type( options ).__name__
obj = {}
obj['graph_name'] = graph_name
obj['source_node'] = source_node
obj['max_solution_radius'] = max_solution_radius
obj['weights_on_edges'] = weights_on_edges
obj['restrictions'] = restrictions
obj['num_levels'] = num_levels
obj['generate_image'] = generate_image
obj['levels_table'] = levels_table
obj['style_options'] = self.__sanitize_dicts( style_options )
obj['solve_options'] = self.__sanitize_dicts( solve_options )
obj['contour_options'] = self.__sanitize_dicts( contour_options )
obj['options'] = self.__sanitize_dicts( options )
response = self.__submit_request( '/visualize/isochrone', obj, convert_to_attr_dict = True )
return response
# end visualize_isochrone
# -----------------------------------------------------------------------
# End autogenerated functions
# -----------------------------------------------------------------------
# end class GPUdb
# ---------------------------------------------------------------------------
# Import GPUdbIngestor; try from an installed package first, if not, try local
if IS_PYTHON_3:
try: # Installed
from gpudb.gpudb_multihead_io import GPUdbIngestor, RecordRetriever
except ImportError: # Local
from gpudb_multihead_io import GPUdbIngestor, RecordRetriever
else:
try: # Installed
from gpudb import GPUdbIngestor, RecordRetriever
except ImportError: # Local
try:
from gpudb_multihead_io import GPUdbIngestor, RecordRetriever
except ImportError: # Failsafe
from gpudb.gpudb_multihead_io import GPUdbIngestor, RecordRetriever
# done importing GPUdbIngestor
# ---------------------------------------------------------------------------
# GPUdbTable - Class to Handle GPUdb Tables
# ---------------------------------------------------------------------------
class GPUdbTable( object ):
@staticmethod
def random_name():
"""Returns a randomly generated uuid-based name. Underscores are
used instead of hyphens.
"""
return str( uuid.uuid4() ).replace( '-', '_' )
# end random_name
@staticmethod
def prefix_name( val ):
"""Returns a random name with the specified prefix"""
return val + GPUdbTable.random_name()
# end prefix_name
def __init__( self, _type = None, name = None, options = None, db = None,
read_only_table_count = None,
delete_temporary_views = True,
temporary_view_names = None,
create_views = True,
use_multihead_io = False,
use_multihead_ingest = False,
multihead_ingest_batch_size = 10000,
flush_multi_head_ingest_per_insertion = False ):
"""
Parameters:
_type (:class:`RecordType` or :class:`GPUdbRecordType` or list of lists of str)
Either a :class:`.GPUdbRecordType` or :class:`RecordType` object which
represents a type for the table, or a nested list of lists, where each
internal list has the format of:
::
# Just the name and type
[ "name", "type (double, int etc.)" ]
# Name, type, and one column property
[ "name", "type (double, int etc.)", "nullable" ]
# Name, type, and multiple column properties
[ "name", "string", "char4", "nullable" ]
Pass None for collections. If creating a GPUdbTable
object for a pre-existing table, then also pass None.
If no table with the given name exists, then the given type
will be created in GPUdb before creating the table.
Default is None.
name (str)
The name for the table. If none is provided, a random
name will be generated using :meth:`.random_name`. The
name may contain the schema name (separated by a period).
Alternatively, if the table name has no schema name but
a collection name is specified via the options, that collection
name will be treated as the schema name. A schema name must
not be specified in this argument while also specifying a
collection name. The fully qualified version of the table name,
i.e. 'schema_name.table_name' will be used for all endpoint
calls internally.
options (GPUdbTableOptions or dict)
A :class:`.GPUdbTableOptions` object or a dict containing
options for the table creation.
db (GPUdb)
A :class:`.GPUdb` object that allows the user to connect to
the GPUdb server.
read_only_table_count (int)
For known read-only tables, provide the number of records
in the table (an integer). The table name must also be provided.
delete_temporary_views (bool)
If true, then in terminal queries--queries that cannot be
chained--delete the temporary views upon completion. Defaults
to True.
create_views (bool)
Indicates whether or not to create views for this table.
temporary_view_names (list)
Optional list of temporary view names (that ought
to be deleted upon terminal queries)
use_multihead_io (bool)
Indicates whether or not to use multi-head input and output
(meaning ingestion and lookup). Default is False.
Note that multi-head ingestion is more computation intensive
for sharded tables, and it is probably advisable only if there
is a heavy ingestion load. Choose carefully.
Please see documentation of parameters *multihead_ingest_batch_size*
and *flush_multi_head_ingest_per_insertion* for controlling
the multi-head ingestion related behavior.
use_multihead_ingest (bool)
Indicates whether or not to use multi-head ingestion, if
available upon insertion. Note that multi-head ingestion
is more computation intensive for sharded tables, and it
is probably advisable only if there is a heavy ingestion
load. Default is False. Will be deprecated in version 7.0.
multihead_ingest_batch_size (int)
Used only in conjunction with *use_multihead_ingest*;
ignored otherwise. Sets the batch size to be used for the
ingestor. Must be greater than zero. Default is 10,000.
The multi-head ingestor flushes the inserted records every
*multihead_ingest_batch_size* automatically, unless
*flush_multi_head_ingest_automatically* is False. Any
remaining records would have to be manually flushed using
:meth:`.flush_data_to_server` by the user, or will be
automatically flushed per :meth:`.insert_records` if
*flush_multi_head_ingest_automatically* is True.
flush_multi_head_ingest_per_insertion (bool)
Used only in conjunction with *use_multihead_ingest*;
ignored otherwise. If True, flushes the multi-head ingestor in
every :meth:`.insert_records` call. Otherwise, the multi-head
ingestor flushes the data to the server when a worker queue
reaches *multihead_ingest_batch_size* in size, and any
remaining records will have to be manually flushed using
:meth:`.flush_data_to_server`. Default False.
Returns:
A GPUdbTable object.
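Example (an illustrative sketch only; the connection settings, column
definitions, and table names below are hypothetical)::
    import gpudb
    db = gpudb.GPUdb( host = "http://127.0.0.1:9191" )
    columns = [ [ "id", "int" ],
                [ "name", "string", "char64" ],
                [ "price", "double", "nullable" ] ]
    table = GPUdbTable( _type = columns, name = "demo.products", db = db )
    table.insert_records( [ [ 1, "widget", 9.99 ] ] )
    print( table.size() )
    # For heavy insert loads, multi-head ingestion can be enabled
    bulk_table = GPUdbTable( _type = columns, name = "demo.products_bulk",
                             db = db, use_multihead_ingest = True,
                             multihead_ingest_batch_size = 5000 )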
"""
# Per-instance logger so that setting it for one GPUdbTable instance
# doesn't set it for ALL GPUdbTable instances after that change (even
# if it is outside of the scope of the first instance whose log level
# was changed)
self.log = logging.getLogger( "gpudb.GPUdbTable_instance_"
+ str( uuid.uuid4() ) )
# Handlers need to be instantiated only ONCE for a given module
# (i.e. not per class instance)
handler = logging.StreamHandler()
formatter = logging.Formatter( fmt = GPUdb._LOG_MESSAGE_FORMAT,
datefmt = GPUdb._LOG_DATETIME_FORMAT )
handler.setFormatter( formatter )
self.log.addHandler( handler )
# Prevent logging statements from being duplicated
self.log.propagate = False
# The given DB handle must be a GPUdb instance
if not isinstance( db, GPUdb ):
raise GPUdbException( "Argument 'db' must be a GPUdb object; "
"given %s" % str( type(db) ) )
self.db = db
# Save the options (maybe need to convert to a dict)
if options:
if isinstance( options, GPUdbTableOptions ):
self.options = options
elif isinstance( options, dict ):
self.options = GPUdbTableOptions( options )
else:
raise GPUdbException( "Argument 'options' must be either a dict "
"or a GPUdbTableOptions object; given '%s'"
% str( type( options ) ) )
else:
self.options = GPUdbTableOptions()
# Save the type
self._type = _type
if isinstance( _type, RecordType):
self.record_type = _type
type_info = _type.to_type_schema()
self.gpudbrecord_type = GPUdbRecordType( schema_string = type_info["type_definition"],
column_properties = type_info["properties"] )
elif isinstance( _type, GPUdbRecordType):
self.gpudbrecord_type = _type
self.record_type = _type.record_type
elif not _type:
self.gpudbrecord_type = None
self.record_type = None
else:
_type = GPUdbRecordType( _type )
self.gpudbrecord_type = _type
self.record_type = _type.record_type
# Save passed-in arguments
self._delete_temporary_views = delete_temporary_views
self.create_views = create_views
# Create and update the set of temporary table names
self._temporary_view_names = set()
if temporary_view_names:
self._temporary_view_names.update( temporary_view_names )
# Some default values (assuming it is not a read-only table)
self._count = None
self._is_read_only = False
self._is_collection = False
self._collection_name = self.options._collection_name
self._type_id = None
self._is_replicated = self.options._is_replicated
if not self.options._create_temp_table:
# Create a random table name if none is given
if not name:
# No name is given
self._name = GPUdbTable.random_name()
self.qualified_name = self._name
# Check if a collection name was given
if ( self._collection_name is not None ):
# The user has specified a collection name, treat that as
# the schema name
self.qualified_name = ( "{coll}.{table}"
"".format( coll = self._collection_name,
table = self._name ) )
else:
# The user gave a name, save it
self._name = name
self.qualified_name = self.__get_qualified_name( self._name )
# end handling name
self.__log_debug( "Table name '{}', qualified name '{}'"
"".format( self._name, self.qualified_name ) )
else:
self._name = ""
self.qualified_name = ""
# The table is known to be read only
if read_only_table_count is not None: # Integer value 0 accepted
if not name: # name must be given!
raise GPUdbException( "Table name must be provided with 'read_only_table_count'." )
if not isinstance( read_only_table_count, (int, long) ):
raise GPUdbException( "Argument 'read_only_table_count' must be an integer." )
if (read_only_table_count < 0):
raise GPUdbException( "Argument 'read_only_table_count' must be greater than "
"or equal to zero; given %d" % read_only_table_count )
# All checks pass; save the name and count
self._name = name
self._count = read_only_table_count
self._is_read_only = True
# Update the table's type
self.__update_table_type()
self.__log_debug( "Created a read-only table" )
return # Nothing more to do
# end if
# NOT a known read-only table; need to either get info on it or create it
# -----------------------------------------------------------------------
try:
if not self.options._create_temp_table:
# Does a table with the same name exist already?
has_table_rsp = self.db.has_table( self.qualified_name )
if not has_table_rsp.is_ok( ):
# There was a problem checking for the table
raise GPUdbException( "Problem checking existence of the table: " + _Util.get_error_msg( has_table_rsp ) )
table_exists = has_table_rsp["table_exists"]
self.__log_debug( "Does a table with qualified name '{}' already "
"exist?: {}"
"".format( self.qualified_name, table_exists ) )
else:
table_exists = False
# Do different things based on whether the table already exists
if table_exists:
# Check that the given type agrees with the existing table's type, if any given
show_table_rsp = self.db.show_table( self.qualified_name,
options = {"show_children": "false"} )
if not _Util.is_ok( show_table_rsp ): # problem getting the table's info
raise GPUdbException( "Problem getting information about the table: " + _Util.get_error_msg( show_table_rsp ) )
# Check if the user has access to this table
if len(show_table_rsp[ C._table_descriptions ]) == 0:
msg = ( "Table {} does not exist or user does not "
"have access to it".format( self.qualified_name ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# Check if the table is a collection
if ( (show_table_rsp[ C._table_descriptions ] == C._collection)
or (C._collection in show_table_rsp[ C._table_descriptions ][0]) ):
self._is_collection = True
else: # need to save the type ID for regular tables
self._type_id = show_table_rsp["type_ids"][0]
# Also save the name of any collection this table is a part of
if ( (C._collection_names in show_table_rsp[ C._additional_info ][0] )
and show_table_rsp[ C._additional_info ][0][ C._collection_names ] ):
self._collection_name = show_table_rsp[ C._additional_info ][0][ C._collection_names ]
# end if else
if not self._is_collection: # not a collection
gtable_type = GPUdbRecordType( None, "", show_table_rsp["type_schemas"][0],
show_table_rsp["properties"][0] )
table_type = RecordType.from_type_schema( "", show_table_rsp["type_schemas"][0],
show_table_rsp["properties"][0] )
else:
gtable_type = None
table_type = None
if ( self.record_type and not table_type ):
# TODO: Decide if we should have this check or silently ignore the given type
raise GPUdbException( "Table '%s' is an existing collection; so cannot be of the "
"given type." % self.qualified_name )
if ( self.gpudbrecord_type and (self.gpudbrecord_type != gtable_type) ):
raise GPUdbException( "Table '%s' exists; existing table's type does "
"not match the given type." % self.qualified_name )
# Save the types
self.record_type = table_type
self.gpudbrecord_type = gtable_type
# Check if the table is read-only or not
if show_table_rsp[ C._table_descriptions ] in [ C._view, C._join, C._result_table ]:
self._is_read_only = True
# Check if the table is replicated
if ( (show_table_rsp[ C._table_descriptions ] == C._replicated)
or (C._replicated in show_table_rsp[ C._table_descriptions ][0]) ):
self._is_replicated = True
else: # table does not already exist in GPUdb
# Create the table (and the type)
if self.options._is_collection: # Create a collection
rsp_obj = self.db.create_table( self.qualified_name, "",
self.options.as_dict() )
self._is_collection = True
elif self.record_type: # create a regular table
type_id = self.gpudbrecord_type.create_type( self.db )
rsp_obj = self.db.create_table( self.qualified_name, type_id,
self.options.as_dict() )
self._type_id = type_id
else: # Need to create a table (and hence the type), but none was given
raise GPUdbException( "Must provide a type to create a new table; none given." )
if not _Util.is_ok( rsp_obj ): # problem creating the table
raise GPUdbException( _Util.get_error_msg( rsp_obj ) )
if self.options._create_temp_table:
self._name = rsp_obj["table_name"]
self.qualified_name = rsp_obj["info"]["qualified_table_name"]
# end if-else
except GPUdbException as e:
if IS_PYTHON_3:
raise GPUdbException( "Error creating GPUdbTable: '{}'"
"".format( e ) )
else:
raise GPUdbException( "Error creating GPUdbTable: '{}'"
"".format( e.message ) )
except Exception as e: # all other exceptions
ex_str = GPUdbException.stringify_exception( e )
raise GPUdbException( "Error creating GPUdbTable; {}: '{}'"
"".format( e.__doc__, ex_str ) )
# Set up multi-head ingestion, if needed
if not isinstance( use_multihead_io, bool ):
raise GPUdbException( "Argument 'use_multihead_io' must be "
"a bool; given '%s'"
% str( type( use_multihead_io ) ) )
if not isinstance( use_multihead_ingest, bool ):
raise GPUdbException( "Argument 'use_multihead_ingest' must be "
"a bool; given '%s'"
% str( type( use_multihead_ingest ) ) )
self._multihead_ingestor = None
if use_multihead_ingest or use_multihead_io:
# Check multihead_ingest_batch_size
if ( not isinstance( multihead_ingest_batch_size, (int, long) )
or (multihead_ingest_batch_size < 1) ):
raise GPUdbException( "Argument 'multihead_ingest_batch_size' "
"must be an integer greater than zero; "
"given: " + str( multihead_ingest_batch_size ) )
self._multihead_ingestor = GPUdbIngestor( self.db, self.qualified_name,
self.gpudbrecord_type,
multihead_ingest_batch_size,
is_table_replicated = self._is_replicated )
# Save the per-insertion-call flushing setting
self._flush_multi_head_ingest_per_insertion = flush_multi_head_ingest_per_insertion
# end if
# Set up multi-head record retriever
self._multihead_retriever = None
if use_multihead_io:
self._multihead_retriever = RecordRetriever( self.db, self.qualified_name,
self.gpudbrecord_type,
is_table_replicated = self._is_replicated )
# end if
# Set the encoding function for data to be inserted
self._record_encoding_function = lambda vals: self.__encode_data_for_insertion_cext( vals )
# end __init__
def __str__( self ):
return self.qualified_name
# end __str__
def __eq__( self, other ):
"""Override the equality operator.
"""
# Check the type of the other object
if not isinstance( other, GPUdbTable ):
return False
# Check the name
if (self.qualified_name != other.qualified_name):
return False
# Check for GPUdbRecordType equivalency
if (self.gpudbrecord_type != other.gpudbrecord_type):
return False
# TODO: Add the c-extension RecordType class equivalency
# Check for the database client handle equivalency
if (self.db != other.db):
return False
return True
# end __eq__
def __ne__(self, other):
return not self.__eq__(other)
# end __ne__
def __len__( self ):
"""Return the current size of the table. If it is a read-only table,
then return the cached count; if not a read-only table, get the current
size from GPUdb.
"""
if self._is_read_only:
# Get the count, if not known
if (self._count is None):
show_table_rsp = self.db.show_table( self.qualified_name,
options = {"get_sizes": "true"} )
if not show_table_rsp.is_ok():
raise GPUdbException( "Problem getting table size: '{}'"
"".format( show_table_rsp.get_error_msg() ) )
self._count = show_table_rsp[ C._total_full_size ]
# end inner if
return self._count
# end if read only table
# Not a read-only table; get the current size
show_table_rsp = self.db.show_table( self.qualified_name, options = {"get_sizes": "true"} )
if not show_table_rsp.is_ok():
raise GPUdbException( "Problem getting table size: '{}'"
"".format( show_table_rsp.get_error_msg() ) )
return show_table_rsp[ C._total_full_size ]
# end __len__
def size( self ):
"""Return the table's size/length/count.
"""
return self.__len__()
# end size
def __getitem__( self, key ):
"""Implement indexing and slicing for the table.
"""
# A single integer--get a single record
if isinstance( key, (int, long) ):
if (key < 0):
raise TypeError( "GPUdbTable does not support negative indexing" )
return self.get_records( key, 1 )
# end if
# Handle slicing
if isinstance( key, slice ):
if key.step and (key.step != 1):
raise TypeError( "GPUdbTable does not support slicing with steps" )
if not isinstance(key.start, (int, long)) or not isinstance(key.stop, (int, long)):
raise TypeError( "GPUdbTable slicing requires integers" )
if (key.start < 0):
raise TypeError( "GPUdbTable does not support negative indexing" )
if ( (key.stop < 0) and (key.stop != self.db.END_OF_SET) ):
raise TypeError( "GPUdbTable does not support negative indexing" )
if ( (key.stop <= key.start) and (key.stop != self.db.END_OF_SET) ):
raise IndexError( "GPUdbTable slice start index must be greater than the stop index" )
limit = key.stop if (key.stop == self.db.END_OF_SET) \
else (key.stop - key.start)
return self.get_records( key.start, limit )
# end if
raise TypeError( "GPUdbTable indexing/slicing requires integers" )
# end __getitem__
def __iter__( self ):
"""Return a table iterator for this table. Defaults to the first
10,000 records in the table. If needing to access more records,
please use the GPUdbTableIterator class directly.
"""
return GPUdbTableIterator( self )
# end __iter__
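# Illustrative usage sketch for __len__/size(), indexing, slicing, and iteration
# (not part of the generated API); the connection URL and the table name
# "example_table" are hypothetical placeholders:
#
#     db    = GPUdb( host = "http://localhost:9191" )
#     table = GPUdbTable( None, name = "example_table", db = db )
#     total = len( table )      # or table.size(); current record count
#     first = table[ 0 ]        # a single record via integer indexing
#     page  = table[ 10 : 20 ]  # records 10 through 19 via slicing
#     for record in table:      # iterates over at most the first 10,000 records
#         pass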
def __get_qualified_name(self, name):
"""Given a name for the table, process it and the schema/collection
name according to 7.1 schema and collection backward compatibility
rules. Return the qualified name."""
# Check if the given name is unqualified and a default schema is available
if ( len( name.split( "." ) ) == 1 and self._collection_name is not None ):
# The GPUdbTable has a collection name; use it for schema name
qualified_name = "{coll}.{table}".format( coll = self._collection_name, table = name )
else:
# Use the name given
qualified_name = name
# end if
return qualified_name
# end __get_qualified_name
def __process_view_name(self, view_name ):
"""Given a view name, process it as needed.
Returns:
The processed view name
"""
# If no view name is given but views ought to be created, get a random name
if not view_name:
if self.create_views: # will create a view
view_name = self.__get_qualified_name( GPUdbTable.random_name() )
else: # won't create views
view_name = ""
# end if
return view_name
# end __process_view_name
def __save_table_type( self, type_schema_str, properties = None ):
"""Given the type information, save the table's current/new
type.
"""
# A collection can't be changed
if self._is_collection:
return
# No new type given; so no modification was done
if (not type_schema_str):
return
# Save the GPUdbRecordType object
self.gpudbrecord_type = GPUdbRecordType( None, "", type_schema_str,
properties )
# Save the RecordType C object
self.record_type = RecordType.from_type_schema( "", type_schema_str,
properties )
# end __save_table_type
def __update_table_type( self):
"""Update the table's type by getting the latest table information
(the table type may have been altered by an /alter/table call).
Returns:
If the type was updated, i.e. the cached type needed to be changed,
then returns True. If the cached type is still valid, then returns False.
"""
self.__log_debug( "Fetching table information to update type for {}"
"".format( self.qualified_name ) )
show_table_rsp = self.db.show_table( self.qualified_name )
if not show_table_rsp.is_ok():
# First check whether the table still exists
if not self.exists():
msg = ( "Table does not exist anymore; error from server: '{}'"
"".format( show_table_rsp.get_error_msg() ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# end if
# Table exists; so the problem is something else
msg = ( "Problem while updating table type: '{}'"
"".format( show_table_rsp.get_error_msg() ) )
self.__log_debug( msg )
raise GPUdbException( msg )
# Check if the type ID matches with the cached type
type_id = show_table_rsp["type_ids"][0]
if (self._type_id == type_id):
return False
self.__save_table_type( show_table_rsp["type_schemas"][0],
show_table_rsp["properties"][0] )
# And also the type ID
self._type_id = type_id
self.__log_debug( "Updated type for table {}".format( self.qualified_name ) )
return True # yes, the type was updated
# end __update_table_type
def set_logger_level( self, log_level ):
"""Set the log level for the GPUdbTable class and any multi-head i/o
related classes it uses.
Parameters:
log_level (int, long, or str)
A valid log level for the logging module
"""
try:
# This class's logger
self.log.setLevel( log_level )
# The DB handle's logger
self.db.set_client_logger_level( log_level )
# The multi-head ingestor's logger
if self._multihead_ingestor:
self._multihead_ingestor.set_logger_level( log_level )
# The multi-head retriever's logger
if self._multihead_retriever:
self._multihead_retriever.set_logger_level( log_level )
except (ValueError, TypeError, Exception) as ex:
ex_str = GPUdbException.stringify_exception( ex )
raise GPUdbException("Invalid log level: '{}'".format( ex_str ))
# end set_logger_level
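# Illustrative usage sketch for set_logger_level() (not part of the generated
# API); assumes 'table' is an existing GPUdbTable instance:
#
#     import logging
#     table.set_logger_level( logging.DEBUG )   # also adjusts the DB handle's and
#                                               # multi-head workers' loggers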
@property
def table_name( self ):
"""Return user given name for this table (or the randomly generated
one, if applicable)."""
return self._name
# end table_name
@property
def name( self ):
"""Return user given name for this table (or the randomly generated
one, if applicable). Return the qualified version"""
return self._name
# end table_name
@property
def qualified_table_name( self ):
"""Return the fully qualified name for this table, including any
schemas."""
return self.qualified_name
# end qualified_table_name
@property
def is_read_only( self ): # read-only attribute is_read_only
"""Is the table read-only, or can we modify it?
"""
return self._is_read_only
# end is_read_only
@property
def count( self ): # read-only property count
"""Return the table's size/length/count.
"""
return self.__len__()
# end count
@property
def is_collection( self ):
"""Returns True if the table is a collection; False otherwise."""
return self._is_collection
# end is_collection
@property
def collection_name( self ):
"""Returns the name of the collection this table is a member of; None if
this table does not belong to any collection.
"""
return self._collection_name
# end collection_name
def is_replicated( self ):
"""Returns True if the table is replicated."""
return self._is_replicated
# end is_replicated
def get_table_type( self ):
"""Return the table's (record) type (the GPUdbRecordType object, not the c-extension RecordType)."""
return self.gpudbrecord_type
# end get_table_type
def alias( self, alias ):
"""Create an alias string for this table.
Parameters:
alias (str)
A string that contains the alias.
Returns:
A string with the format "this-table-name as alias".
"""
if not isinstance( alias, (str, unicode) ):
raise GPUdbException( "'alias' must be a string; given {0}"
"".format( str( type( alias ) ) ) )
return "{0} as {1}".format( self.qualified_name, alias )
# end alias
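# Illustrative usage sketch for alias() (not part of the generated API);
# 'orders' is a hypothetical GPUdbTable instance:
#
#     orders.alias( "o" )   # returns e.g. "my_schema.orders as o", suitable for
#                           # use in the table_names list of a join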
def create_view( self, view_name, count = None ):
"""Given a view name and a related response, create a new GPUdbTable object
which is a read-only table with the intermediate tables automatically
updated.
Returns:
A :class:`.GPUdbTable` object
"""
# If the current table is read-only, add it to the list of intermediate
# temporary table names
if self.is_read_only:
self._temporary_view_names.update( [ self.qualified_name ] )
qualified_view_name = self.__get_qualified_name( view_name )
view = GPUdbTable( None, name = qualified_view_name,
read_only_table_count = count,
db = self.db,
temporary_view_names = self._temporary_view_names )
return view
# end create_view
def cleanup( self ):
"""Clear/drop all intermediate tables if settings allow it.
Returns:
self for enabling chaining method invocations.
"""
# Clear/drop all temporary tables
if self._delete_temporary_views:
for view in list(self._temporary_view_names): # iterate over a copy
self.db.clear_table( table_name = view )
self._temporary_view_names.remove( view )
else: # We're not allowed to delete intermediate tables!
raise GPUdbException( "Not allowed to delete intermediate "
"tables." )
return self
# end cleanup
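# Illustrative usage sketch for create_view()/cleanup() (not part of the
# generated API); assumes 'table' is an existing GPUdbTable and that deleting
# intermediate views is permitted; "example_view" is a hypothetical name:
#
#     view = table.create_view( "example_view" )   # read-only GPUdbTable
#     view.cleanup()                                # drop tracked temporary views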
def exists( self, options = {} ):
"""Checks for the existence of a table with the given name.
Returns:
A boolean flag indicating whether the table currently
exists in the database.
"""
response = self.db.has_table( self.qualified_name, options = options )
if not _Util.is_ok( response ):
raise GPUdbException( _Util.get_error_msg( response ) )
return response[ "table_exists" ]
# end exists
def flush_data_to_server( self ):
"""If multi-head ingestion is enabled, then flush all records
in the ingestors' worker queues so that they actually get
inserted to the server database.
"""
if self._multihead_ingestor:
self._multihead_ingestor.flush( is_data_encoded = True )
# end flush_data_to_server
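# Illustrative usage sketch for multi-head ingestion with deferred flushing
# (not part of the generated API); assumes an existing GPUdb handle 'db' and a
# hypothetical table name "example_table":
#
#     table = GPUdbTable( None, name = "example_table", db = db,
#                         use_multihead_ingest = True,
#                         multihead_ingest_batch_size = 1000,
#                         flush_multi_head_ingest_per_insertion = False )
#     table.insert_records( [1, 2, 3], [4, 5, 6] )
#     table.flush_data_to_server()   # push any queued records to the database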
def __log_debug( self, message ):
if not self.log.isEnabledFor( logging.DEBUG ):
# No-op if debug is not enabled. This is important
# because the inspect module is a time killer!
return
# end if
try:
# Get calling method's information from the stack
stack = inspect.stack()
# stack[1] gives the previous/calling function
filename = stack[1][1].split("/")[-1]
ln = stack[1][2]
func = stack[1][3]
self.log.debug( "[GPUdbTable::{fn}::{line}::{func}] {msg}"
"".format( fn = filename,
func = func, line = ln,
msg = message ) )
except:
# Some error occurred with inspect; just log the debug message
self.log.debug( "[GPUdbTable] {msg}"
"".format( msg = message ) )
# end __log_debug
def __log_warn( self, message ):
self.log.warn( "[GPUdbTable] {}".format( message ) )
# end __log_warn
def __log_info( self, message ):
self.log.info( "[GPUdbTable] {}".format( message ) )
# end __log_info
def __log_error( self, message ):
self.log.error( "[GPUdbTable] {}".format( message ) )
# end __log_error
def __encode_data_for_insertion_avro( self, values ):
"""Encode the given values with the database client's encoding
and return the encoded data.
"""
encoding = self.db._GPUdb__client_to_object_encoding()
if encoding == "binary":
encoded_record = GPUdbRecord( self.gpudbrecord_type, values ).binary_data
else: # JSON encoding
encoded_record = GPUdbRecord( self.gpudbrecord_type, values ).json_data_string
return encoded_record
# end __encode_data_for_insertion_avro
def __encode_data_for_insertion_cext( self, values ):
"""Encode the given values with the database client's encoding
and return the encoded data.
"""
encoding = self.db._GPUdb__client_to_object_encoding()
if encoding == "binary": # No encoding is needed here
encoded_record = values
else: # JSON encoding
encoded_record = GPUdbRecord( self.gpudbrecord_type, values ).json_data_string
return encoded_record
# end __encode_data_for_insertion_cext
def __encode_data_for_insertion( self, *args, **kwargs ):
"""Parse the input and encode the data for insertion.
Returns:
The encoded data.
"""
encoded_data = []
# Process the input--single record or multiple records (or invalid syntax)?
if args and kwargs:
# Cannot give both args and kwargs
raise GPUdbException( "Cannot specify both args and kwargs: either provide "
"the column values for a single record "
"in 'kwargs', or provide column values for any number "
"of records in 'args'." )
if kwargs:
# Gave the column values for a single record in kwargs
encoded_record = self._record_encoding_function( kwargs )
encoded_data.append( encoded_record )
elif not any( _Util.is_list_or_dict( i ) for i in args):
# Column values not within a single list/dict: so it is a single record
if (isinstance( args[0], GPUdbRecord) or isinstance( args[0], Record) ):
encoded_data.append( args[0] )
else:
encoded_record = self._record_encoding_function( list(args) )
encoded_data.append( encoded_record )
elif not all( _Util.is_list_or_dict( i ) for i in args):
# Some values are lists or dicts, but not all--this is an error case
raise GPUdbException( "Arguments must be either contain no list, or contain only "
"lists or dicts; i.e. it must not be a mix; "
"given {0}".format( args ) )
elif (len( args ) == 1):
# A list/dict of length one given
if any( _Util.is_list_or_dict( i ) for i in args[0]):
# At least one element within the list is also a list
if not all( _Util.is_list_or_dict( i ) for i in args[0]):
# But not all elements are lists/dicts; this is an error case
raise GPUdbException( "Arguments must be either a single list, multiple lists, "
"a list of lists, or contain no lists; i.e. it must not be "
"a mix of lists and non-lists; given a list with mixed "
"elements: {0}".format( args ) )
else:
# A list of lists/dicts--multiple records within a list
for record in args[0]:
encoded_record = self._record_encoding_function( record )
encoded_data.append( encoded_record )
# end for
# end inner-most if-else
else:
# A single list--a single record
encoded_record = self._record_encoding_function( *args )
encoded_data.append( encoded_record )
# end 2nd inner if-else
else:
# All arguments are either lists or dicts, so multiple records given
for col_vals in args:
encoded_record = self._record_encoding_function( col_vals )
encoded_data.append( encoded_record )
# end for
# end if-else
if not encoded_data: # No data given
raise GPUdbException( "Must provide data for at least a single record; none given." )
return encoded_data
# end __encode_data_for_insertion
def __insert_encoded_records( self, encoded_data, options ):
"""Given encoded records and some options, insert the records
into the respective table in Kinetica.
"""
# Make the insertion call-- either with the multi-head ingestor or the regular way
if self._multihead_ingestor:
# Set the multi-head ingestor's options
self._multihead_ingestor.options = options
# Call the insertion function
self._multihead_ingestor.insert_records( encoded_data,
is_data_encoded = True )
# Need to flush the records, per the setting
if self._flush_multi_head_ingest_per_insertion:
self._multihead_ingestor.flush( is_data_encoded = True )
else:
# Call the insert function and check the status
response = self.db.insert_records( self.qualified_name, encoded_data,
options = options,
record_type = self.record_type )
if not _Util.is_ok( response ):
raise GPUdbException( _Util.get_error_msg( response ) )
# end if-else
# end __insert_encoded_records
def insert_records( self, *args, **kwargs ):
"""Insert one or more records.
Parameters:
args
Values for all columns of a single record or multiple records.
For a single record, use either of the following syntaxes:
::
insert_records( 1, 2, 3 )
insert_records( [1, 2, 3] )
For multiple records, use either of the following syntaxes:
::
insert_records( [ [1, 2, 3], [4, 5, 6] ] )
insert_records( [1, 2, 3], [4, 5, 6] )
Also, the user can use keyword arguments to pass in values:
::
# For a record type with two integers named 'a' and 'b':
insert_records( {"a": 1, "b": 1},
{"a": 42, "b": 32} )
# Also can use a list to pass the dicts
insert_records( [ {"a": 1, "b": 1},
{"a": 42, "b": 32} ] )
Additionally, the user may provide options for the insertion
operation. For example:
::
insert_records( [1, 2, 3], [4, 5, 6],
options = {"return_record_ids": "true"} )
kwargs
Values for all columns for a single record. Mutually
exclusive with args (i.e. cannot provide both) when it
only contains data.
May contain an 'options' keyword arg which will be passed
to the database for the insertion operation.
Returns:
A :class:`.GPUdbTable` object with the insert_records()
response fields converted to attributes and stored within.
"""
# Extract any options that the user may have provided
options = kwargs.get( "options", None )
if options is not None: # if given, remove from kwargs
kwargs.pop( "options" )
else: # no option given; use an empty dict
options = {}
# Encode the data for insertion
if not args and not kwargs:
raise GPUdbException( "No data given to insert!" )
encoded_data = self.__encode_data_for_insertion( *args, **kwargs )
# self.__insert_encoded_records( encoded_data, options )
try: # if the first attempt fails, we'll check if the table
# type has been modified by any chance
self.__insert_encoded_records( encoded_data, options )
except GPUdbException as e:
self.__log_debug( "Got exception when trying to insert records: "
"{}".format( str(e) ) )
if self.__update_table_type():
# The table type indeed had been modified; retry insertion
# with the current/new type
encoded_data = self.__encode_data_for_insertion( *args, **kwargs )
self.__insert_encoded_records( encoded_data, options )
else:
raise
# end try-catch
return self
# end insert_records
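# Illustrative usage sketch for insert_records() (not part of the generated
# API); assumes 'table' is a GPUdbTable whose type has three numeric columns:
#
#     table.insert_records( 1, 2, 3 )                # one record
#     table.insert_records( [1, 2, 3], [4, 5, 6] )   # two records
#     table.insert_records( [1, 2, 3], [4, 5, 6],
#                           options = {"return_record_ids": "true"} )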
def insert_records_random( self, count = None, options = {} ):
"""Generates a specified number of random records and adds them to the
given table. There is an optional parameter that allows the user to
customize the ranges of the column values. It also allows the user to
specify linear profiles for some or all columns in which case linear
values are generated rather than random ones. Only individual tables
are supported for this operation.
This operation is synchronous, meaning that a response will not be
returned until all random records are fully available.
Parameters:
count (long)
Number of records to generate.
options (dict of dicts of floats)
Optional parameter to pass in specifications for the randomness
of the values. This map is different from the *options*
parameter of most other endpoints in that it is a map of string
to map of string to doubles, while most others are maps of
string to string. In this map, the top level keys represent
which column's parameters are being specified, while the
internal keys represent which parameter is being specified.
These parameters take on different meanings depending on the
type of the column. Below follows a more detailed description
of the map: Default value is an empty dict ( {} ).
Allowed keys are:
* **seed** --
If provided, the internal random number generator will be
initialized with the given value. The minimum is 0. This
allows for the same set of random numbers to be generated
across invocation of this endpoint in case the user wants to
repeat the test. Since input parameter *options*, is a map
of maps, we need an internal map to provide the seed value.
For example, to pass 100 as the seed value through this
parameter, you need something equivalent to: 'options' =
{'seed': { 'value': 100 } }
Allowed keys are:
* **value** --
Pass the seed value here.
* **all** --
This key indicates that the specifications relayed in the
internal map are to be applied to all columns of the records.
Allowed keys are:
* **min** --
For numerical columns, the minimum of the generated values
is set to this value. Default is -99999. For point,
shape, and track semantic types, min for numeric 'x' and
'y' columns needs to be within [-180, 180] and [-90, 90],
respectively. The default minimum possible values for these
columns in such cases are -180.0 and -90.0. For the
'TIMESTAMP' column, the default minimum corresponds to Jan
1, 2010.
For string columns, the minimum length of the randomly
generated strings is set to this value (default is 0). If
both minimum and maximum are provided, minimum must be less
than or equal to max. Value needs to be within [0, 200].
If the min is outside the accepted ranges for strings
columns and 'x' and 'y' columns for point/shape/track
types, then those parameters will not be set; however, an
error will not be thrown in such a case. It is the
responsibility of the user to use the *all* parameter
judiciously.
* **max** --
For numerical columns, the maximum of the generated values
is set to this value. Default is 99999. For point, shape,
and track semantic types, max for numeric 'x' and 'y'
columns needs to be within [-180, 180] and [-90, 90],
respectively. The default maximum possible values for these
columns in such cases are 180.0 and 90.0.
For string columns, the maximum length of the randomly
generated strings is set to this value (default is 200). If
both minimum and maximum are provided, *max* must be
greater than or equal to *min*. Value needs to be within
[0, 200].
If the *max* is outside the accepted ranges for strings
columns and 'x' and 'y' columns for point/shape/track
types, then those parameters will not be set; however, an
error will not be thrown in such a case. It is the
responsibility of the user to use the *all* parameter
judiciously.
* **interval** --
If specified, generate values for all columns evenly spaced
with the given interval value. If a max value is specified
for a given column the data is randomly generated between
min and max and decimated down to the interval. If no max
is provided the data is linearly generated starting at the
minimum value (instead of generating random data). For
non-decimated string-type columns the interval value is
ignored. Instead the values are generated following the
pattern: 'attrname_creationIndex#', i.e. the column name
suffixed with an underscore and a running counter (starting
at 0). For string types with limited size (eg char4) the
prefix is dropped. No nulls will be generated for nullable
columns.
* **null_percentage** --
If specified, then generate the given percentage of the
count as nulls for all nullable columns. This option will
be ignored for non-nullable columns. The value must be
within the range [0, 1.0]. The default value is 5% (0.05).
* **cardinality** --
If specified, limit the randomly generated values to a
fixed set. Not allowed on a column with interval specified,
and is not applicable to WKT or Track-specific columns. The
value must be greater than 0. This option is disabled by
default.
* **attr_name** --
Set the following parameters for the column specified by the
key. This overrides any parameter set by *all*.
Allowed keys are:
* **min** --
For numerical columns, the minimum of the generated values
is set to this value. Default is -99999. For point,
shape, and track semantic types, min for numeric 'x' and
'y' columns needs to be within [-180, 180] and [-90, 90],
respectively. The default minimum possible values for these
columns in such cases are -180.0 and -90.0. For the
'TIMESTAMP' column, the default minimum corresponds to Jan
1, 2010.
For string columns, the minimum length of the randomly
generated strings is set to this value (default is 0). If
both minimum and maximum are provided, minimum must be less
than or equal to max. Value needs to be within [0, 200].
If the min is outside the accepted ranges for strings
columns and 'x' and 'y' columns for point/shape/track
types, then those parameters will not be set; however, an
error will not be thrown in such a case. It is the
responsibility of the user to use the *all* parameter
judiciously.
* **max** --
For numerical columns, the maximum of the generated values
is set to this value. Default is 99999. For point, shape,
and track semantic types, max for numeric 'x' and 'y'
columns needs to be within [-180, 180] and [-90, 90],
respectively. The default maximum possible values for these
columns in such cases are 180.0 and 90.0.
For string columns, the maximum length of the randomly
generated strings is set to this value (default is 200). If
both minimum and maximum are provided, *max* must be
greater than or equal to *min*. Value needs to be within
[0, 200].
If the *max* is outside the accepted ranges for strings
columns and 'x' and 'y' columns for point/shape/track
types, then those parameters will not be set; however, an
error will not be thrown in such a case. It is the
responsibility of the user to use the *all* parameter
judiciously.
* **interval** --
If specified, generate values for all columns evenly spaced
with the given interval value. If a max value is specified
for a given column the data is randomly generated between
min and max and decimated down to the interval. If no max
is provided the data is linearly generated starting at the
minimum value (instead of generating random data). For
non-decimated string-type columns the interval value is
ignored. Instead the values are generated following the
pattern: 'attrname_creationIndex#', i.e. the column name
suffixed with an underscore and a running counter (starting
at 0). For string types with limited size (eg char4) the
prefix is dropped. No nulls will be generated for nullable
columns.
* **null_percentage** --
If specified and if this column is nullable, then generate
the given percentage of the count as nulls. This option
will result in an error if the column is not nullable. The
value must be within the range [0, 1.0]. The default value
is 5% (0.05).
* **cardinality** --
If specified, limit the randomly generated values to a
fixed set. Not allowed on a column with interval specified,
and is not applicable to WKT or Track-specific columns. The
value must be greater than 0. This option is disabled by
default.
* **track_length** --
This key-map pair is only valid for track type data sets (an
error is thrown otherwise). No nulls would be generated for
nullable columns.
Allowed keys are:
* **min** --
Minimum possible length for generated series; default is
100 records per series. Must be an integral value within
the range [1, 500]. If both min and max are specified, min
must be less than or equal to max.
* **max** --
Maximum possible length for generated series; default is
500 records per series. Must be an integral value within
the range [1, 500]. If both min and max are specified, max
must be greater than or equal to min.
Returns:
A :class:`GPUdbTable` object with the insert_records() response
fields converted to attributes (and stored within) with the
following entries:
table_name (str)
Value of input parameter *table_name*.
count (long)
Value of input parameter *count*.
"""
response = self.db.insert_records_random( self.qualified_name, count = count,
options = options )
if not _Util.is_ok( response ):
raise GPUdbException( _Util.get_error_msg( response ) )
# We can now return self to allow chaining method invocations
return self
# end insert_records_random
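# Illustrative usage sketch for insert_records_random() (not part of the
# generated API); assumes 'table' is an existing GPUdbTable instance:
#
#     table.insert_records_random( count = 1000,
#                                  options = { "seed": { "value": 100 },
#                                              "all":  { "min": 0, "max": 10 } } )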
def get_records_by_key( self, key_values, expression = "", options = None ):
"""Fetches the record(s) from the appropriate worker rank directly
(or, if multi-head record retrieval is not set up, then from the
head node) that map to the given shard key.
Parameters:
key_values (list or dict)
Values for the sharding columns of the record to fetch either in
a list (then it is assumed to be in the order of the sharding
keys in the record type) or a dict. Must not have any missing
sharding/primary column value or any extra column values.
expression (str)
Optional parameter. If given, it is passed to /get/records as
a filter expression.
options (dict of str to str or None)
Any /get/records options to be passed onto the GPUdb server. Optional
parameter.
Returns:
The decoded records.
"""
if not self._multihead_retriever:
raise GPUdbException( "Record retrieval by sharding/primary keys "
"is not set up for this table." )
return self._multihead_retriever.get_records_by_key( key_values, expression, options )
# end get_records_by_key
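# Illustrative usage sketch for get_records_by_key() (not part of the generated
# API); requires the table to have been constructed with use_multihead_io = True,
# and assumes a hypothetical sharding column named "id":
#
#     records = table.get_records_by_key( { "id": 42 } )
#     records = table.get_records_by_key( { "id": 42 }, expression = "x > 0" )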
def get_records( self, offset = 0, limit = -9999,
encoding = 'binary', options = {},
force_primitive_return_types = True ):
"""Retrieves records from a given table, optionally filtered by an
expression and/or sorted by a column. This operation can be performed
on tables, views, or on homogeneous collections (collections containing
tables of all the same type). Records can be returned encoded as binary
or json.
This operation supports paging through the data via the input parameter
*offset* and input parameter *limit* parameters. Note that when paging
through a table, if the table (or the underlying table in case of a
view) is updated (records are inserted, deleted or modified) the
records retrieved may differ between calls based on the updates
applied.
Decodes and returns the fetched records.
Parameters:
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results).
Default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned. Or END_OF_SET (-9999) to indicate that the max
number of results should be returned. Default value is -9999.
encoding (str)
Specifies the encoding for returned records. Default value is
'binary'.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str)
Default value is an empty dict ( {} ).
Allowed keys are:
* **expression** --
Optional filter expression to apply to the table.
* **fast_index_lookup** --
Indicates if indexes should be used to perform the lookup for
a given expression if possible. Only applicable if there is
no sorting, the expression contains only equivalence
comparisons based on existing tables indexes and the range of
requested values is from [0 to END_OF_SET]. The default value
is true.
* **sort_by** --
Optional column that the data should be sorted by. Empty by
default (i.e. no sorting is applied).
* **sort_order** --
String indicating how the returned values should be sorted -
ascending or descending. If sort_order is provided, sort_by
has to be provided.
Allowed values are:
* ascending
* descending
The default value is 'ascending'.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
Returns:
A list of :class:`Record` objects containing the record values.
"""
response = self.db.get_records_and_decode( self.qualified_name, offset, limit, encoding, options,
record_type = self.record_type,
force_primitive_return_types =
force_primitive_return_types )
if not _Util.is_ok( response ):
raise GPUdbException( _Util.get_error_msg( response ) )
# Double check that the type ID is the same
if (response.type_name != self._type_id):
# The table's type seems to have changed; update it!
self.__update_table_type()
# And re-submit the /get/records call
response = self.db.get_records_and_decode( self.qualified_name, offset, limit, encoding, options,
record_type = self.record_type,
force_primitive_return_types =
force_primitive_return_types )
# end if
# Return just the records; disregard the extra info within the response
return response.records
# end get_records
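# Illustrative paging sketch for get_records() (not part of the generated API);
# assumes 'table' is an existing GPUdbTable instance:
#
#     offset, batch = 0, 1000
#     while True:
#         records = table.get_records( offset = offset, limit = batch )
#         if not records:
#             break
#         offset += len( records )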
def get_records_by_column( self, column_names, offset = 0, limit = -9999,
encoding = 'binary', options = {},
print_data = False,
force_primitive_return_types = True, get_column_major = True ):
"""For a given table, retrieves the values of the given columns within a
given range. It returns maps of column name to the vector of values for
each supported data type (double, float, long, int and string). This
operation supports pagination feature, i.e. values that are retrieved
are those associated with the indices between the start (offset) and
end value (offset + limit) parameters (inclusive). If there are
num_points values in the table then each of the indices between 0 and
num_points-1 retrieves a unique value.
Note that when using the pagination feature, if the table (or the
underlying table in case of a view) is updated (records are inserted,
deleted or modified) the records or values retrieved may differ between
calls (discontiguous or overlap) based on the type of the update.
The response is returned as a dynamic schema. For details see: `dynamic
schemas documentation <../../../../api/#dynamic-schemas>`_.
Parameters:
column_names (list of str)
The list of column values to retrieve.
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
minimum allowed value is 0. The maximum allowed value is
MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned (if not provided the default is -9999), or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be returned.
encoding (str)
Specifies the encoding for returned records; either 'binary' or
'json'. Default value is 'binary'.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str)
Default value is an empty dict ( {} ).
Allowed keys are:
* **expression** --
Optional filter expression to apply to the table.
* **sort_by** --
Optional column that the data should be sorted by. Empty by
default (i.e. no sorting is applied).
* **sort_order** --
String indicating how the returned values should be sorted -
ascending or descending. Default is 'ascending'. If
sort_order is provided, sort_by has to be provided.
Allowed values are:
* ascending
* descending
The default value is 'ascending'.
* **order_by** --
Comma-separated list of the columns to be sorted by; e.g.
'timestamp asc, x desc'. The columns specified must be
present in input parameter *column_names*. If any alias is
given for any column name, the alias must be used, rather
than the original column name.
print_data (bool)
If True, print the fetched data to the console in a tabular
format if the data is being returned in the column-major format.
Default is False.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
get_column_major (bool)
Indicates if the decoded records will be transposed to be
column-major or returned as is (row-major). Default value is
True.
Decodes the fetched records and saves them in the response class in an
attribute called data.
Returns:
A dict of column name to column values for column-major data, or
a list of :class:`Record` objects for row-major data.
"""
# Issue the /get/records/bycolumn query
response = self.db.get_records_by_column_and_decode( self.qualified_name, column_names,
offset, limit, encoding, options,
force_primitive_return_types =
force_primitive_return_types,
get_column_major =
get_column_major )
if not _Util.is_ok( response ):
raise GPUdbException( _Util.get_error_msg( response ) )
# Get the records out
data = response[ "records" ]
# Print the data, if desired
if print_data and get_column_major:
print( tabulate( data , headers = 'keys', tablefmt = 'psql') )
# Else, return the decoded records
return data
# end get_records_by_column
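# Illustrative usage sketch for get_records_by_column() (not part of the
# generated API); the column names "x" and "y" are hypothetical:
#
#     columns = table.get_records_by_column( ["x", "y"], limit = 100,
#                                            print_data = True )
#     # column-major result: a dict of column name to list of values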
def get_records_by_series( self, world_table_name = None,
offset = 0, limit = 250, encoding = 'binary',
options = {},
force_primitive_return_types = True ):
"""Retrieves the complete series/track records from the given input
parameter *world_table_name* based on the partial track information
contained in the input parameter *table_name*.
This operation supports paging through the data via the input parameter
*offset* and input parameter *limit* parameters.
In contrast to :meth:`.get_records` this returns records grouped by
series/track. So if input parameter *offset* is 0 and input parameter
*limit* is 5 this operation would return the first 5 series/tracks in
input parameter *table_name*. Each series/track will be returned sorted
by their TIMESTAMP column.
Parameters:
world_table_name (str)
Name of the table containing the complete series/track
information to be returned for the tracks present in the input
parameter *table_name*. Typically this is used when retrieving
series/tracks from a view (which contains partial
series/tracks) but the user wants to retrieve the entire
original series/tracks. Can be blank.
offset (int)
A positive integer indicating the number of initial
series/tracks to skip (useful for paging through the results).
Default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (int)
A positive integer indicating the maximum number of
series/tracks to be returned. Or END_OF_SET (-9999) to indicate
that the max number of results should be returned. Default
value is 250.
encoding (str)
Specifies the encoding for returned records; either 'binary' or
'json'. Default value is 'binary'.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str)
Optional parameters. Default value is an empty dict ( {} ).
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
Returns:
A list of list of :class:`Record` objects containing the record values.
Each external record corresponds to a single track (or series).
"""
# Issue the /get/records/byseries query
response = self.db.get_records_by_series_and_decode( self.qualified_name,
world_table_name = world_table_name,
offset = offset, limit = limit,
encoding = encoding,
options = options,
force_primitive_return_types =
force_primitive_return_types )
if not _Util.is_ok( response ):
raise GPUdbException( _Util.get_error_msg( response ) )
# Return just the records; disregard the extra info within the response
return response.records
# end get_records_by_series
def get_records_from_collection( self, offset = 0, limit = -9999,
encoding = 'binary', options = {},
force_primitive_return_types = True ):
"""Retrieves records from a collection. The operation can optionally
return the record IDs which can be used in certain queries such as
:meth:`.delete_records`.
This operation supports paging through the data via the input parameter
*offset* and input parameter *limit* parameters.
Note that when using the Java API, it is not possible to retrieve
records from join tables using this operation.
Parameters:
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results).
Default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or END_OF_SET (-9999) to indicate that the max
number of results should be returned. Default value is -9999.
encoding (str)
Specifies the encoding for returned records; either 'binary' or
'json'. Default value is 'binary'.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str)
Default value is an empty dict ( {} ).
Allowed keys are:
* **return_record_ids** --
If 'true' then return the internal record ID along with each
returned record. Default is 'false'.
Allowed values are:
* true
* false
The default value is 'false'.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
Returns:
A list of :class:`Record` objects containing the record values.
"""
# Issue the /get/records/fromcollection query
response = self.db.get_records_from_collection_and_decode( self.qualified_name,
offset, limit,
encoding, options,
force_primitive_return_types =
force_primitive_return_types )
if not _Util.is_ok( response ):
raise GPUdbException( _Util.get_error_msg( response ) )
# Return just the records; disregard the extra info within the response
return response.records
# end get_records_from_collection
def get_geo_json( self, offset = 0, limit = -9999,
options = {}, force_primitive_return_types = True ):
"""Retrieves records as a GeoJSON from a given table, optionally filtered by an
expression and/or sorted by a column. This operation can be performed
on tables, views, or on homogeneous collections (collections containing
tables of all the same type). Records can be returned encoded as binary
or json.
This operation supports paging through the data via the input parameter
*offset* and input parameter *limit* parameters. Note that when paging
through a table, if the table (or the underlying table in case of a
view) is updated (records are inserted, deleted or modified) the
records retrieved may differ between calls based on the updates
applied.
Decodes and returns the fetched records.
Parameters:
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results).
Default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned. Or END_OF_SET (-9999) to indicate that the max
number of results should be returned. Default value is -9999.
options (dict of str)
Default value is an empty dict ( {} ).
Allowed keys are:
* **expression** --
Optional filter expression to apply to the table.
* **fast_index_lookup** --
Indicates if indexes should be used to perform the lookup for
a given expression if possible. Only applicable if there is
no sorting, the expression contains only equivalence
comparisons based on existing tables indexes and the range of
requested values is from [0 to END_OF_SET]. The default value
is true.
* **sort_by** --
Optional column that the data should be sorted by. Empty by
default (i.e. no sorting is applied).
* **sort_order** --
String indicating how the returned values should be sorted -
ascending or descending. If sort_order is provided, sort_by
has to be provided.
Allowed values are:
* ascending
* descending
The default value is 'ascending'.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
Returns:
A GeoJSON object (a dict) containing the record values.
"""
response = self.db.get_records_and_decode( self.qualified_name, offset, limit, "geojson", options,
record_type = self.record_type,
force_primitive_return_types =
force_primitive_return_types )
if not _Util.is_ok( response ):
raise GPUdbException( _Util.get_error_msg( response ) )
# Return just the records; disregard the extra info within the response
return response.records[0]
# end get_geo_json
def to_df(self, **kwargs):
"""Converts the table data to a Pandas Data Frame.
Parameters:
batch_size (int)
The number of records to retrieve at a time from the database
Returns:
A Pandas Data Frame containing the table data.
"""
from . import gpudb_dataframe
return gpudb_dataframe.DataFrameUtils.table_to_df(self.db, self.qualified_name, **kwargs)
# end to_df
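# Illustrative usage sketch for to_df() (not part of the generated API);
# requires pandas to be installed:
#
#     df = table.to_df( batch_size = 10000 )
#     print( df.head() )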
@classmethod
def from_df(cls,
df,
db,
table_name,
column_types = {},
clear_table = False,
create_table = True,
load_data = True,
show_progress = False,
batch_size = 5000,
**kwargs):
""" Load a Data Frame into a table; optionally dropping any existing table,
creating it if it doesn't exist, and loading data into it; and then returning a
GPUdbTable reference to the table.
Parameters:
df (pd.DataFrame)
The Pandas Data Frame to load into a table
db (GPUdb)
GPUdb instance
table_name (str)
Name of the target Kinetica table for the Data Frame loading
column_types (dict)
Optional Kinetica column properties to apply to the column type definitions inferred
from the Data Frame; map of column name to a list of column properties for that
column, excluding the inferred base type. For example::
{ "middle_name": [ 'char64', 'nullable' ], "state": [ 'char2', 'dict' ] }
clear_table (bool)
Whether to drop an existing table of the same name or not before creating this one.
create_table (bool)
Whether to create the table if it doesn't exist or not.
load_data (bool)
Whether to load data into the target table or not.
show_progress (bool)
Whether to show progress of the operation on the console.
batch_size (int)
The number of records at a time to load into the target table.
Raises:
GPUdbException:
If the target table cannot be created or the Data Frame data cannot be loaded.
Returns:
GPUdbTable: a GPUdbTable instance created from the Data Frame passed in
"""
from . import gpudb_dataframe
return gpudb_dataframe.DataFrameUtils.df_to_table(
df,
db,
table_name,
column_types,
clear_table,
create_table,
load_data,
show_progress,
batch_size,
**kwargs)
# end from_df
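# Illustrative usage sketch for from_df() (not part of the generated API);
# requires pandas, and the table/column names are hypothetical placeholders:
#
#     import pandas as pd
#     df = pd.DataFrame( { "name": ["a", "b"], "value": [1, 2] } )
#     table = GPUdbTable.from_df( df, db, "example_table",
#                                 column_types = { "name": ["char16"] },
#                                 clear_table = True )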
def type_as_df(self):
""" Return table columns as a dataframe for inspection. """
from . import gpudb_dataframe
return gpudb_dataframe.DataFrameUtils.table_type_as_df(self)
# end type_as_df
def insert_df(self, df, **kwargs):
""" Insert into a GPUdbTable from a dataframe. """
from . import gpudb_dataframe
return gpudb_dataframe.DataFrameUtils.df_insert_into_table(df, self, **kwargs)
# end insert_df
@staticmethod
def create_join_table( db, join_table_name = None, table_names = None,
column_names = None, expressions = [], options = {}
):
"""Creates a table that is the result of a SQL JOIN.
For join details and examples see: `Joins
<../../../../concepts/joins/>`__. For limitations, see `Join
Limitations and Cautions
<../../../../concepts/joins/#limitations-cautions>`__.
Parameters:
join_table_name (str)
Name of the join table to be created, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
table_names (list of str)
The list of table names composing the join, each in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
Corresponds to a SQL statement FROM clause. The user can
provide a single element (which will be automatically promoted
to a list internally) or a list.
column_names (list of str)
List of member table columns or column expressions to be
included in the join. Columns can be prefixed with
'table_id.column_name', where 'table_id' is the table name or
alias. Columns can be aliased via the syntax 'column_name as
alias'. Wild cards '*' can be used to include all columns
across member tables or 'table_id.*' for all of a single
table's columns. Columns and column expressions composing the
join must be uniquely named or aliased--therefore, the '*' wild
card cannot be used if column names aren't unique across all
tables. The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
expressions (list of str)
An optional list of expressions to combine and filter the
joined tables. Corresponds to a SQL statement WHERE clause.
For details see: `expressions
<../../../../concepts/expressions/>`__. The default value is
an empty list ( [] ). The user can provide a single element
(which will be automatically promoted to a list internally) or
a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*join_table_name*. This is always allowed even if the caller
does not have permission to create tables. The generated name
is returned in *qualified_join_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
join as part of input parameter *join_table_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the join. If the schema
is non-existent, it will be automatically created. The
default value is ''.
* **max_query_dimensions** --
No longer used.
* **optimize_lookups** --
Use more memory to speed up the joining of tables.
Allowed values are:
* true
* false
The default value is 'false'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the join
table specified in input parameter *join_table_name*.
* **view_id** --
view this projection is part of. The default value is ''.
* **no_count** --
Return a count of 0 for the join table for logging and for
:meth:`GPUdb.show_table`; optimization needed for large
overlapped equi-join stencils. The default value is 'false'.
* **chunk_size** --
Maximum number of records per joined-chunk for this table.
Defaults to the gpudb.conf file chunk size
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
# Create a random table name if none is given
join_table_name = join_table_name if join_table_name else GPUdbTable.random_name()
# Normalize the input table names
table_names = table_names if isinstance( table_names, list ) else [ table_names ]
table_names = [ t.qualified_name if isinstance(t, GPUdbTable) else t for t in table_names ]
# The given DB handle must be a GPUdb instance
if not isinstance( db, GPUdb ):
raise GPUdbException( "Argument 'db' must be a GPUdb object; "
"given %s" % str( type( db ) ) )
response = db.create_join_table( join_table_name, table_names,
column_names, expressions, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_join_table_name" in response.info:
join_table_name = response.info[ "qualified_join_table_name" ]
return GPUdbTable( None, join_table_name, db = db )
# end create_join_table
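# Illustrative usage sketch for create_join_table() (not part of the generated
# API); table, column, and alias names are hypothetical:
#
#     join = GPUdbTable.create_join_table(
#         db,
#         join_table_name = "example_join",
#         table_names     = [ "orders as o", "customers as c" ],
#         column_names    = [ "o.id", "c.name" ],
#         expressions     = [ "o.customer_id = c.id" ] )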
@staticmethod
def create_union( db, table_name = None, table_names = None,
input_column_names = None, output_column_names = None,
options = {} ):
"""Merges data from one or more tables with comparable data types into a
new table.
The following merges are supported:
UNION (DISTINCT/ALL) - For data set union details and examples, see
`Union <../../../../concepts/unions/>`__. For limitations, see `Union
Limitations and Cautions
<../../../../concepts/unions/#limitations-and-cautions>`__.
INTERSECT (DISTINCT/ALL) - For data set intersection details and
examples, see `Intersect <../../../../concepts/intersect/>`__. For
limitations, see `Intersect Limitations
<../../../../concepts/intersect/#limitations>`__.
EXCEPT (DISTINCT/ALL) - For data set subtraction details and examples,
see `Except <../../../../concepts/except/>`__. For limitations, see
`Except Limitations <../../../../concepts/except/#limitations>`__.
MERGE VIEWS - For a given set of `filtered views
<../../../../concepts/filtered_views/>`__ on a single table, creates a
single filtered view containing all of the unique records across all of
the given filtered data sets.
Non-charN 'string' and 'bytes' column types cannot be merged, nor can
columns marked as `store-only
<../../../../concepts/types/#data-handling>`__.
Parameters:
table_name (str)
Name of the table to be created, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
table_names (list of str)
The list of table names to merge, in [schema_name.]table_name
format, using standard `name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
contain the names of one or more existing tables. The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
input_column_names (list of lists of str)
The list of columns from each of the corresponding input
tables. The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
output_column_names (list of str)
The list of names of the columns to be stored in the output
table. The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*table_name*. If *persist* is *false* (or unspecified), then
this is always allowed even if the caller does not have
permission to create tables. The generated name is returned
in *qualified_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
projection as part of input parameter *table_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of the schema for the output table. If
the schema provided is non-existent, it will be automatically
created. The default value is ''.
* **mode** --
If *merge_views*, then this operation will merge the provided
views. All input parameter *table_names* must be views from
the same underlying base table.
Allowed values are:
* **union_all** --
Retains all rows from the specified tables.
* **union** --
Retains all unique rows from the specified tables (synonym
for *union_distinct*).
* **union_distinct** --
Retains all unique rows from the specified tables.
* **except** --
Retains all unique rows from the first table that do not
appear in the second table (only works on 2 tables).
* **except_all** --
Retains all rows (including duplicates) from the first table
that do not appear in the second table (only works on 2
tables).
* **intersect** --
Retains all unique rows that appear in both of the
specified tables (only works on 2 tables).
* **intersect_all** --
Retains all rows (including duplicates) that appear in both
of the specified tables (only works on 2 tables).
* **merge_views** --
Merge two or more views (or views of views) of the same
base data set into a new view. If this mode is selected
input parameter *input_column_names* AND input parameter
*output_column_names* must be empty. The resulting view
would match the results of a SQL OR operation, e.g., if
filter 1 creates a view using the expression 'x = 20' and
filter 2 creates a view using the expression 'x <= 10',
then the merge views operation creates a new view using the
expression 'x = 20 OR x <= 10'.
The default value is 'union_all'.
* **chunk_size** --
Indicates the number of records per chunk to be used for this
output table.
* **create_indexes** --
Comma-separated list of columns on which to create indexes on
the output table. The columns specified must be present in
input parameter *output_column_names*.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the output
table specified in input parameter *table_name*.
* **persist** --
If *true*, then the output table specified in input parameter
*table_name* will be persisted and will not expire unless a
*ttl* is specified. If *false*, then the output table will
be an in-memory table and will expire unless a *ttl* is
specified otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **view_id** --
ID of view of which this output table is a member. The
default value is ''.
* **force_replicated** --
If *true*, then the output table specified in input parameter
*table_name* will be replicated even if the source tables are
not.
Allowed values are:
* true
* false
The default value is 'false'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
# Create a random table name if none is given
table_name = table_name if table_name else GPUdbTable.random_name()
# Normalize the input table names
table_names = table_names if isinstance( table_names, list ) else [ table_names ]
table_names = [ t.qualified_name if isinstance(t, GPUdbTable) else t for t in table_names ]
# The given DB handle must be a GPUdb instance
if not isinstance( db, GPUdb ):
raise GPUdbException( "Argument 'db' must be a GPUdb object; "
"given %s" % str( type( db ) ) )
response = db.create_union( table_name, table_names, input_column_names,
output_column_names, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_table_name" in response.info:
table_name = response.info[ "qualified_table_name" ]
return GPUdbTable( None, table_name, db = db )
# end create_union
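# A minimal usage sketch (not part of the generated API); assumes an existing
# GPUdb connection `db` and two hypothetical, type-compatible tables named
# "example.orders_2023" and "example.orders_2024".
def _example_create_union( db ):
    # Merge the two yearly tables into one output table, keeping duplicates
    union_table = GPUdbTable.create_union(
        db,
        table_name          = "example.orders_all",
        table_names         = [ "example.orders_2023", "example.orders_2024" ],
        input_column_names  = [ [ "id", "amount" ], [ "id", "amount" ] ],
        output_column_names = [ "id", "amount" ],
        options             = { "mode": "union_all" } )
    return union_table   # read-only GPUdbTable handle to the merged table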
@staticmethod
def merge_records( db, table_name = None, source_table_names = None,
field_maps = None, options = {} ):
"""Create a new empty result table (specified by input parameter
*table_name*),
and insert all records from source tables
(specified by input parameter *source_table_names*) based on the field
mapping
information (specified by input parameter *field_maps*).
For merge records details and examples, see
`Merge Records <../../../../concepts/merge_records/>`__. For
limitations, see
`Merge Records Limitations and Cautions
<../../../../concepts/merge_records/#limitations-and-cautions>`__.
The field map (specified by input parameter *field_maps*) holds the
user-specified maps
of target table column names to source table columns. The array of
input parameter *field_maps* must match one-to-one with the input
parameter *source_table_names*,
e.g., there's a map present in input parameter *field_maps* for each
table listed in
input parameter *source_table_names*.
Parameters:
table_name (str)
The name of the new result table for the records to be merged
into, in [schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
NOT be an existing table.
source_table_names (list of str)
The list of names of source tables to get the records from,
each in [schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be existing table names. The user can provide a single
element (which will be automatically promoted to a list
internally) or a list.
field_maps (list of dicts of str to str)
Contains a list of source/target column mappings, one mapping
for each source table listed in input parameter
*source_table_names* being merged into the target table
specified by input parameter *table_name*. Each mapping
contains the target column names (as keys) that the data in the
mapped source columns or column `expressions
<../../../../concepts/expressions/>`__ (as values) will be
merged into. All of the source columns being merged into a
given target column must match in type, as that type will
determine the type of the new target column. The user can
provide a single element (which will be automatically promoted
to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*table_name*. If *persist* is *false*, then this is always
allowed even if the caller does not have permission to create
tables. The generated name is returned in
*qualified_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
merged table as part of input parameter *table_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created merged
table specified by input parameter *table_name*.
* **is_replicated** --
Indicates the `distribution scheme
<../../../../concepts/tables/#distribution>`__ for the data
of the merged table specified in input parameter
*table_name*. If true, the table will be `replicated
<../../../../concepts/tables/#replication>`__. If false, the
table will be `randomly sharded
<../../../../concepts/tables/#random-sharding>`__.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the merged
table specified in input parameter *table_name*.
* **persist** --
If *true*, then the table specified in input parameter
*table_name* will be persisted and will not expire unless a
*ttl* is specified. If *false*, then the table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'true'.
* **chunk_size** --
Indicates the number of records per chunk to be used for the
merged table specified in input parameter *table_name*.
* **view_id** --
ID of the view this result table is part of. The default value is ''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
# Create a random table name if none is given
table_name = table_name if table_name else GPUdbTable.random_name()
# Normalize the input table names
source_table_names = source_table_names if isinstance( source_table_names, list ) else [ source_table_names ]
source_table_names = [ t.qualified_name if isinstance(t, GPUdbTable) else t for t in source_table_names ]
# The given DB handle must be a GPUdb instance
if not isinstance( db, GPUdb ):
raise GPUdbException( "Argument 'db' must be a GPUdb object; "
"given %s" % str( type( db ) ) )
response = db.merge_records( table_name, source_table_names, field_maps,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_table_name" in response.info:
table_name = response.info[ "qualified_table_name" ]
return GPUdbTable( None, table_name, db = db )
# end merge_records
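# A minimal usage sketch (not part of the generated API); assumes an existing
# GPUdb connection `db` and two hypothetical source tables whose differently
# named columns are mapped onto a common target layout.
def _example_merge_records( db ):
    # One map per source table: target column name -> source column/expression
    field_maps = [
        { "id": "cust_id", "total": "amount"      },   # map for example.web_orders
        { "id": "id",      "total": "order_total" } ]  # map for example.store_orders
    merged = GPUdbTable.merge_records(
        db,
        table_name         = "example.all_orders",
        source_table_names = [ "example.web_orders", "example.store_orders" ],
        field_maps         = field_maps )
    return merged   # read-only GPUdbTable handle to the new merged table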
def aggregate_convex_hull( self, x_column_name = None, y_column_name = None,
options = {} ):
"""Calculates and returns the convex hull for the values in a table
specified by input parameter *table_name*.
Parameters:
x_column_name (str)
Name of the column containing the x coordinates of the points
for the operation being performed.
y_column_name (str)
Name of the column containing the y coordinates of the points
for the operation being performed.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
The response from the server which is a dict containing the
following entries--
x_vector (list of floats)
Array of x coordinates of the resulting convex set.
y_vector (list of floats)
Array of y coordinates of the resulting convex set.
count (int)
Count of the number of points in the convex set.
is_valid (bool)
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.aggregate_convex_hull( self.qualified_name,
x_column_name, y_column_name,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end aggregate_convex_hull
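# A minimal usage sketch (not part of the generated API); assumes a hypothetical
# existing table "example.points" with numeric columns "x" and "y".
def _example_aggregate_convex_hull( db ):
    points = GPUdbTable( None, "example.points", db = db )
    response = points.aggregate_convex_hull( x_column_name = "x",
                                             y_column_name = "y" )
    # The hull vertices come back as parallel coordinate vectors
    hull = list( zip( response[ "x_vector" ], response[ "y_vector" ] ) )
    return hull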
def aggregate_group_by( self, column_names = None, offset = 0, limit =
-9999, encoding = 'binary', options = {},
force_primitive_return_types = True,
get_column_major = True ):
"""Calculates unique combinations (groups) of values for the given columns
in a given table or view and computes aggregates on each unique
combination. This is somewhat analogous to an SQL-style SELECT...GROUP
BY.
For aggregation details and examples, see `Aggregation
<../../../../concepts/aggregation/>`__. For limitations, see
`Aggregation Limitations
<../../../../concepts/aggregation/#limitations>`__.
Any column(s) can be grouped on, and all column types except
unrestricted-length strings may be used for computing applicable
aggregates; columns marked as `store-only
<../../../../concepts/types/#data-handling>`__ are unable to be used in
grouping or aggregation.
The results can be paged via the input parameter *offset* and input
parameter *limit* parameters. For example, to get 10 groups with the
largest counts the inputs would be: limit=10,
options={"sort_order":"descending", "sort_by":"value"}.
Input parameter *options* can be used to customize behavior of this
call e.g. filtering or sorting the results.
To group by columns 'x' and 'y' and compute the number of objects
within each group, use: column_names=['x','y','count(*)'].
To also compute the sum of 'z' over each group, use:
column_names=['x','y','count(*)','sum(z)'].
Available `aggregation functions
<../../../../concepts/expressions/#aggregate-expressions>`__ are:
count(*), sum, min, max, avg, mean, stddev, stddev_pop, stddev_samp,
var, var_pop, var_samp, arg_min, arg_max and count_distinct.
Available grouping functions are `Rollup
<../../../../concepts/rollup/>`__, `Cube
<../../../../concepts/cube/>`__, and `Grouping Sets
<../../../../concepts/grouping_sets/>`__
This service also provides support for `Pivot
<../../../../concepts/pivot/>`__ operations.
Filtering on aggregates is supported via expressions using `aggregation
functions <../../../../concepts/expressions/#aggregate-expressions>`__
supplied to *having*.
The response is returned as a dynamic schema. For details see: `dynamic
schemas documentation <../../../../api/concepts/#dynamic-schemas>`__.
If a *result_table* name is specified in the input parameter *options*,
the results are stored in a new table with that name--no results are
returned in the response. Both the table name and resulting column
names must adhere to `standard naming conventions
<../../../../concepts/tables/#table>`__; column/aggregation expressions
will need to be aliased. If the source table's `shard key
<../../../../concepts/tables/#shard-keys>`__ is used as the grouping
column(s) and all result records are selected (input parameter *offset*
is 0 and input parameter *limit* is -9999), the result table will be
sharded; in all other cases it will be replicated. Sorting will
function properly only if the result table is replicated or if there is
only one processing node, and should not be relied upon in other cases.
Not available when any of the values of input parameter *column_names*
is an unrestricted-length string.
Parameters:
column_names (list of str)
List of one or more column names, expressions, and aggregate
expressions.
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records.
Allowed values are:
* **binary** --
Indicates that the returned records should be binary encoded.
* **json** --
Indicates that the returned records should be json encoded.
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of *result_table*. If
*result_table_persist* is *false* (or unspecified), then this
is always allowed even if the caller does not have permission
to create tables. The generated name is returned in
*qualified_result_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema as part of
*result_table* and use :meth:`GPUdb.create_schema` to create
the schema if non-existent] Name of a schema which is to
contain the table specified in *result_table*. If the schema
provided is non-existent, it will be automatically created.
* **expression** --
Filter expression to apply to the table prior to computing
the aggregate group by.
* **having** --
Filter expression to apply to the aggregated results.
* **sort_order** --
String indicating how the returned values should be sorted -
ascending or descending.
Allowed values are:
* **ascending** --
Indicates that the returned values should be sorted in
ascending order.
* **descending** --
Indicates that the returned values should be sorted in
descending order.
The default value is 'ascending'.
* **sort_by** --
String determining how the results are sorted.
Allowed values are:
* **key** --
Indicates that the returned values should be sorted by key,
which corresponds to the grouping columns. If you have
multiple grouping columns (and are sorting by key), it will
first sort the first grouping column, then the second
grouping column, etc.
* **value** --
Indicates that the returned values should be sorted by
value, which corresponds to the aggregates. If you have
multiple aggregates (and are sorting by value), it will
first sort by the first aggregate, then the second
aggregate, etc.
The default value is 'value'.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
* **result_table** --
The name of a table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
Column names (group-by and aggregate fields) need to be given
aliases e.g. ["FChar256 as fchar256", "sum(FDouble) as sfd"].
If present, no results are returned in the response. This
option is not available if one of the grouping attributes is
an unrestricted string (i.e., not charN) type.
* **result_table_persist** --
If *true*, then the result table specified in *result_table*
will be persisted and will not expire unless a *ttl* is
specified. If *false*, then the result table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_force_replicated** --
Force the result table to be replicated (ignores any
sharding). Must be used in combination with the
*result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_generate_pk** --
If *true* then set a primary key for the result table. Must
be used in combination with the *result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in *result_table*.
* **chunk_size** --
Indicates the number of records per chunk to be used for the
result table. Must be used in combination with the
*result_table* option.
* **create_indexes** --
Comma-separated list of columns on which to create indexes on
the result table. Must be used in combination with the
*result_table* option.
* **view_id** --
ID of view of which the result table will be a member. The
default value is ''.
* **pivot** --
The pivot column.
* **pivot_values** --
The value list provided will become the column headers in the
output. Should be the values from the pivot_column.
* **grouping_sets** --
Customize the grouping attribute sets to compute the
aggregates. These sets can include ROLLUP or CUBE operators.
The attribute sets should be enclosed in parentheses and can
include composite attributes. All attributes specified in the
grouping sets must be present in the group-by attributes.
* **rollup** --
This option is used to specify the multilevel aggregates.
* **cube** --
This option is used to specify the multidimensional
aggregates.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
get_column_major (bool)
Indicates if the decoded records will be transposed to be
column-major or returned as is (row-major). Default value is
True.
Returns:
A read-only GPUdbTable object if input options has "result_table";
otherwise the response from the server, which is a dict containing
the following entries--
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_result_table_name** --
The fully qualified name of the table (i.e. including the
schema) used to store the results.
records (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
data (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.aggregate_group_by_and_decode( self.qualified_name,
column_names, offset,
limit, encoding,
options,
force_primitive_return_types=
force_primitive_return_types,
get_column_major =
get_column_major )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
response["data"] = response["records"]
if "qualified_result_table_name" in response.info:
qualified_result_table_name = response.info[ "qualified_result_table_name" ]
else:
qualified_result_table_name = None
if qualified_result_table_name:
# Create a read-only table for the result table
return self.create_view( qualified_result_table_name, response[ "total_number_of_records" ] )
return response
# end aggregate_group_by
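# A minimal usage sketch (not part of the generated API); assumes a hypothetical
# existing table "example.sales" with columns "region", "product", and "amount".
def _example_aggregate_group_by( db ):
    sales = GPUdbTable( None, "example.sales", db = db )
    # Ten groups with the largest counts, as described in the docstring above
    response = sales.aggregate_group_by(
        column_names = [ "region", "product", "count(*)", "sum(amount)" ],
        limit        = 10,
        options      = { "sort_order": "descending", "sort_by": "value" },
        get_column_major = False )   # keep row-major records for easy iteration
    for record in response[ "records" ]:
        print( record )
    return response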
def aggregate_histogram( self, column_name = None, start = None, end = None,
interval = None, options = {} ):
"""Performs a histogram calculation given a table, a column, and an
interval function. The input parameter *interval* is used to produce
bins of that size
and the result, computed over the records falling within each bin, is
returned.
For each bin, the start value is inclusive, but the end value is
exclusive--except for the very last bin for which the end value is also
inclusive. The value returned for each bin is the number of records in
it,
except when a column name is provided as a
*value_column*. In this latter case the sum of the
values corresponding to the *value_column* is used as the
result instead. The total number of bins requested cannot exceed
10,000.
NOTE: The Kinetica instance being accessed must be running a CUDA
(GPU-based)
build to service a request that specifies a *value_column*.
Parameters:
column_name (str)
Name of a column or an expression of one or more column names
over which the histogram will be calculated.
start (float)
Lower end value of the histogram interval, inclusive.
end (float)
Upper end value of the histogram interval, inclusive.
interval (float)
The size of each bin within the start and end parameters.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **value_column** --
The name of the column to use when calculating the bin values
(values are summed). The column must be a numerical type
(int, double, long, float).
Returns:
The response from the server which is a dict containing the
following entries--
counts (list of floats)
The array of calculated values that represents the histogram
data points.
start (float)
Value of input parameter *start*.
end (float)
Value of input parameter *end*.
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.aggregate_histogram( self.qualified_name,
column_name, start, end,
interval, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end aggregate_histogram
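# A minimal usage sketch (not part of the generated API); assumes a hypothetical
# existing table "example.measurements" with a numeric column "temperature".
def _example_aggregate_histogram( db ):
    measurements = GPUdbTable( None, "example.measurements", db = db )
    # Count records in 5-degree bins between 0 and 100
    response = measurements.aggregate_histogram( column_name = "temperature",
                                                 start = 0, end = 100,
                                                 interval = 5 )
    return response[ "counts" ]   # one count per bin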
def aggregate_k_means( self, column_names = None, k = None, tolerance =
None, options = {} ):
"""This endpoint runs the k-means algorithm - a heuristic algorithm
that attempts to do k-means clustering. An ideal k-means clustering
algorithm
selects k points such that the sum of the mean squared distances of
each member
of the set to the nearest of the k points is minimized. The k-means
algorithm
however does not necessarily produce such an ideal cluster. It begins
with a
randomly selected set of k points and then refines the location of the
points
iteratively and settles to a local minimum. Various parameters and
options are
provided to control the heuristic search.
NOTE: The Kinetica instance being accessed must be running a CUDA
(GPU-based)
build to service this request.
Parameters:
column_names (list of str)
List of column names on which the operation would be performed.
If n columns are provided then each of the k result points will
have n dimensions corresponding to the n columns. The user
can provide a single element (which will be automatically
promoted to a list internally) or a list.
k (int)
The number of mean points to be determined by the algorithm.
tolerance (float)
Stop iterating when the distance between successive points is
less than the given tolerance.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **whiten** --
When set to 1 each of the columns is first normalized by its
stdv - default is not to whiten.
* **max_iters** --
Number of times to try to hit the tolerance limit before
giving up - default is 10.
* **num_tries** --
Number of times to run the k-means algorithm with different
randomly selected starting points - helps avoid local
minima. Default is 1.
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of *result_table*. If
*result_table_persist* is *false* (or unspecified), then this
is always allowed even if the caller does not have permission
to create tables. The generated name is returned in
*qualified_result_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table** --
The name of a table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. If
this option is specified, the results are not returned in the
response.
* **result_table_persist** --
If *true*, then the result table specified in *result_table*
will be persisted and will not expire unless a *ttl* is
specified. If *false*, then the result table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in *result_table*.
Returns:
A read-only GPUdbTable object if input options has "result_table";
otherwise the response from the server, which is a dict containing
the following entries--
means (list of lists of floats)
The k-mean values found.
counts (list of longs)
The number of elements in the cluster closest to the corresponding
k-means values.
rms_dists (list of floats)
The root mean squared distance of the elements in the cluster
for each of the k-means values.
count (long)
The total count of all the clusters - will be the size of the
input table.
rms_dist (float)
The sum of all the rms_dists - the value the k-means algorithm
is attempting to minimize.
tolerance (float)
The distance between the last two iterations of the algorithm
before it quit.
num_iters (int)
The number of iterations the algorithm executed before it quit.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_result_table_name** --
The fully qualified name of the result table (i.e. including
the schema) used to store the results.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.aggregate_k_means( self.qualified_name, column_names,
k, tolerance, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
if "qualified_result_table_name" in response.info:
qualified_result_table_name = response.info[ "qualified_result_table_name" ]
else:
qualified_result_table_name = None
if qualified_result_table_name:
# Create a read-only table for the result table
return self.create_view( qualified_result_table_name )
return response
# end aggregate_k_means
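# A minimal usage sketch (not part of the generated API); assumes a hypothetical
# existing table "example.points" with numeric columns "x" and "y", on a
# CUDA-enabled Kinetica instance as noted in the docstring above.
def _example_aggregate_k_means( db ):
    points = GPUdbTable( None, "example.points", db = db )
    response = points.aggregate_k_means( column_names = [ "x", "y" ],
                                         k = 3,
                                         tolerance = 0.01,
                                         options = { "max_iters": "20" } )
    return response[ "means" ]   # k cluster centers, one [x, y] pair each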
def aggregate_min_max( self, column_name = None, options = {} ):
"""Calculates and returns the minimum and maximum values of a particular
column in a table.
Parameters:
column_name (str)
Name of a column or an expression of one or more columns on
which the min-max will be calculated.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
The response from the server which is a dict containing the
following entries--
min (float)
Minimum value of the input parameter *column_name*.
max (float)
Maximum value of the input parameter *column_name*.
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.aggregate_min_max( self.qualified_name, column_name,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end aggregate_min_max
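# A minimal usage sketch (not part of the generated API); assumes a hypothetical
# existing table "example.sales" with a numeric column "amount".
def _example_aggregate_min_max( db ):
    sales = GPUdbTable( None, "example.sales", db = db )
    response = sales.aggregate_min_max( column_name = "amount" )
    return response[ "min" ], response[ "max" ]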
def aggregate_min_max_geometry( self, column_name = None, options = {} ):
"""Calculates and returns the minimum and maximum x- and y-coordinates
of a particular geospatial geometry column in a table.
Parameters:
column_name (str)
Name of a geospatial geometry column on which the min-max will
be calculated.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
The response from the server which is a dict containing the
following entries--
min_x (float)
Minimum x-coordinate value of the input parameter
*column_name*.
max_x (float)
Maximum x-coordinate value of the input parameter
*column_name*.
min_y (float)
Minimum y-coordinate value of the input parameter
*column_name*.
max_y (float)
Maximum y-coordinate value of the input parameter
*column_name*.
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.aggregate_min_max_geometry( self.qualified_name,
column_name, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end aggregate_min_max_geometry
def aggregate_statistics( self, column_name = None, stats = None, options =
{} ):
"""Calculates the requested statistics of the given column(s) in a
given table.
The available statistics are:
*count* (number of total objects),
*mean*,
*stdv* (standard deviation),
*variance*,
*skew*,
*kurtosis*,
*sum*,
*min*,
*max*,
*weighted_average*,
*cardinality* (unique count),
*estimated_cardinality*,
*percentile*, and
*percentile_rank*.
Estimated cardinality is calculated by using the hyperloglog
approximation
technique.
Percentiles and percentile ranks are approximate and are calculated
using the
t-digest algorithm. They must include the desired
*percentile*/*percentile_rank*.
To compute multiple percentiles each value must be specified separately
(i.e.
'percentile(75.0),percentile(99.0),percentile_rank(1234.56),percentile_rank(-5)').
A second, comma-separated value can be added to the
*percentile* statistic to calculate percentile
resolution, e.g., a 50th percentile with 200 resolution would be
'percentile(50,200)'.
The weighted average statistic requires a weight column to be specified
in
*weight_column_name*. The weighted average is then
defined as the sum of the products of input parameter *column_name*
times the
*weight_column_name* values divided by the sum of the
*weight_column_name* values.
Additional columns can be used in the calculation of statistics via
*additional_column_names*. Values in these columns will
be included in the overall aggregate calculation--individual aggregates
will not
be calculated per additional column. For instance, requesting the
*count* & *mean* of
input parameter *column_name* x and *additional_column_names*
y & z, where x holds the numbers 1-10, y holds 11-20, and z holds
21-30, would
return the total number of x, y, & z values (30), and the single
average value
across all x, y, & z values (15.5).
The response includes a list of key/value pairs of each statistic
requested and
its corresponding value.
Parameters:
column_name (str)
Name of the primary column for which the statistics are to be
calculated.
stats (str)
Comma separated list of the statistics to calculate, e.g.
"sum,mean".
Allowed values are:
* **count** --
Number of objects (independent of the given column(s)).
* **mean** --
Arithmetic mean (average), equivalent to sum/count.
* **stdv** --
Sample standard deviation (denominator is count-1).
* **variance** --
Unbiased sample variance (denominator is count-1).
* **skew** --
Skewness (third standardized moment).
* **kurtosis** --
Kurtosis (fourth standardized moment).
* **sum** --
Sum of all values in the column(s).
* **min** --
Minimum value of the column(s).
* **max** --
Maximum value of the column(s).
* **weighted_average** --
Weighted arithmetic mean (using the option
*weight_column_name* as the weighting column).
* **cardinality** --
Number of unique values in the column(s).
* **estimated_cardinality** --
Estimate (via hyperloglog technique) of the number of unique
values in the column(s).
* **percentile** --
Estimate (via t-digest) of the given percentile of the
column(s) (percentile(50.0) will be an approximation of the
median). Add a second, comma-separated value to calculate
percentile resolution, e.g., 'percentile(75,150)'
* **percentile_rank** --
Estimate (via t-digest) of the percentile rank of the given
value in the column(s) (if the given value is the median of
the column(s), percentile_rank(<median>) will return
approximately 50.0).
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **additional_column_names** --
A list of comma separated column names over which statistics
can be accumulated along with the primary column. All
columns listed and input parameter *column_name* must be of
the same type. Must not include the column specified in
input parameter *column_name* and no column can be listed
twice.
* **weight_column_name** --
Name of column used as weighting attribute for the weighted
average statistic.
Returns:
The response from the server which is a dict containing the
following entries--
stats (dict of str to floats)
(statistic name, double value) pairs of the requested
statistics, including the total count by default.
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.aggregate_statistics( self.qualified_name,
column_name, stats, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end aggregate_statistics
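# A minimal usage sketch (not part of the generated API); assumes a hypothetical
# existing table "example.sales" with numeric columns "amount" and "quantity".
def _example_aggregate_statistics( db ):
    sales = GPUdbTable( None, "example.sales", db = db )
    response = sales.aggregate_statistics(
        column_name = "amount",
        stats       = "count,mean,stdv,percentile(50.0),weighted_average",
        options     = { "weight_column_name": "quantity" } )
    # Map of statistic name -> value, e.g. {"count": ..., "mean": ..., ...}
    return response[ "stats" ]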
def aggregate_statistics_by_range( self, select_expression = '', column_name
= None, value_column_name = None, stats =
None, start = None, end = None, interval
= None, options = {} ):
"""Divides the given set into bins and calculates statistics of the
values of a value-column in each bin. The bins are based on the values
of a
given binning-column. The statistics that may be requested are mean,
stdv
(standard deviation), variance, skew, kurtosis, sum, min, max, first,
last and
weighted average. In addition to the requested statistics the count of
total
samples in each bin is returned. This counts vector is just the
histogram of the
column used to divide the set members into bins. The weighted average
statistic
requires a weight column to be specified in
*weight_column_name*. The weighted average is then
defined as the sum of the products of the value column times the weight
column
divided by the sum of the weight column.
There are two methods for binning the set members. In the first, which
can be
used for numeric valued binning-columns, a min, max and interval are
specified.
The number of bins, nbins, is the integer upper bound of
(max-min)/interval.
Values that fall in the range [min+n*interval,min+(n+1)*interval) are
placed in
the nth bin, where n ranges from 0..nbins-2. The final bin is
[min+(nbins-1)*interval,max]. In the second method,
*bin_values* specifies a list of binning column values.
Binning-columns whose value matches the nth member of the
*bin_values* list are placed in the nth bin. When a list
is provided, the binning-column must be of type string or int.
NOTE: The Kinetica instance being accessed must be running a CUDA
(GPU-based)
build to service this request.
Parameters:
select_expression (str)
For a non-empty expression statistics are calculated for those
records for which the expression is true. The default value is
''.
column_name (str)
Name of the binning-column used to divide the set samples into
bins.
value_column_name (str)
Name of the value-column for which statistics are to be
computed.
stats (str)
A comma-separated list of the statistics to
calculate, e.g. 'sum,mean'. Available statistics: mean, stdv
(standard deviation), variance, skew, kurtosis, sum.
start (float)
The lower bound of the binning-column.
end (float)
The upper bound of the binning-column.
interval (float)
The interval of a bin. Set members fall into bin i if the
binning-column falls in the range [start+interval*i,
start+interval*(i+1)).
options (dict of str to str)
Map of optional parameters. The default value is an empty
dict ( {} ).
Allowed keys are:
* **additional_column_names** --
A list of comma separated value-column names over which
statistics can be accumulated along with the primary
value_column.
* **bin_values** --
A list of comma separated binning-column values. Values that
match the nth bin_values value are placed in the nth bin.
* **weight_column_name** --
Name of the column used as weighting column for the
weighted_average statistic.
* **order_column_name** --
Name of the column used for candlestick charting techniques.
Returns:
The response from the server which is a dict containing the
following entries--
stats (dict of str to lists of floats)
A map with a key for each statistic in the stats input
parameter having a value that is a vector of the corresponding
value-column bin statistics. In addition, the key count has a
value that is a histogram of the binning-column.
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.aggregate_statistics_by_range( self.qualified_name,
select_expression,
column_name,
value_column_name,
stats, start, end,
interval, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end aggregate_statistics_by_range
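# A minimal usage sketch (not part of the generated API); assumes a hypothetical
# existing table "example.trades" with a numeric binning column "hour" (0-23)
# and a value column "price", on a CUDA-enabled instance as noted above.
def _example_aggregate_statistics_by_range( db ):
    trades = GPUdbTable( None, "example.trades", db = db )
    response = trades.aggregate_statistics_by_range(
        column_name       = "hour",
        value_column_name = "price",
        stats             = "mean,min,max",
        start             = 0,
        end               = 24,
        interval          = 1 )
    # One list of per-bin values for each requested statistic, plus "count"
    return response[ "stats" ]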
def aggregate_unique( self, column_name = None, offset = 0, limit = -9999,
encoding = 'binary', options = {},
force_primitive_return_types = True, get_column_major
= True ):
"""Returns all the unique values from a particular column
(specified by input parameter *column_name*) of a particular table or
view
(specified by input parameter *table_name*). If input parameter
*column_name* is a numeric column,
the values will be in output parameter *binary_encoded_response*.
Otherwise if
input parameter *column_name* is a string column, the values will be in
output parameter *json_encoded_response*. The results can be paged via
input parameter *offset*
and input parameter *limit* parameters.
Columns marked as `store-only
<../../../../concepts/types/#data-handling>`__
are unable to be used with this function.
To get the first 10 unique values sorted in descending order input
parameter *options*
would be::
{"limit":"10","sort_order":"descending"}.
The response is returned as a dynamic schema. For details see:
`dynamic schemas documentation
<../../../../api/concepts/#dynamic-schemas>`__.
If a *result_table* name is specified in the
input parameter *options*, the results are stored in a new table with
that name--no
results are returned in the response. Both the table name and
resulting column
name must adhere to
`standard naming conventions <../../../../concepts/tables/#table>`__;
any column expression will need to be aliased. If the source table's
`shard key <../../../../concepts/tables/#shard-keys>`__ is used as the
input parameter *column_name*, the result table will be sharded; in all
other cases it
will be replicated. Sorting will function properly only if the result
table is
replicated or if there is only one processing node, and should not be
relied upon
in other cases. Not available if the value of input parameter
*column_name* is an
unrestricted-length string.
Parameters:
column_name (str)
Name of the column or an expression containing one or more
column names on which the unique function would be applied.
offset (long)
A positive integer indicating the number of initial results to
skip (this can be useful for paging through the results). The
default value is 0. The minimum allowed value is 0. The maximum
allowed value is MAX_INT.
limit (long)
A positive integer indicating the maximum number of results to
be returned, or
END_OF_SET (-9999) to indicate that the maximum number of
results allowed by the server should be
returned. The number of records returned will never exceed the
server's own limit, defined by the
`max_get_records_size
<../../../../config/#config-main-general>`__ parameter in the
server configuration.
Use output parameter *has_more_records* to see if more records
exist in the result to be fetched, and
input parameter *offset* & input parameter *limit* to request
subsequent pages of results. The default value is -9999.
encoding (str)
Specifies the encoding for returned records.
Allowed values are:
* **binary** --
Indicates that the returned records should be binary encoded.
* **json** --
Indicates that the returned records should be json encoded.
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of *result_table*. If
*result_table_persist* is *false* (or unspecified), then this
is always allowed even if the caller does not have permission
to create tables. The generated name is returned in
*qualified_result_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema as part of
*result_table* and use :meth:`GPUdb.create_schema` to create
the schema if non-existent] Name of a schema which is to
contain the table specified in *result_table*. If the schema
provided is non-existent, it will be automatically created.
* **expression** --
Optional filter expression to apply to the table.
* **sort_order** --
String indicating how the returned values should be sorted.
Allowed values are:
* ascending
* descending
The default value is 'ascending'.
* **result_table** --
The name of the table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. If
present, no results are returned in the response. Not
available if input parameter *column_name* is an
unrestricted-length string.
* **result_table_persist** --
If *true*, then the result table specified in *result_table*
will be persisted and will not expire unless a *ttl* is
specified. If *false*, then the result table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_force_replicated** --
Force the result table to be replicated (ignores any
sharding). Must be used in combination with the
*result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **result_table_generate_pk** --
If *true* then set a primary key for the result table. Must
be used in combination with the *result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in *result_table*.
* **chunk_size** --
Indicates the number of records per chunk to be used for the
result table. Must be used in combination with the
*result_table* option.
* **view_id** --
ID of view of which the result table will be a member. The
default value is ''.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
get_column_major (bool)
Indicates if the decoded records will be transposed to be
column-major or returned as is (row-major). Default value is
True.
Returns:
A read-only GPUdbTable object if input options has "result_table";
otherwise the response from the server, which is a dict containing
the following entries--
table_name (str)
The same table name as was passed in the parameter list.
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_result_table_name** --
The fully qualified name of the table (i.e. including the
schema) used to store the results.
records (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
data (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.aggregate_unique_and_decode( self.qualified_name,
column_name, offset,
limit, encoding,
options,
force_primitive_return_types=
force_primitive_return_types,
get_column_major =
get_column_major )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
response["data"] = response["records"]
if "qualified_result_table_name" in response.info:
qualified_result_table_name = response.info[ "qualified_result_table_name" ]
else:
qualified_result_table_name = None
if qualified_result_table_name:
# Create a read-only table for the result table
return self.create_view( qualified_result_table_name )
return response
# end aggregate_unique
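# A minimal usage sketch (not part of the generated API); assumes a hypothetical
# existing table "example.sales" with a column "region".
def _example_aggregate_unique( db ):
    sales = GPUdbTable( None, "example.sales", db = db )
    response = sales.aggregate_unique( column_name = "region",
                                       options = { "sort_order": "descending" },
                                       get_column_major = False )
    # Row-major list of decoded records, one per unique "region" value
    return response[ "records" ]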
def aggregate_unpivot( self, column_names = None, variable_column_name = '',
value_column_name = '', pivoted_columns = None,
encoding = 'binary', options = {},
force_primitive_return_types = True, get_column_major
= True ):
"""Rotate the column values into rows values.
For unpivot details and examples, see
`Unpivot <../../../../concepts/unpivot/>`__. For limitations, see
`Unpivot Limitations <../../../../concepts/unpivot/#limitations>`__.
Unpivot is used to normalize tables that are built for cross tabular
reporting
purposes. The unpivot operator rotates the column values for all the
pivoted
columns. A variable column, value column and all columns from the
source table
except the unpivot columns are projected into the result table. The
variable
column and value columns in the result table indicate the pivoted
column name
and values respectively.
The response is returned as a dynamic schema. For details see:
`dynamic schemas documentation
<../../../../api/concepts/#dynamic-schemas>`__.
Parameters:
column_names (list of str)
List of column names or expressions. A wildcard '*' can be used
to include all the non-pivoted columns from the source table.
variable_column_name (str)
Specifies the variable/parameter column name. The default
value is ''.
value_column_name (str)
Specifies the value column name. The default value is ''.
pivoted_columns (list of str)
List of one or more values, typically the column names of the
input table. All the columns in the source table must have the
same data type.
encoding (str)
Specifies the encoding for returned records.
Allowed values are:
* **binary** --
Indicates that the returned records should be binary encoded.
* **json** --
Indicates that the returned records should be json encoded.
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of *result_table*. If
*result_table_persist* is *false* (or unspecified), then this
is always allowed even if the caller does not have permission
to create tables. The generated name is returned in
*qualified_result_table_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema as part of
*result_table* and use :meth:`GPUdb.create_schema` to create
the schema if non-existent] Name of a schema which is to
contain the table specified in *result_table*. If the schema
is non-existent, it will be automatically created.
* **result_table** --
The name of a table used to store the results, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. If
present, no results are returned in the response.
* **result_table_persist** --
If *true*, then the result table specified in *result_table*
will be persisted and will not expire unless a *ttl* is
specified. If *false*, then the result table will be an
in-memory table and will expire unless a *ttl* is specified
otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **expression** --
Filter expression to apply to the table prior to unpivot
processing.
* **order_by** --
Comma-separated list of the columns to be sorted by; e.g.
'timestamp asc, x desc'. The columns specified must be
present in input table. If any alias is given for any column
name, the alias must be used, rather than the original column
name. The default value is ''.
* **chunk_size** --
Indicates the number of records per chunk to be used for the
result table. Must be used in combination with the
*result_table* option.
* **limit** --
The number of records to keep. The default value is ''.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the table
specified in *result_table*.
* **view_id** --
ID of the view this result table is part of. The default value is ''.
* **create_indexes** --
Comma-separated list of columns on which to create indexes on
the table specified in *result_table*. The columns specified
must be present in output column names. If any alias is
given for any column name, the alias must be used, rather
than the original column name.
* **result_table_force_replicated** --
Force the result table to be replicated (ignores any
sharding). Must be used in combination with the
*result_table* option.
Allowed values are:
* true
* false
The default value is 'false'.
force_primitive_return_types (bool)
If `True`, then `OrderedDict` objects will be returned, where
string sub-type columns will have their values converted back
to strings; for example, the Python `datetime` structs, used
for datetime type columns would have their values returned as
strings. If `False`, then :class:`Record` objects will be
returned, which for string sub-types, will return native or
custom structs; no conversion to string takes place. String
conversions, when returning `OrderedDicts`, incur a speed
penalty, and it is strongly recommended to use the
:class:`Record` object option instead. If `True`, but none of
the returned columns require a conversion, then the original
:class:`Record` objects will be returned. Default value is
True.
get_column_major (bool)
Indicates if the decoded records will be transposed to be
column-major or returned as is (row-major). Default value is
True.
Returns:
A read-only GPUdbTable object if input options has "result_table";
otherwise the response from the server, which is a dict containing
the following entries--
table_name (str)
Typically shows the result-table name if provided in the
request (Ignore otherwise).
response_schema_str (str)
Avro schema of output parameter *binary_encoded_response* or
output parameter *json_encoded_response*.
total_number_of_records (long)
Total/Filtered number of records.
has_more_records (bool)
Too many records. Returned a partial set.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **qualified_result_table_name** --
The fully qualified name of the table (i.e. including the
schema) used to store the results.
records (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
data (list of :class:`Record`)
A list of :class:`Record` objects which contain the decoded
records.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.aggregate_unpivot_and_decode( self.qualified_name,
column_names,
variable_column_name,
value_column_name,
pivoted_columns,
encoding, options,
force_primitive_return_types=
force_primitive_return_types,
get_column_major =
get_column_major )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
response["data"] = response["records"]
if "qualified_result_table_name" in response.info:
qualified_result_table_name = response.info[ "qualified_result_table_name" ]
else:
qualified_result_table_name = None
if qualified_result_table_name:
# Create a read-only table for the result table
return self.create_view( qualified_result_table_name )
return response
# end aggregate_unpivot
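# A minimal usage sketch (not part of the generated API); assumes a hypothetical
# cross-tabular table "example.quarterly_sales" with a "product" column and
# four same-typed numeric columns "q1" through "q4".
def _example_aggregate_unpivot( db ):
    quarterly = GPUdbTable( None, "example.quarterly_sales", db = db )
    # Rotate the four quarter columns into ("quarter", "amount") rows
    response = quarterly.aggregate_unpivot(
        column_names         = [ "product" ],
        variable_column_name = "quarter",
        value_column_name    = "amount",
        pivoted_columns      = [ "q1", "q2", "q3", "q4" ] )
    return response[ "records" ]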
def alter_table( self, action = None, value = None, options = {} ):
"""Apply various modifications to a table or view. The
available modifications include the following:
Manage a table's columns--a column can be added, removed, or have its
`type and properties <../../../../concepts/types/>`__ modified,
including whether it is
`dictionary encoded <../../../../concepts/dictionary_encoding/>`__ or
not.
External tables cannot be modified except for their refresh method.
Create or delete a `column
<../../../../concepts/indexes/#column-index>`__,
`chunk skip <../../../../concepts/indexes/#chunk-skip-index>`__, or
`geospatial <../../../../concepts/indexes/#geospatial-index>`__ index.
This can speed up
certain operations when using expressions containing equality or
relational
operators on indexed columns. This only applies to tables.
Create or delete a `foreign key
<../../../../concepts/tables/#foreign-key>`__
on a particular column.
Manage a
`range-partitioned
<../../../../concepts/tables/#partitioning-by-range>`__ or a
`manual list-partitioned
<../../../../concepts/tables/#partitioning-by-list-manual>`__
table's partitions.
Set (or reset) the `tier strategy
<../../../../rm/concepts/#tier-strategies>`__
of a table or view.
Refresh and manage the refresh mode of a
`materialized view <../../../../concepts/materialized_views/>`__ or an
`external table <../../../../concepts/external_tables/>`__.
Set the `time-to-live (TTL) <../../../../concepts/ttl/>`__. This can be
applied
to tables or views.
Set the global access mode (i.e. locking) for a table. This setting
trumps any
role-based access controls that may be in place; e.g., a user with
write access
to a table marked read-only will not be able to insert records into it.
The mode
can be set to read-only, write-only, read/write, and no access.
Parameters:
action (str)
Modification operation to be applied
Allowed values are:
* **allow_homogeneous_tables** --
No longer supported; action will be ignored.
* **create_index** --
Creates a `column (attribute) index
<../../../../concepts/indexes/#column-index>`__,
`chunk skip index
<../../../../concepts/indexes/#chunk-skip-index>`__, or
`geospatial index
<../../../../concepts/indexes/#geospatial-index>`__
(depending on the specified *index_type*), on the column name
specified in input parameter *value*.
If this column already has the specified index, an error will
be returned.
* **delete_index** --
Deletes a `column (attribute) index
<../../../../concepts/indexes/#column-index>`__,
`chunk skip index
<../../../../concepts/indexes/#chunk-skip-index>`__, or
`geospatial index
<../../../../concepts/indexes/#geospatial-index>`__
(depending on the specified *index_type*), on the column name
specified in input parameter *value*.
If this column does not have the specified index, an error
will be returned.
* **move_to_collection** --
[DEPRECATED--please use *move_to_schema* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Moves a table or view into a schema named
input parameter *value*. If the schema provided is
non-existent, it will be automatically created.
* **move_to_schema** --
Moves a table or view into a schema named input parameter
*value*.
If the schema provided is nonexistent, an error will be
thrown.
If input parameter *value* is empty, then the table or view
will be placed in the user's default schema.
* **protected** --
No longer used. Previously set whether the given input
parameter *table_name* should be protected or not. The input
parameter *value* would have been either 'true' or 'false'.
* **rename_table** --
Renames a table or view within its current schema to input
parameter *value*. Has the same naming restrictions as
`tables <../../../../concepts/tables/>`__.
* **ttl** --
Sets the `time-to-live <../../../../concepts/ttl/>`__ in
minutes of the table or view specified in input parameter
*table_name*.
* **add_column** --
Adds the column specified in input parameter *value* to the
table specified in input parameter *table_name*.
Use *column_type* and *column_properties* in input parameter
*options*
to set the column's type and properties, respectively.
* **change_column** --
Changes type and properties of the column specified in input
parameter *value*.
Use *column_type* and *column_properties* in input parameter
*options* to set
the column's type and properties, respectively. Note that
primary key and/or shard key columns cannot be changed.
All unchanging column properties must be listed for the
change to take place, e.g., to add dictionary encoding to
an existing 'char4' column, both 'char4' and 'dict' must be
specified in the input parameter *options* map.
* **set_column_compression** --
No longer supported; action will be ignored.
* **delete_column** --
Deletes the column specified in input parameter *value* from
the table specified in input parameter *table_name*.
* **create_foreign_key** --
Creates a `foreign key
<../../../../concepts/tables/#foreign-key>`__ specified in
input parameter *value* using the format '(source_column_name
[, ...]) references target_table_name(primary_key_column_name
[, ...]) [as foreign_key_name]'.
* **delete_foreign_key** --
Deletes a `foreign key
<../../../../concepts/tables/#foreign-key>`__. The input
parameter *value* should be the foreign_key_name specified
when creating the key or the complete string used to define
it.
* **add_partition** --
Adds the partition specified in input parameter *value*, to
either a `range-partitioned
<../../../../concepts/tables/#partitioning-by-range>`__ or
`manual list-partitioned
<../../../../concepts/tables/#partitioning-by-list-manual>`__
table.
* **remove_partition** --
Removes the partition specified in input parameter *value*
(and relocates all of its data to the default partition) from
either a `range-partitioned
<../../../../concepts/tables/#partitioning-by-range>`__ or
`manual list-partitioned
<../../../../concepts/tables/#partitioning-by-list-manual>`__
table.
* **delete_partition** --
Deletes the partition specified in input parameter *value*
(and all of its data) from either a `range-partitioned
<../../../../concepts/tables/#partitioning-by-range>`__ or
`manual list-partitioned
<../../../../concepts/tables/#partitioning-by-list-manual>`__
table.
* **set_global_access_mode** --
Sets the global access mode (i.e. locking) for the table
specified in input parameter *table_name*. Specify the access
mode in input parameter *value*. Valid modes are 'no_access',
'read_only', 'write_only' and 'read_write'.
* **refresh** --
For a `materialized view
<../../../../concepts/materialized_views/>`__, replays all
the table creation commands required to create the view. For
an `external table
<../../../../concepts/external_tables/>`__, reloads all data
in the table from its associated source files or `data source
<../../../../concepts/data_sources/>`__.
* **set_refresh_method** --
For a `materialized view
<../../../../concepts/materialized_views/>`__, sets the
method by which the view is refreshed to the method specified
in input parameter *value* - one of 'manual', 'periodic', or
'on_change'. For an `external table
<../../../../concepts/external_tables/>`__, sets the method
by which the table is refreshed to the method specified in
input parameter *value* - either 'manual' or 'on_start'.
* **set_refresh_start_time** --
Sets the time to start periodic refreshes of this
`materialized view
<../../../../concepts/materialized_views/>`__ to the datetime
string specified in input parameter *value* with format
'YYYY-MM-DD HH:MM:SS'. Subsequent refreshes occur at the
specified time + N * the refresh period.
* **set_refresh_stop_time** --
Sets the time to stop periodic refreshes of this
`materialized view
<../../../../concepts/materialized_views/>`__ to the datetime
string specified in input parameter *value* with format
'YYYY-MM-DD HH:MM:SS'.
* **set_refresh_period** --
Sets the time interval in seconds at which to refresh this
`materialized view
<../../../../concepts/materialized_views/>`__ to the value
specified in input parameter *value*. Also, sets the refresh
method to periodic if not already set.
* **set_refresh_span** --
Sets the future time-offset (in seconds) for the view refresh
to stop.
* **set_refresh_execute_as** --
Sets the user name to refresh this `materialized view
<../../../../concepts/materialized_views/>`__ to the value
specified in input parameter *value*.
* **remove_text_search_attributes** --
Removes `text search
<../../../../concepts/full_text_search/>`__ attribute from
all columns.
* **remove_shard_keys** --
Removes the shard key property from all columns, so that the
table will be considered randomly sharded. The data is not
moved. The input parameter *value* is ignored.
* **set_strategy_definition** --
Sets the `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns to the one specified in input parameter
*value*, replacing the existing tier strategy in its
entirety.
* **cancel_datasource_subscription** --
Permanently unsubscribe a data source that is loading
continuously as a stream. The data source can be Kafka / S3 /
Azure.
* **pause_datasource_subscription** --
Temporarily unsubscribe a data source that is loading
continuously as a stream. The data source can be Kafka / S3 /
Azure.
* **resume_datasource_subscription** --
Resubscribe to a paused data source subscription. The data
source can be Kafka / S3 / Azure.
* **change_owner** --
Change the owner resource group of the table.
value (str)
The value of the modification, depending on input parameter
*action*.
For example, if input parameter *action* is *add_column*, this
would be the column name;
while the column's definition would be covered by the
*column_type*,
*column_properties*, *column_default_value*,
and *add_column_expression* in input parameter *options*.
If input parameter *action* is *ttl*, it would be the number of
minutes for the new TTL.
If input parameter *action* is *refresh*, this field would be
blank.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **column_default_value** --
When adding a column, set a default value for existing
records. For nullable columns, the default value will be
null, regardless of data type.
* **column_properties** --
When adding or changing a column, set the column properties
(strings, separated by a comma: data, store_only,
text_search, char8, int8 etc).
* **column_type** --
When adding or changing a column, set the column type
(strings, separated by a comma: int, double, string, null
etc).
* **compression_type** --
No longer supported; option will be ignored.
Allowed values are:
* none
* snappy
* lz4
* lz4hc
The default value is 'snappy'.
* **copy_values_from_column** --
[DEPRECATED--please use *add_column_expression* instead.]
* **rename_column** --
When changing a column, specify new column name.
* **validate_change_column** --
When changing a column, validate the change before applying
it (or not).
Allowed values are:
* **true** --
Validate all values. A value too large (or too long) for
the new type will prevent any change.
* **false** --
When a value is too large or long, it will be truncated.
The default value is 'true'.
* **update_last_access_time** --
Indicates whether the `time-to-live
<../../../../concepts/ttl/>`__ (TTL) expiration countdown
timer should be reset to the table's TTL.
Allowed values are:
* **true** --
Reset the expiration countdown timer to the table's
configured TTL.
* **false** --
Don't reset the timer; expiration countdown will continue
from where it is, as if the table had not been accessed.
The default value is 'true'.
* **add_column_expression** --
When adding a column, an optional expression to use for the
new column's values. Any valid expression may be used,
including one containing references to existing columns in
the same table.
* **strategy_definition** --
Optional parameter for specifying the `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns when input parameter *action* is
*set_strategy_definition*, replacing the existing tier
strategy in its entirety.
* **index_type** --
Type of index to create, when input parameter *action* is
*create_index*,
or to delete, when input parameter *action* is
*delete_index*.
Allowed values are:
* **column** --
Create or delete a `column (attribute) index
<../../../../concepts/indexes/#column-index>`__.
* **chunk_skip** --
Create or delete a `chunk skip index
<../../../../concepts/indexes/#chunk-skip-index>`__.
* **geospatial** --
Create or delete a geospatial index.
The default value is 'column'.
Returns:
The response from the server which is a dict containing the
following entries--
table_name (str)
Table on which the operation was performed.
action (str)
Modification operation that was performed.
value (str)
The value of the modification that was performed.
type_id (str)
return the type_id (when changing a table, a new type may be
created)
type_definition (str)
return the type_definition (when changing a table, a new type
may be created)
properties (dict of str to lists of str)
return the type properties (when changing a table, a new type
may be created)
label (str)
return the type label (when changing a table, a new type may
be created)
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.alter_table( self.qualified_name, action, value,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
self.__save_table_type( response.type_definition,
response.properties )
if (action == "rename_table" ):
self._name = value
self.qualified_name = value
# Update the name for multi-head I/O objects, if any
if self._multihead_ingestor is not None:
self._multihead_ingestor.table_name = value
if self._multihead_retriever is not None:
self._multihead_retriever.table_name = value
return response
# end alter_table
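# Example (hedged usage sketch): adding an integer column via alter_table().
# Assumes `table` is an existing GPUdbTable instance; the column name and
# default value below are hypothetical.
#
#   response = table.alter_table( action = "add_column",
#                                 value = "num_orders",
#                                 options = { "column_type": "int",
#                                             "column_default_value": "0" } )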
def alter_table_columns( self, column_alterations = None, options = None ):
"""Apply various modifications to columns in a table, view. The available
modifications include the following:
Create or delete an `index
<../../../../concepts/indexes/#column-index>`__ on a
particular column. This can speed up certain operations when using
expressions
containing equality or relational operators on indexed columns. This
only
applies to tables.
Manage a table's columns--a column can be added, removed, or have its
`type and properties <../../../../concepts/types/>`__ modified,
including whether it is
`dictionary encoded <../../../../concepts/dictionary_encoding/>`__ or
not.
Parameters:
column_alterations (list of dicts of str to str)
List of alter table add/delete/change column requests - all for
the same table. Each request is a map that includes
'column_name', 'action' and the options specific for the
action. Note that the same options as in alter table requests
apply here, but are given in the same map as the column name and the action. For
example:
[{'column_name':'col_1','action':'change_column','rename_column':'col_2'},{'column_name':'col_1','action':'add_column',
'type':'int','default_value':'1'}] The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters.
Returns:
The response from the server which is a dict containing the
following entries--
table_name (str)
Table on which the operation was performed.
type_id (str)
return the type_id (when changing a table, a new type may be
created)
type_definition (str)
return the type_definition (when changing a table, a new type
may be created)
properties (dict of str to lists of str)
return the type properties (when changing a table, a new type
may be created)
label (str)
return the type label (when changing a table, a new type may
be created)
column_alterations (list of dicts of str to str)
List of alter table add/delete/change column requests - all for
the same table. Each request is a map that includes
'column_name', 'action' and the options specific for the
action. Note that the same options as in alter table requests
apply here, but are given in the same map as the column name and the action. For
example:
[{'column_name':'col_1','action':'change_column','rename_column':'col_2'},{'column_name':'col_1','action':'add_column',
'type':'int','default_value':'1'}]
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.alter_table_columns( self.qualified_name,
column_alterations, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end alter_table_columns
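# Example (hedged usage sketch): batching two column changes in a single
# alter_table_columns() call, mirroring the request format documented above.
# Assumes `table` is an existing GPUdbTable instance; the column names are
# hypothetical.
#
#   response = table.alter_table_columns(
#       column_alterations = [
#           { "column_name": "col_1", "action": "change_column",
#             "rename_column": "col_2" },
#           { "column_name": "col_3", "action": "add_column",
#             "type": "int", "default_value": "1" } ] )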
def append_records( self, source_table_name = None, field_map = None,
options = {} ):
"""Append (or insert) all records from a source table
(specified by input parameter *source_table_name*) to a particular
target table
(specified by input parameter *table_name*). The field map
(specified by input parameter *field_map*) holds the user specified map
of target table
column names with their mapped source column names.
Parameters:
source_table_name (str)
The source table name to get records from, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table name.
field_map (dict of str to str)
Contains the mapping of column names from the target table
(specified by input parameter *table_name*) as the keys, and
corresponding column names or expressions (e.g., 'col_name+1')
from the source table (specified by input parameter
*source_table_name*). Must be existing column names in source
table and target table, and their types must be matched. For
details on using expressions, see `Expressions
<../../../../concepts/expressions/>`__.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **offset** --
A positive integer indicating the number of initial results
to skip from input parameter *source_table_name*. Default is
0. The minimum allowed value is 0. The maximum allowed value
is MAX_INT. The default value is '0'.
* **limit** --
A positive integer indicating the maximum number of results
to be returned from input parameter *source_table_name*. Or
END_OF_SET (-9999) to indicate that the max number of results
should be returned. The default value is '-9999'.
* **expression** --
Optional filter expression to apply to the input parameter
*source_table_name*. The default value is ''.
* **order_by** --
Comma-separated list of the columns to be sorted by from
source table (specified by input parameter
*source_table_name*), e.g., 'timestamp asc, x desc'. The
*order_by* columns do not have to be present in input
parameter *field_map*. The default value is ''.
* **update_on_existing_pk** --
Specifies the record collision policy for inserting source
table
records (specified by input parameter *source_table_name*)
into a target table
(specified by input parameter *table_name*) with a `primary
key <../../../../concepts/tables/#primary-keys>`__. If
set to *true*, any existing table record with
primary key values that match those of a source table record
being inserted will be replaced by that
new record (the new data will be "upserted"). If set to
*false*, any existing table record with primary
key values that match those of a source table record being
inserted will remain unchanged, while the
source record will be rejected and an error handled as
determined by
*ignore_existing_pk*. If the specified table does not have a
primary key,
then this option has no effect.
Allowed values are:
* **true** --
Upsert new records when primary keys match existing records
* **false** --
Reject new records when primary keys match existing records
The default value is 'false'.
* **ignore_existing_pk** --
Specifies the record collision error-suppression policy for
inserting source table records (specified by input parameter
*source_table_name*) into a target table
(specified by input parameter *table_name*) with a `primary
key <../../../../concepts/tables/#primary-keys>`__, only
used when not in upsert mode (upsert mode is disabled when
*update_on_existing_pk* is
*false*). If set to
*true*, any source table record being inserted that
is rejected for having primary key values that match those of
an existing target table record will
be ignored with no error generated. If *false*,
the rejection of any source table record for having primary
key values matching an existing target
table record will result in an error being raised. If the
specified table does not have a primary
key or if upsert mode is in effect (*update_on_existing_pk*
is
*true*), then this option has no effect.
Allowed values are:
* **true** --
Ignore source table records whose primary key values
collide with those of target table records
* **false** --
Raise an error for any source table record whose primary
key values collide with those of a target table record
The default value is 'false'.
* **truncate_strings** --
If set to *true*, it allows inserting longer strings into
smaller charN string columns by truncating the longer strings
to fit.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
The response from the server which is a dict containing the
following entries--
table_name (str)
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.append_records( self.qualified_name,
source_table_name, field_map, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end append_records
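# Example (hedged usage sketch): appending filtered, ordered records from a
# source table using append_records(). Assumes `table` is an existing
# GPUdbTable instance; the source table, column names, and expression are
# hypothetical.
#
#   response = table.append_records(
#       source_table_name = "ki_home.staging_orders",
#       field_map = { "order_id": "id", "total": "price * quantity" },
#       options = { "expression": "price > 0",
#                   "order_by": "timestamp asc" } )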
def clear_statistics( self, column_name = '', options = {} ):
"""Clears statistics (cardinality, mean value, etc.) for a column in a
specified table.
Parameters:
column_name (str)
Name of the column in input parameter *table_name* for which to
clear statistics. The column must be from an existing table. An
empty string clears statistics for all columns in the table.
The default value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
The response from the server which is a dict containing the
following entries--
table_name (str)
Value of input parameter *table_name*.
column_name (str)
Value of input parameter *column_name*.
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.clear_statistics( self.qualified_name, column_name,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end clear_statistics
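# Example (hedged usage sketch): clearing statistics for one column, then for
# all columns, via clear_statistics(). Assumes `table` is an existing
# GPUdbTable instance; the column name is hypothetical.
#
#   table.clear_statistics( column_name = "price" )   # a single column
#   table.clear_statistics( column_name = "" )        # all columns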
def clear( self, authorization = '', options = {} ):
"""Clears (drops) one or all tables in the database cluster. The
operation is synchronous meaning that the table will be cleared before
the
function returns. The response payload returns the status of the
operation along
with the name of the table that was cleared.
Parameters:
authorization (str)
No longer used. User can pass an empty string. The default
value is ''.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **no_error_if_not_exists** --
If *true* and if the table specified in input parameter
*table_name* does not exist no error is returned. If *false*
and if the table specified in input parameter *table_name*
does not exist then an error is returned.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
The response from the server which is a dict containing the
following entries--
table_name (str)
Value of input parameter *table_name* for a given table, or
'ALL CLEARED' in case of clearing all tables.
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.clear_table( self.qualified_name, authorization,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end clear
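# Example (hedged usage sketch): dropping the table with clear(), suppressing
# the error if it does not exist. Assumes `table` is an existing GPUdbTable
# instance.
#
#   table.clear( options = { "no_error_if_not_exists": "true" } )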
def collect_statistics( self, column_names = None, options = {} ):
"""Collect statistics for a column(s) in a specified table.
Parameters:
column_names (list of str)
List of one or more column names in input parameter
*table_name* for which to collect statistics (cardinality, mean
value, etc.). The user can provide a single element (which
will be automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
The response from the server which is a dict containing the
following entries--
table_name (str)
Value of input parameter *table_name*.
column_names (list of str)
Value of input parameter *column_names*.
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.collect_statistics( self.qualified_name,
column_names, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end collect_statistics
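# Example (hedged usage sketch): collecting statistics for two columns via
# collect_statistics(). Assumes `table` is an existing GPUdbTable instance;
# the column names are hypothetical.
#
#   response = table.collect_statistics( column_names = [ "price", "quantity" ] )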
def create_delta_table( self, delta_table_name = None, options = {} ):
response = self.db.create_delta_table( delta_table_name,
self.qualified_name, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end create_delta_table
def create_projection( self, column_names = None, options = {},
projection_name = None ):
"""Creates a new `projection <../../../../concepts/projections/>`__ of
an existing table. A projection represents a subset of the columns
(potentially
including derived columns) of a table.
For projection details and examples, see
`Projections <../../../../concepts/projections/>`__. For limitations,
see
`Projection Limitations and Cautions
<../../../../concepts/projections/#limitations-and-cautions>`__.
`Window functions <../../../../concepts/window/>`__, which can perform
operations like moving averages, are available through this endpoint as
well as
:meth:`GPUdb.get_records_by_column`.
A projection can be created with a different
`shard key <../../../../concepts/tables/#shard-keys>`__ than the source
table.
By specifying *shard_key*, the projection will be sharded
according to the specified columns, regardless of how the source table
is
sharded. The source table can even be unsharded or replicated.
If input parameter *table_name* is empty, selection is performed
against a single-row
virtual table. This can be useful in executing temporal
(`NOW() <../../../../concepts/expressions/#date-time-functions>`__),
identity
(`USER()
<../../../../concepts/expressions/#user-security-functions>`__), or
constant-based functions
(`GEODIST(-77.11, 38.88, -71.06, 42.36)
<../../../../concepts/expressions/#scalar-functions>`__).
Parameters:
column_names (list of str)
List of columns from input parameter *table_name* to be
included in the projection. Can include derived columns. Can be
specified as aliased via the syntax 'column_name as alias'.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*projection_name*. If *persist* is *false* (or unspecified),
then this is always allowed even if the caller does not have
permission to create tables. The generated name is returned
in *qualified_projection_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
projection as part of input parameter *projection_name* and
use :meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the projection. If the
schema is non-existent, it will be automatically created.
The default value is ''.
* **expression** --
An optional filter `expression
<../../../../concepts/expressions/>`__ to be applied to the
source table prior to the projection. The default value is
''.
* **is_replicated** --
If *true* then the projection will be replicated even if the
source table is not.
Allowed values are:
* true
* false
The default value is 'false'.
* **offset** --
The number of initial results to skip (this can be useful for
paging through the results). The default value is '0'.
* **limit** --
The number of records to keep. The default value is '-9999'.
* **order_by** --
Comma-separated list of the columns to be sorted by; e.g.
'timestamp asc, x desc'. The columns specified must be
present in input parameter *column_names*. If any alias is
given for any column name, the alias must be used, rather
than the original column name. The default value is ''.
* **chunk_size** --
Indicates the number of records per chunk to be used for this
projection.
* **create_indexes** --
Comma-separated list of columns on which to create indexes on
the projection. The columns specified must be present in
input parameter *column_names*. If any alias is given for
any column name, the alias must be used, rather than the
original column name.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the
projection specified in input parameter *projection_name*.
* **shard_key** --
Comma-separated list of the columns to be sharded on; e.g.
'column1, column2'. The columns specified must be present in
input parameter *column_names*. If any alias is given for
any column name, the alias must be used, rather than the
original column name. The default value is ''.
* **persist** --
If *true*, then the projection specified in input parameter
*projection_name* will be persisted and will not expire
unless a *ttl* is specified. If *false*, then the
projection will be an in-memory table and will expire unless
a *ttl* is specified otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **preserve_dict_encoding** --
If *true*, then columns that were dict encoded in the source
table will be dict encoded in the projection.
Allowed values are:
* true
* false
The default value is 'true'.
* **retain_partitions** --
Determines whether the created projection will retain the
partitioning scheme from the source table.
Allowed values are:
* true
* false
The default value is 'false'.
* **partition_type** --
`Partitioning <../../../../concepts/tables/#partitioning>`__
scheme to use.
Allowed values are:
* **RANGE** --
Use `range partitioning
<../../../../concepts/tables/#partitioning-by-range>`__.
* **INTERVAL** --
Use `interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__.
* **LIST** --
Use `list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__.
* **HASH** --
Use `hash partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__.
* **SERIES** --
Use `series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__.
* **partition_keys** --
Comma-separated list of partition keys, which are the columns
or column expressions by which records will be assigned to
partitions defined by *partition_definitions*.
* **partition_definitions** --
Comma-separated list of partition definitions, whose format
depends on the choice of *partition_type*. See `range
partitioning
<../../../../concepts/tables/#partitioning-by-range>`__,
`interval partitioning
<../../../../concepts/tables/#partitioning-by-interval>`__,
`list partitioning
<../../../../concepts/tables/#partitioning-by-list>`__, `hash
partitioning
<../../../../concepts/tables/#partitioning-by-hash>`__, or
`series partitioning
<../../../../concepts/tables/#partitioning-by-series>`__ for
example formats.
* **is_automatic_partition** --
If *true*, a new partition will be created for values which
don't fall into an existing partition. Currently only
supported for `list partitions
<../../../../concepts/tables/#partitioning-by-list>`__.
Allowed values are:
* true
* false
The default value is 'false'.
* **view_id** --
ID of view of which this projection is a member. The default
value is ''.
* **strategy_definition** --
The `tier strategy
<../../../../rm/concepts/#tier-strategies>`__ for the table
and its columns.
projection_name (str)
Name of the projection to be created, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
projection_name = self.__process_view_name( projection_name )
response = self.db.create_projection( self.qualified_name,
projection_name, column_names,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_projection_name" in response.info:
projection_name = response.info[ "qualified_projection_name" ]
return self.create_view( projection_name )
# end create_projection
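# Example (hedged usage sketch): creating a persisted projection with a derived
# column and an ordering via create_projection(). Assumes `table` is an
# existing GPUdbTable instance; the projection, schema, and column names are
# hypothetical. The returned object is a read-only GPUdbTable.
#
#   proj = table.create_projection(
#       column_names = [ "id", "price * quantity as total" ],
#       options = { "expression": "quantity > 0",
#                   "order_by": "total desc",
#                   "persist": "true" },
#       projection_name = "ki_home.order_totals" )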
def create_state_table( self, table_name = None, options = {} ):
response = self.db.create_state_table( table_name, self.qualified_name,
self.qualified_name, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end create_state_table
def create_table_monitor( self, options = {} ):
"""Creates a monitor that watches for a single table modification event
type (insert, update, or delete) on a particular table (identified by
input parameter *table_name*) and forwards event notifications to
subscribers via ZMQ.
After this call completes, subscribe to the returned output parameter
*topic_id* on the
ZMQ table monitor port (default 9002). Each time an operation of the
given type
on the table completes, a multipart message is published for that
topic; the
first part contains only the topic ID, and each subsequent part
contains one
binary-encoded Avro object that corresponds to the event and can be
decoded
using output parameter *type_schema*. The monitor will continue to run
(regardless of
whether or not there are any subscribers) until deactivated with
:meth:`GPUdb.clear_table_monitor`.
For more information on table monitors, see
`Table Monitors <../../../../concepts/table_monitors/>`__.
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **event** --
Type of modification event on the target table to be
monitored by this table monitor.
Allowed values are:
* **insert** --
Get notifications of new record insertions. The new row
images are forwarded to the subscribers.
* **update** --
Get notifications of update operations. The modified row
count information is forwarded to the subscribers.
* **delete** --
Get notifications of delete operations. The deleted row
count information is forwarded to the subscribers.
The default value is 'insert'.
* **monitor_id** --
ID to use for this monitor instead of a randomly generated
one
* **datasink_name** --
Name of an existing `data sink
<../../../../concepts/data_sinks/>`__ to send change data
notifications to
* **destination** --
Destination for the output data in format
'destination_type://path[:port]'. Supported destination types
are 'http', 'https' and 'kafka'.
* **kafka_topic_name** --
Name of the Kafka topic to publish to if *destination* in
input parameter *options* is specified and is a Kafka broker
* **increasing_column** --
Column on subscribed table that will increase for new records
(e.g., TIMESTAMP).
* **expression** --
Filter expression to limit records for notification
* **refresh_method** --
Method controlling when the table monitor reports changes to
the input parameter *table_name*.
Allowed values are:
* **on_change** --
Report changes as they occur.
* **periodic** --
Report changes periodically at rate specified by
*refresh_period*.
The default value is 'on_change'.
* **refresh_period** --
When *refresh_method* is *periodic*, specifies the period in
seconds at which changes are reported.
* **refresh_start_time** --
When *refresh_method* is *periodic*, specifies the first time
at which changes are reported. Value is a datetime string
with format 'YYYY-MM-DD HH:MM:SS'.
Returns:
The response from the server which is a dict containing the
following entries--
topic_id (str)
The ZMQ topic ID to subscribe to for table events.
table_name (str)
Value of input parameter *table_name*.
type_schema (str)
JSON Avro schema of the table, for use in decoding published
records.
info (dict of str to str)
Additional information. The default value is an empty dict (
{} ).
Allowed keys are:
* **insert_topic_id** --
The topic id for 'insert' *event* in input parameter
*options*
* **update_topic_id** --
The topic id for 'update' *event* in input parameter
*options*
* **delete_topic_id** --
The topic id for 'delete' *event* in input parameter
*options*
* **insert_type_schema** --
The JSON Avro schema of the table in output parameter
*table_name*
* **update_type_schema** --
The JSON Avro schema for 'update' events
* **delete_type_schema** --
The JSON Avro schema for 'delete' events
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.create_table_monitor( self.qualified_name, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end create_table_monitor
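# Example (hedged usage sketch): creating an insert monitor via
# create_table_monitor() and reading the ZMQ topic ID from the response.
# Assumes `table` is an existing GPUdbTable instance; subscribing to the
# returned topic on the table monitor port (default 9002) is done separately
# with a ZMQ client and is not shown here.
#
#   response = table.create_table_monitor( options = { "event": "insert" } )
#   topic_id = response[ "topic_id" ]
#   type_schema = response[ "type_schema" ]   # Avro schema for decoding events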
def delete_records( self, expressions = None, options = {} ):
"""Deletes record(s) matching the provided criteria from the given table.
The record selection criteria can either be one or more input
parameter *expressions* (matching multiple records), a single record
identified by *record_id* options, or all records when using
*delete_all_records*. Note that the three selection criteria are
mutually exclusive. This operation cannot be run on a view. The
operation is synchronous meaning that a response will not be available
until the request is completely processed and all the matching records
are deleted.
Parameters:
expressions (list of str)
A list of the actual predicates, one for each select; format
should follow the guidelines provided `here
<../../../../concepts/expressions/>`__. Specifying one or more
input parameter *expressions* is mutually exclusive to
specifying *record_id* in the input parameter *options*. The
user can provide a single element (which will be automatically
promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **global_expression** --
An optional global expression to reduce the search space of
the input parameter *expressions*. The default value is ''.
* **record_id** --
A record ID identifying a single record, obtained at the time
of :meth:`insertion of the record <GPUdb.insert_records>` or
by calling :meth:`GPUdb.get_records_from_collection` with the
*return_record_ids* option. This option cannot be used to
delete records from `replicated
<../../../../concepts/tables/#replication>`__ tables.
* **delete_all_records** --
If set to *true*, all records in the table will be deleted.
If set to *false*, then the option is effectively ignored.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
The response from the server which is a dict containing the
following entries--
count_deleted (long)
Total number of records deleted across all expressions.
counts_deleted (list of longs)
Total number of records deleted per expression.
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.delete_records( self.qualified_name, expressions,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end delete_records
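# Example (hedged usage sketch): deleting records matching an expression via
# delete_records(). Assumes `table` is an existing GPUdbTable instance; the
# expression is hypothetical.
#
#   response = table.delete_records( expressions = [ "price < 0" ] )
#   deleted = response[ "count_deleted" ]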
def filter( self, expression = None, options = {}, view_name = '' ):
"""Filters data based on the specified expression. The results are
stored in a `result set <../../../../concepts/filtered_views/>`__ with
the
given input parameter *view_name*.
For details see `Expressions <../../../../concepts/expressions/>`__.
The response message contains the number of points for which the
expression
evaluated to be true, which is equivalent to the size of the result
view.
Parameters:
expression (str)
The select expression to filter the specified table. For
details see `Expressions
<../../../../concepts/expressions/>`__.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
* **view_id** --
view this filtered-view is part of. The default value is ''.
* **ttl** --
Sets the `TTL <../../../../concepts/ttl/>`__ of the view
specified in input parameter *view_name*.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter( self.qualified_name, view_name, expression,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter
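# Example (hedged usage sketch): filtering into a named view with filter().
# Assumes `table` is an existing GPUdbTable instance; the expression and view
# name are hypothetical. The returned object is a read-only GPUdbTable whose
# record count equals the number of matching rows.
#
#   view = table.filter( expression = "price > 100 and quantity <= 5",
#                        view_name = "ki_home.expensive_orders" )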
def filter_by_area( self, x_column_name = None, x_vector = None,
y_column_name = None, y_vector = None, options = {},
view_name = '' ):
"""Calculates which objects from a table are within a named area of
interest (NAI/polygon). The operation is synchronous, meaning that a
response
will not be returned until all the matching objects are fully
available. The
response payload provides the count of the resulting set. A new
resultant set
(view) which satisfies the input NAI restriction specification is
created with
the name input parameter *view_name* passed in as part of the input.
Parameters:
x_column_name (str)
Name of the column containing the x values to be filtered.
x_vector (list of floats)
List of x coordinates of the vertices of the polygon
representing the area to be filtered. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
y_column_name (str)
Name of the column containing the y values to be filtered.
y_vector (list of floats)
List of y coordinates of the vertices of the polygon
representing the area to be filtered. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema provided is non-existent, it will be
automatically created.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_area( self.qualified_name, view_name,
x_column_name, x_vector,
y_column_name, y_vector, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_area
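# Example (hedged usage sketch): filtering points that fall inside a triangular
# polygon with filter_by_area(). Assumes `table` is an existing GPUdbTable
# instance; the x/y column names and vertex coordinates are hypothetical.
#
#   view = table.filter_by_area( x_column_name = "lon",
#                                x_vector = [ -75.0, -74.0, -74.5 ],
#                                y_column_name = "lat",
#                                y_vector = [ 40.0, 40.0, 41.0 ] )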
def filter_by_area_geometry( self, column_name = None, x_vector = None,
y_vector = None, options = {}, view_name = ''
):
"""Calculates which geospatial geometry objects from a table intersect
a named area of interest (NAI/polygon). The operation is synchronous,
meaning
that a response will not be returned until all the matching objects are
fully
available. The response payload provides the count of the resulting
set. A new
resultant set (view) which satisfies the input NAI restriction
specification is
created with the name input parameter *view_name* passed in as part of
the input.
Parameters:
column_name (str)
Name of the geospatial geometry column to be filtered.
x_vector (list of floats)
List of x coordinates of the vertices of the polygon
representing the area to be filtered. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
y_vector (list of floats)
List of y coordinates of the vertices of the polygon
representing the area to be filtered. The user can provide a
single element (which will be automatically promoted to a list
internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] The schema for the newly created view. If the
schema is non-existent, it will be automatically created.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_area_geometry( self.qualified_name,
view_name, column_name,
x_vector, y_vector, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_area_geometry
def filter_by_box( self, x_column_name = None, min_x = None, max_x = None,
y_column_name = None, min_y = None, max_y = None, options
= {}, view_name = '' ):
"""Calculates how many objects within the given table lie in a
rectangular box. The operation is synchronous, meaning that a response
will not
be returned until all the objects are fully available. The response
payload
provides the count of the resulting set. A new resultant set which
satisfies the
input NAI restriction specification is also created when an input
parameter *view_name* is
passed in as part of the input payload.
Parameters:
x_column_name (str)
Name of the column on which to perform the bounding box query.
Must be a valid numeric column.
min_x (float)
Lower bound for the column chosen by input parameter
*x_column_name*. Must be less than or equal to input parameter
*max_x*.
max_x (float)
Upper bound for input parameter *x_column_name*. Must be
greater than or equal to input parameter *min_x*.
y_column_name (str)
Name of a column on which to perform the bounding box query.
Must be a valid numeric column.
min_y (float)
Lower bound for input parameter *y_column_name*. Must be less
than or equal to input parameter *max_y*.
max_y (float)
Upper bound for input parameter *y_column_name*. Must be
greater than or equal to input parameter *min_y*.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_box( self.qualified_name, view_name,
x_column_name, min_x, max_x,
y_column_name, min_y, max_y, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_box
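# Example (hedged usage sketch): filtering records within a bounding box via
# filter_by_box(). Assumes `table` is an existing GPUdbTable instance; the
# column names and bounds are hypothetical.
#
#   view = table.filter_by_box( x_column_name = "lon", min_x = -75.0, max_x = -74.0,
#                               y_column_name = "lat", min_y = 40.0, max_y = 41.0 )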
def filter_by_box_geometry( self, column_name = None, min_x = None, max_x =
None, min_y = None, max_y = None, options = {},
view_name = '' ):
"""Calculates which geospatial geometry objects from a table intersect
a rectangular box. The operation is synchronous, meaning that a
response will
not be returned until all the objects are fully available. The response
payload
provides the count of the resulting set. A new resultant set which
satisfies the
input NAI restriction specification is also created when an input
parameter *view_name* is
passed in as part of the input payload.
Parameters:
column_name (str)
Name of the geospatial geometry column to be filtered.
min_x (float)
Lower bound for the x-coordinate of the rectangular box. Must
be less than or equal to input parameter *max_x*.
max_x (float)
Upper bound for the x-coordinate of the rectangular box. Must
be greater than or equal to input parameter *min_x*.
min_y (float)
Lower bound for the y-coordinate of the rectangular box. Must
be less than or equal to input parameter *max_y*.
max_y (float)
Upper bound for the y-coordinate of the rectangular box. Must
be greater than or equal to input parameter *min_y*.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema provided is non-existent, it will be
automatically created.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_box_geometry( self.qualified_name,
view_name, column_name,
min_x, max_x, min_y, max_y,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_box_geometry
def filter_by_geometry( self, column_name = None, input_wkt = '', operation
= None, options = {}, view_name = '' ):
"""Applies a geometry filter against a geospatial geometry column in a
given table or view. The filtering geometry is provided by input
parameter *input_wkt*.
Parameters:
column_name (str)
Name of the column to be used in the filter. Must be a
geospatial geometry column.
input_wkt (str)
A geometry in WKT format that will be used to filter the
objects in input parameter *table_name*. The default value is
''.
operation (str)
The geometric filtering operation to perform
Allowed values are:
* **contains** --
Matches records that contain the given WKT in input parameter
*input_wkt*, i.e. the given WKT is within the bounds of a
record's geometry.
* **crosses** --
Matches records that cross the given WKT.
* **disjoint** --
Matches records that are disjoint from the given WKT.
* **equals** --
Matches records that are the same as the given WKT.
* **intersects** --
Matches records that intersect the given WKT.
* **overlaps** --
Matches records that overlap the given WKT.
* **touches** --
Matches records that touch the given WKT.
* **within** --
Matches records that are within the given WKT.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema provided is non-existent, it will be
automatically created.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_geometry( self.qualified_name, view_name,
column_name, input_wkt,
operation, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_geometry
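# Example (hedged usage sketch): keeping only geometries that intersect a WKT
# polygon via filter_by_geometry(). Assumes `table` is an existing GPUdbTable
# instance with a geospatial geometry column; the column name and WKT are
# hypothetical.
#
#   view = table.filter_by_geometry(
#       column_name = "geom",
#       input_wkt = "POLYGON((-75 40, -74 40, -74 41, -75 41, -75 40))",
#       operation = "intersects" )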
def filter_by_list( self, column_values_map = None, options = {}, view_name
= '' ):
"""Calculates which records from a table have values in the given list
for the corresponding column. The operation is synchronous, meaning
that a
response will not be returned until all the objects are fully
available. The
response payload provides the count of the resulting set. A new
resultant set
(view) which satisfies the input filter specification is also created
if an
input parameter *view_name* is passed in as part of the request.
For example, if a type definition has the columns 'x' and 'y', then a
filter by
list query with the column map
{"x":["10.1", "2.3"], "y":["0.0", "-31.5", "42.0"]} will return
the count of all data points whose x and y values match both in the
respective
x- and y-lists, e.g., "x = 10.1 and y = 0.0", "x = 2.3 and y = -31.5",
etc.
However, a record with "x = 10.1 and y = -31.5" or "x = 2.3 and y =
0.0"
would not be returned because the values in the given lists do not
correspond.
Parameters:
column_values_map (dict of str to lists of str)
List of values for the corresponding column in the table
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema provided is non-existent, it will be
automatically created.
* **filter_mode** --
String indicating the filter mode, either 'in_list' or
'not_in_list'.
Allowed values are:
* **in_list** --
The filter will match all items that are in the provided
list(s).
* **not_in_list** --
The filter will match all items that are not in the
provided list(s).
The default value is 'in_list'.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_list( self.qualified_name, view_name,
column_values_map, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_list
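# Illustrative usage sketch (not part of the generated API). Assumes a
# GPUdbTable instance 'table' with columns 'x' and 'y' (hypothetical names);
# values are passed as strings, per the column_values_map parameter:
#
#   view = table.filter_by_list(
#       column_values_map = { "x": ["10.1", "2.3"], "y": ["0.0", "-31.5"] },
#       options = { "filter_mode": "in_list" } )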
def filter_by_radius( self, x_column_name = None, x_center = None,
y_column_name = None, y_center = None, radius = None,
options = {}, view_name = '' ):
"""Calculates which objects from a table lie within a circle with the
given radius and center point (i.e. circular NAI). The operation is
synchronous,
meaning that a response will not be returned until all the objects are
fully
available. The response payload provides the count of the resulting
set. A new
resultant set (view) which satisfies the input circular NAI restriction
specification is also created if the input parameter *view_name* is
passed in as part of
the request.
For track data, all track points that lie within the circle plus one
point on
either side of the circle (if the track goes beyond the circle) will be
included
in the result.
Parameters:
x_column_name (str)
Name of the column to be used for the x-coordinate (the
longitude) of the center.
x_center (float)
Value of the longitude of the center. Must be within [-180.0,
180.0]. The minimum allowed value is -180. The maximum allowed
value is 180.
y_column_name (str)
Name of the column to be used for the y-coordinate (the
latitude) of the center.
y_center (float)
Value of the latitude of the center. Must be within [-90.0,
90.0]. The minimum allowed value is -90. The maximum allowed
value is 90.
radius (float)
The radius of the circle within which the search will be
performed. Must be a non-zero positive value. It is in meters;
so, for example, a value of '42000' means 42 km. The minimum
allowed value is 0. The maximum allowed value is MAX_INT.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema which is to contain the newly
created view. If the schema is non-existent, it will be
automatically created.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_radius( self.qualified_name, view_name,
x_column_name, x_center,
y_column_name, y_center, radius,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_radius
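# Illustrative usage sketch (not part of the generated API). Assumes a
# GPUdbTable instance 'table' with longitude/latitude columns 'lon' and
# 'lat' (hypothetical names); the radius is in meters, so 5000.0 is 5 km:
#
#   view = table.filter_by_radius( x_column_name = "lon", x_center = -77.03,
#                                  y_column_name = "lat", y_center = 38.90,
#                                  radius = 5000.0 )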
def filter_by_radius_geometry( self, column_name = None, x_center = None,
y_center = None, radius = None, options = {},
view_name = '' ):
"""Calculates which geospatial geometry objects from a table intersect
a circle with the given radius and center point (i.e. circular NAI).
The
operation is synchronous, meaning that a response will not be returned
until all
the objects are fully available. The response payload provides the
count of the
resulting set. A new resultant set (view) which satisfies the input
circular NAI
restriction specification is also created if the input parameter
*view_name* is passed in
as part of the request.
Parameters:
column_name (str)
Name of the geospatial geometry column to be filtered.
x_center (float)
Value of the longitude of the center. Must be within [-180.0,
180.0]. The minimum allowed value is -180. The maximum allowed
value is 180.
y_center (float)
Value of the latitude of the center. Must be within [-90.0,
90.0]. The minimum allowed value is -90. The maximum allowed
value is 90.
radius (float)
The radius of the circle within which the search will be
performed. Must be a non-zero positive value. It is in meters;
so, for example, a value of '42000' means 42 km. The minimum
allowed value is 0. The maximum allowed value is MAX_INT.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema provided is non-existent, it will be
automatically created.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_radius_geometry( self.qualified_name,
view_name, column_name,
x_center, y_center,
radius, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_radius_geometry
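# Illustrative usage sketch (not part of the generated API). Assumes a
# GPUdbTable instance 'table' with a geometry column named 'wkt'
# (hypothetical); keeps geometries intersecting a 5 km circle:
#
#   view = table.filter_by_radius_geometry( column_name = "wkt",
#                                           x_center = -77.03,
#                                           y_center = 38.90,
#                                           radius = 5000.0 )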
def filter_by_range( self, column_name = None, lower_bound = None,
upper_bound = None, options = {}, view_name = '' ):
"""Calculates which objects from a table have a column that is within
the given bounds. An object from the table identified by input
parameter *table_name* is
added to the view input parameter *view_name* if its column is within
[input parameter *lower_bound*, input parameter *upper_bound*]
(inclusive). The operation is
synchronous. The response provides a count of the number of objects
which passed
the bound filter. Although this functionality can also be accomplished
with the
standard filter function, it is more efficient.
For track objects, the count reflects how many points fall within the
given
bounds (which may not include all the track points of any given track).
Parameters:
column_name (str)
Name of a column on which the operation would be applied.
lower_bound (float)
Value of the lower bound (inclusive).
upper_bound (float)
Value of the upper bound (inclusive).
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_range( self.qualified_name, view_name,
column_name, lower_bound,
upper_bound, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_range
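# Illustrative usage sketch (not part of the generated API). Assumes a
# GPUdbTable instance 'table' with a numeric column 'x' (hypothetical name);
# both bounds are inclusive:
#
#   view = table.filter_by_range( column_name = "x",
#                                 lower_bound = 0.0, upper_bound = 10.0 )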
def filter_by_series( self, track_id = None, target_track_ids = None,
options = {}, view_name = '' ):
"""Filters objects matching all points of the given track (works only
on track type data). It allows users to specify a particular track to
find all
other points in the table that fall within specified ranges (spatial
and
temporal) of all points of the given track. Additionally, the user can
specify
another track to see if the two intersect (or go close to each other
within the
specified ranges). The user also has the flexibility of using different
metrics
for the spatial distance calculation: Euclidean (flat geometry) or
Great Circle
(spherical geometry to approximate the Earth's surface distances). The
filtered
points are stored in a newly created result set. The return value of
the
function is the number of points in the resultant set (view).
This operation is synchronous, meaning that a response will not be
returned
until all the objects are fully available.
Parameters:
track_id (str)
The ID of the track which will act as the filtering points.
Must be an existing track within the given table.
target_track_ids (list of str)
Up to one track ID to intersect with the "filter" track. If any
provided, it must be a valid track ID within the given set.
The user can provide a single element (which will be
automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
* **spatial_radius** --
A positive number passed as a string representing the radius
of the search area centered around each track point's
geospatial coordinates. The value is interpreted in meters.
Required parameter.
* **time_radius** --
A positive number passed as a string representing the maximum
allowable time difference between the timestamps of a
filtered object and the given track's points. The value is
interpreted in seconds. Required parameter.
* **spatial_distance_metric** --
A string representing the coordinate system to use for the
spatial search criteria. Acceptable values are 'euclidean'
and 'great_circle'. Optional parameter; default is
'euclidean'.
Allowed values are:
* euclidean
* great_circle
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_series( self.qualified_name, view_name,
track_id, target_track_ids, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_series
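# Illustrative usage sketch (not part of the generated API). Assumes a
# track-typed GPUdbTable instance 'table' containing a track with ID
# 'track_42' (hypothetical); spatial_radius (meters) and time_radius
# (seconds) are required options, per the docstring above:
#
#   view = table.filter_by_series(
#       track_id = "track_42",
#       target_track_ids = [],
#       options = { "spatial_radius": "100", "time_radius": "30" } )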
def filter_by_string( self, expression = None, mode = None, column_names =
None, options = {}, view_name = '' ):
"""Calculates which objects from a table or view match a string
expression for the given string columns. Setting
*case_sensitive* can modify case sensitivity in matching
for all modes except *search*. For
*search* mode details and limitations, see
`Full Text Search <../../../../concepts/full_text_search/>`__.
Parameters:
expression (str)
The expression with which to filter the table.
mode (str)
The string filtering mode to apply. See below for details.
Allowed values are:
* **search** --
Full text search query with wildcards and boolean operators.
Note that for this mode, no column can be specified in input
parameter *column_names*; all string columns of the table
that have text search enabled will be searched.
* **equals** --
Exact whole-string match (accelerated).
* **contains** --
Partial substring match (not accelerated). If the column is
a string type (non-charN) and the number of records is too
large, it will return 0.
* **starts_with** --
Strings that start with the given expression (not
accelerated). If the column is a string type (non-charN) and
the number of records is too large, it will return 0.
* **regex** --
Full regular expression search (not accelerated). If the
column is a string type (non-charN) and the number of records
is too large, it will return 0.
column_names (list of str)
List of columns on which to apply the filter. Ignored for
*search* mode. The user can provide a single element (which
will be automatically promoted to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
* **case_sensitive** --
If *false* then string filtering will ignore case. Does not
apply to *search* mode.
Allowed values are:
* true
* false
The default value is 'true'.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_string( self.qualified_name, view_name,
expression, mode, column_names,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_string
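# Illustrative usage sketch (not part of the generated API). Assumes a
# GPUdbTable instance 'table' with a string column 'address' (hypothetical
# name); performs a case-insensitive substring match:
#
#   view = table.filter_by_string( expression = "Main St",
#                                  mode = "contains",
#                                  column_names = [ "address" ],
#                                  options = { "case_sensitive": "false" } )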
def filter_by_table( self, column_name = None, source_table_name = None,
source_table_column_name = None, options = {},
view_name = '' ):
"""Filters objects in one table based on objects in another table. The
user must specify matching column types from the two tables (i.e. the
target
table from which objects will be filtered and the source table based on
which
the filter will be created); the column names need not be the same. If
the
input parameter *view_name* is specified, then the filtered objects
will then be put in a
newly created view. The operation is synchronous, meaning that a
response will
not be returned until all objects are fully available in the result
view. The
return value contains the count (i.e. the size) of the resulting view.
Parameters:
column_name (str)
Name of the column by whose value the data will be filtered
from the table designated by input parameter *table_name*.
source_table_name (str)
Name of the table whose data will be compared against in the
table called input parameter *table_name*, in
[schema_name.]table_name format, using standard `name
resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. Must
be an existing table.
source_table_column_name (str)
Name of the column in the input parameter *source_table_name*
whose values will be used as the filter for table input
parameter *table_name*. Must be a geospatial geometry column if
in 'spatial' mode; otherwise, must match the type of the input
parameter *column_name*.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
* **filter_mode** --
String indicating the filter mode, either *in_table* or
*not_in_table*.
Allowed values are:
* in_table
* not_in_table
The default value is 'in_table'.
* **mode** --
Mode - should be either *spatial* or *normal*.
Allowed values are:
* normal
* spatial
The default value is 'normal'.
* **buffer** --
Buffer size, in meters. Only relevant for *spatial* mode.
The default value is '0'.
* **buffer_method** --
Method used to buffer polygons. Only relevant for *spatial*
mode.
Allowed values are:
* **geos** --
Use geos 1 edge per corner algorithm
The default value is 'normal'.
* **max_partition_size** --
Maximum number of points in a partition. Only relevant for
*spatial* mode. The default value is '0'.
* **max_partition_score** --
Maximum number of points * edges in a partition. Only
relevant for *spatial* mode. The default value is '8000000'.
* **x_column_name** --
Name of column containing x value of point being filtered in
*spatial* mode. The default value is 'x'.
* **y_column_name** --
Name of column containing y value of point being filtered in
*spatial* mode. The default value is 'y'.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_table( self.qualified_name, view_name,
column_name, source_table_name,
source_table_column_name, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_table
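# Illustrative usage sketch (not part of the generated API). Assumes a
# GPUdbTable instance 'orders' and an existing table 'crm.vip_customers'
# (hypothetical names) whose 'id' values select matching 'customer_id' rows:
#
#   view = orders.filter_by_table( column_name = "customer_id",
#                                  source_table_name = "crm.vip_customers",
#                                  source_table_column_name = "id",
#                                  options = { "filter_mode": "in_table" } )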
def filter_by_value( self, is_string = None, value = 0, value_str = '',
column_name = None, options = {}, view_name = '' ):
"""Calculates which objects from a table has a particular value for a
particular column. The input parameters provide a way to specify either
a String
or a Double valued column and a desired value for the column on which
the filter
is performed. The operation is synchronous, meaning that a response
will not be
returned until all the objects are fully available. The response
payload
provides the count of the resulting set. A new result view which
satisfies the
input filter restriction specification is also created with a view name
passed
in as part of the input payload. Although this functionality can also
be
accomplished with the standard filter function, it is more efficient.
Parameters:
is_string (bool)
Indicates whether the value being searched for is string or
numeric.
value (float)
The value to search for. The default value is 0.
value_str (str)
The string value to search for. The default value is ''.
column_name (str)
Name of a column on which the filter by value would be applied.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **create_temp_table** --
If *true*, a unique temporary table name will be generated in
the sys_temp schema and used in place of input parameter
*view_name*. This is always allowed even if the caller does
not have permission to create tables. The generated name is
returned in *qualified_view_name*.
Allowed values are:
* true
* false
The default value is 'false'.
* **collection_name** --
[DEPRECATED--please specify the containing schema for the
view as part of input parameter *view_name* and use
:meth:`GPUdb.create_schema` to create the schema if
non-existent] Name of a schema for the newly created view.
If the schema is non-existent, it will be automatically
created.
view_name (str)
If provided, then this will be the name of the view containing
the results, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__ and
meeting `table naming criteria
<../../../../concepts/tables/#table-naming-criteria>`__. Must
not be an already existing table or view. The default value is
''.
Returns:
A read-only GPUdbTable object.
Raises:
GPUdbException --
Upon an error from the server.
"""
view_name = self.__process_view_name( view_name )
response = self.db.filter_by_value( self.qualified_name, view_name,
is_string, value, value_str,
column_name, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
# Use the qualified version of the output table name from the response, if any
if "qualified_view_name" in response.info:
view_name = response.info[ "qualified_view_name" ]
return self.create_view( view_name, response[ "count" ] )
# end filter_by_value
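# Illustrative usage sketch (not part of the generated API). Assumes a
# GPUdbTable instance 'table' with a string column 'status' (hypothetical
# name); value_str is used because is_string is True:
#
#   view = table.filter_by_value( is_string = True, value_str = "SOLD",
#                                 column_name = "status" )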
def lock_table( self, lock_type = 'status', options = {} ):
"""Manages global access to a table's data. By default a table has a
input parameter *lock_type* of *read_write*, indicating all operations
are permitted. A user may request a *read_only* or a *write_only*
lock, after which only read or write operations, respectively, are
permitted on the table until the lock is removed. When input parameter
*lock_type* is *no_access* then no operations are permitted on the
table. The lock status can be queried by setting input parameter
*lock_type* to *status*.
Parameters:
lock_type (str)
The type of lock being applied to the table. Setting it to
*status* will return the current lock status of the table
without changing it.
Allowed values are:
* **status** --
Show locked status
* **no_access** --
Allow no read/write operations
* **read_only** --
Allow only read operations
* **write_only** --
Allow only write operations
* **read_write** --
Allow all read/write operations
The default value is 'status'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
The response from the server which is a dict containing the
following entries--
lock_type (str)
Returns the lock state of the table.
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.lock_table( self.qualified_name, lock_type, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end lock_table
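# Illustrative usage sketch (not part of the generated API). Assumes a
# GPUdbTable instance 'table'; the default 'status' call only queries the
# current lock state, while passing another lock_type changes it:
#
#   current = table.lock_table()[ "lock_type" ]
#   table.lock_table( lock_type = "read_only" )   # allow only reads
#   table.lock_table( lock_type = "read_write" )  # restore full access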
def show_table( self, options = {} ):
"""Retrieves detailed information about a table, view, or schema,
specified in input parameter *table_name*. If the supplied input
parameter *table_name* is a
schema the call can return information about either the schema itself
or the
tables and views it contains. If input parameter *table_name* is empty,
information about
all schemas will be returned.
If the option *get_sizes* is set to
*true*, then the number of records
in each table is returned (in output parameter *sizes* and
output parameter *full_sizes*), along with the total number of objects
across all
requested tables (in output parameter *total_size* and output parameter
*total_full_size*).
For a schema, setting the *show_children* option to
*false* returns only information
about the schema itself; setting *show_children* to
*true* returns a list of tables and
views contained in the schema, along with their corresponding detail.
To retrieve a list of every table, view, and schema in the database,
set
input parameter *table_name* to '*' and *show_children* to
*true*. When doing this, the
returned output parameter *total_size* and output parameter
*total_full_size* will not include the sizes of
non-base tables (e.g., filters, views, joins, etc.).
Parameters:
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **force_synchronous** --
If *true* then the table sizes will wait for read lock before
returning.
Allowed values are:
* true
* false
The default value is 'true'.
* **get_sizes** --
If *true* then the number of records in each table, along
with a cumulative count, will be returned; blank, otherwise.
Allowed values are:
* true
* false
The default value is 'false'.
* **get_cached_sizes** --
If *true* then the number of records in each table, along
with a cumulative count, will be returned; blank, otherwise.
This version will return the sizes cached at rank 0, which
may be stale if there is a multihead insert occurring.
Allowed values are:
* true
* false
The default value is 'false'.
* **show_children** --
If input parameter *table_name* is a schema, then *true* will
return information about the tables and views in the schema,
and *false* will return information about the schema itself.
If input parameter *table_name* is a table or view,
*show_children* must be *false*. If input parameter
*table_name* is empty, then *show_children* must be *true*.
Allowed values are:
* true
* false
The default value is 'true'.
* **no_error_if_not_exists** --
If *false* will return an error if the provided input
parameter *table_name* does not exist. If *true* then it will
return an empty result.
Allowed values are:
* true
* false
The default value is 'false'.
* **get_column_info** --
If *true* then column info (memory usage, etc) will be
returned.
Allowed values are:
* true
* false
The default value is 'false'.
Returns:
The response from the server which is a dict containing the
following entries--
table_name (str)
Value of input parameter *table_name*.
table_names (list of str)
If input parameter *table_name* is a table or view, then the
single element of the array is input parameter *table_name*.
If input parameter *table_name* is a schema and *show_children*
is set to *true*,
then this array is populated with the names of all tables and
views in the given schema;
if *show_children* is *false*,
then this array will only include the schema name itself.
If input parameter *table_name* is an empty string, then the
array contains the names of all tables in the user's default
schema.
table_descriptions (list of lists of str)
List of descriptions for the respective tables in output
parameter *table_names*.
Allowed values are:
* COLLECTION
* JOIN
* LOGICAL_EXTERNAL_TABLE
* LOGICAL_VIEW
* MATERIALIZED_EXTERNAL_TABLE
* MATERIALIZED_VIEW
* MATERIALIZED_VIEW_MEMBER
* MATERIALIZED_VIEW_UNDER_CONSTRUCTION
* REPLICATED
* RESULT_TABLE
* SCHEMA
* VIEW
type_ids (list of str)
Type ids of the respective tables in output parameter
*table_names*.
type_schemas (list of str)
Type schemas of the respective tables in output parameter
*table_names*.
type_labels (list of str)
Type labels of the respective tables in output parameter
*table_names*.
properties (list of dicts of str to lists of str)
Property maps of the respective tables in output parameter
*table_names*.
additional_info (list of dicts of str to str)
Additional information about the respective tables in output
parameter *table_names*.
Allowed values are:
* @INNER_STRUCTURE
sizes (list of longs)
If *get_sizes* is *true*, an array containing the number of
records of each corresponding table in output parameter
*table_names*. Otherwise, an empty array.
full_sizes (list of longs)
If *get_sizes* is *true*, an array containing the number of
records of each corresponding table in output parameter
*table_names* (same values as output parameter *sizes*).
Otherwise, an empty array.
join_sizes (list of floats)
If *get_sizes* is *true*, an array containing the number of
unfiltered records in the cross product of the sub-tables of
each corresponding join-table in output parameter
*table_names*. For simple tables, this number will be the same
as output parameter *sizes*. For join-tables, this value gives
the number of joined-table rows that must be processed by any
aggregate functions operating on the table. Otherwise, (if
*get_sizes* is *false*), an empty array.
total_size (long)
If *get_sizes* is *true*, the sum of the elements of output
parameter *sizes*. Otherwise, -1.
total_full_size (long)
If *get_sizes* is *true*, the sum of the elements of output
parameter *full_sizes* (same value as output parameter
*total_size*). Otherwise, -1.
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.show_table( self.qualified_name, options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end show_table
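# Illustrative usage sketch (not part of the generated API). Assumes a
# GPUdbTable instance 'table'; with get_sizes enabled the response carries
# per-table record counts and a total:
#
#   info = table.show_table( options = { "get_sizes": "true" } )
#   print( info[ "table_names" ], info[ "total_size" ] )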
def update_records( self, expressions = None, new_values_maps = None,
records_to_insert = [], records_to_insert_str = [],
record_encoding = 'binary', options = {} ):
"""Runs multiple predicate-based updates in a single call. With the
list of given expressions, any matching record's column values will be
updated
as provided in input parameter *new_values_maps*. There is also an
optional 'upsert'
capability where if a particular predicate doesn't match any existing
record,
then a new record can be inserted.
Note that this operation can only be run on an original table and not
on a
result view.
This operation can update primary key values. By default only
'pure primary key' predicates are allowed when updating primary key
values. If
the primary key for a table is the column 'attr1', then the operation
will only
accept predicates of the form: "attr1 == 'foo'" if the attr1 column is
being
updated. For a composite primary key (e.g. columns 'attr1' and
'attr2') then
this operation will only accept predicates of the form:
"(attr1 == 'foo') and (attr2 == 'bar')". Meaning, all primary key
columns
must appear in an equality predicate in the expressions. Furthermore
each
'pure primary key' predicate must be unique within a given request.
These
restrictions can be removed by utilizing some available options through
input parameter *options*.
The *update_on_existing_pk* option specifies the record primary key
collision
policy for tables with a `primary key
<../../../../concepts/tables/#primary-keys>`__, while
*ignore_existing_pk* specifies the record primary key collision
error-suppression policy when those collisions result in the update
being rejected. Both are
ignored on tables with no primary key.
Parameters:
expressions (list of str)
A list of the actual predicates, one for each update; format
should follow the guidelines :meth:`here <GPUdb.filter>`.
new_values_maps (list of dicts of str to str and/or None)
List of new values for the matching records. Each element is a
map with
(key, value) pairs where the keys are the names of the columns
whose values are to be updated; the
values are the new values. The number of elements in the list
should match the length of input parameter *expressions*.
records_to_insert (list of str)
An *optional* list of new binary-avro encoded records to
insert, one for each
update. If one of input parameter *expressions* does not yield
a matching record to be updated, then the
corresponding element from this list will be added to the
table. The default value is an empty list ( [] ).
records_to_insert_str (list of str)
An optional list of JSON encoded objects to insert, one for
each update, to be added if the particular update did not match
any objects. The default value is an empty list ( [] ).
record_encoding (str)
Identifies which of input parameter *records_to_insert* and
input parameter *records_to_insert_str* should be used.
Allowed values are:
* binary
* json
The default value is 'binary'.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Allowed keys are:
* **global_expression** --
An optional global expression to reduce the search space of
the predicates listed in input parameter *expressions*. The
default value is ''.
* **bypass_safety_checks** --
When set to *true*,
all predicates are available for primary key updates. Keep
in mind that it is possible to destroy
data in this case, since a single predicate may match
multiple objects (potentially all of records
of a table), and then updating all of those records to have
the same primary key will, due to the
primary key uniqueness constraints, effectively delete all
but one of those updated records.
Allowed values are:
* true
* false
The default value is 'false'.
* **update_on_existing_pk** --
Specifies the record collision policy for updating a table
with a
`primary key <../../../../concepts/tables/#primary-keys>`__.
There are two ways that a record collision can
occur.
The first is an "update collision", which happens when the
update changes the value of the updated
record's primary key, and that new primary key already exists
as the primary key of another record
in the table.
The second is an "insert collision", which occurs when a
given filter in input parameter *expressions*
finds no records to update, and the alternate insert record
given in input parameter *records_to_insert* (or
input parameter *records_to_insert_str*) contains a primary
key matching that of an existing record in the
table.
If *update_on_existing_pk* is set to
*true*, "update collisions" will result in the
existing collided-into record being removed and the record
updated with values specified in
input parameter *new_values_maps* taking its place; "insert
collisions" will result in the collided-into
record being updated with the values in input parameter
*records_to_insert*/input parameter *records_to_insert_str*
(if given).
If set to *false*, the existing collided-into
record will remain unchanged, while the update will be
rejected and the error handled as determined
by *ignore_existing_pk*. If the specified table does not
have a primary key,
then this option has no effect.
Allowed values are:
* **true** --
Overwrite the collided-into record when updating a
record's primary key or inserting an alternate record
causes a primary key collision between the
record being updated/inserted and another existing record
in the table
* **false** --
Reject updates which cause primary key collisions
between the record being updated/inserted and an existing
record in the table
The default value is 'false'.
* **ignore_existing_pk** --
Specifies the record collision error-suppression policy for
updating a table with a `primary key
<../../../../concepts/tables/#primary-keys>`__, only used
when primary
key record collisions are rejected (*update_on_existing_pk*
is
*false*). If set to
*true*, any record update that is rejected for
resulting in a primary key collision with an existing table
record will be ignored with no error
generated. If *false*, the rejection of any update
for resulting in a primary key collision will cause an error
to be reported. If the specified table
does not have a primary key or if *update_on_existing_pk* is
*true*, then this option has no effect.
Allowed values are:
* **true** --
Ignore updates that result in primary key collisions with
existing records
* **false** --
Treat as errors any updates that result in primary key
collisions with existing records
The default value is 'false'.
* **update_partition** --
Force qualifying records to be deleted and reinserted so
their partition membership will be reevaluated.
Allowed values are:
* true
* false
The default value is 'false'.
* **truncate_strings** --
If set to *true*, any strings which are too long for their
charN string fields will be truncated to fit.
Allowed values are:
* true
* false
The default value is 'false'.
* **use_expressions_in_new_values_maps** --
When set to *true*,
all new values in input parameter *new_values_maps* are
considered as expression values. When set to
*false*, all new values in
input parameter *new_values_maps* are considered as
constants. NOTE: When
*true*, string constants will need
to be quoted to avoid being evaluated as expressions.
Allowed values are:
* true
* false
The default value is 'false'.
* **record_id** --
ID of a single record to be updated (returned in the call to
:meth:`GPUdb.insert_records` or
:meth:`GPUdb.get_records_from_collection`).
Returns:
The response from the server which is a dict containing the
following entries--
count_updated (long)
Total number of records updated.
counts_updated (list of longs)
Total number of records updated per predicate in input
parameter *expressions*.
count_inserted (long)
Total number of records inserted (due to expressions not
matching any existing records).
counts_inserted (list of longs)
Total number of records inserted per predicate in input
parameter *expressions* (will be either 0 or 1 for each
expression).
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.update_records( self.qualified_name, expressions,
new_values_maps, records_to_insert,
records_to_insert_str,
record_encoding, options, record_type
= self.record_type )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end update_records
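# Illustrative usage sketch (not part of the generated API). Assumes a
# GPUdbTable instance 'table' with columns 'attr1' and 'attr2' (hypothetical
# names); each predicate in 'expressions' is paired with the map of new
# column values at the same index:
#
#   resp = table.update_records(
#       expressions = [ "attr1 == 'foo'" ],
#       new_values_maps = [ { "attr2": "bar" } ],
#       options = { "truncate_strings": "true" } )
#   print( resp[ "count_updated" ] )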
def update_records_by_series( self, world_table_name = None, view_name = '',
reserved = [], options = {} ):
"""Updates the view specified by input parameter *table_name* to include
full
series (track) information from the input parameter *world_table_name*
for the series
(tracks) present in the input parameter *view_name*.
Parameters:
world_table_name (str)
Name of the table containing the complete series (track)
information, in [schema_name.]table_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__.
view_name (str)
Name of the view containing the series (tracks) which have to
be updated, in [schema_name.]view_name format, using standard
`name resolution rules
<../../../../concepts/tables/#table-name-resolution>`__. The
default value is ''.
reserved (list of str)
The default value is an empty list ( [] ). The user can
provide a single element (which will be automatically promoted
to a list internally) or a list.
options (dict of str to str)
Optional parameters. The default value is an empty dict ( {}
).
Returns:
The response from the server which is a dict containing the
following entries--
count (int)
info (dict of str to str)
Additional information.
Raises:
GPUdbException --
Upon an error from the server.
"""
response = self.db.update_records_by_series( self.qualified_name,
world_table_name,
view_name, reserved,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end update_records_by_series
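# Illustrative usage sketch (not part of the generated API). Assumes
# 'track_view' is a GPUdbTable wrapping a view of track data and
# 'all_tracks' (hypothetical name) is the table holding the complete tracks:
#
#   track_view.update_records_by_series( world_table_name = "all_tracks" )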
def visualize_image_labels( self, x_column_name = None, y_column_name =
None, x_offset = '', y_offset = '', text_string
= None, font = '', text_color = '', text_angle =
'', text_scale = '', draw_box = '', draw_leader
= '', line_width = '', line_color = '',
fill_color = '', leader_x_column_name = '',
leader_y_column_name = '', filter = '', min_x =
None, max_x = None, min_y = None, max_y = None,
width = None, height = None, projection =
'PLATE_CARREE', options = {} ):
response = self.db.visualize_image_labels( self.qualified_name,
x_column_name, y_column_name,
x_offset, y_offset,
text_string, font,
text_color, text_angle,
text_scale, draw_box,
draw_leader, line_width,
line_color, fill_color,
leader_x_column_name,
leader_y_column_name, filter,
min_x, max_x, min_y, max_y,
width, height, projection,
options )
if not response.is_ok():
raise GPUdbException( response.get_error_msg() )
return response
# end visualize_image_labels
# end class GPUdbTable
# ---------------------------------------------------------------------------
# GPUdbTableIterator - Iterator Class to iterate over records in a table
# ---------------------------------------------------------------------------
class GPUdbTableIterator( Iterator ):
"""Iterates over a chunk of records of a given table. Once the initial
chunk of records have been iterated over, a new iterator object must
be instantiated since there is no way to guarantee that getting another
chunk would yield the 'next' set of records without duplicates or skipping
over records. GPUdb does not guarantee any order of returned records via
/get/records/\*.
"""
def __init__( self, table, offset = 0, limit = 10000, db = None ):
"""Initiate the iterator with the given table, offset, and limit.
Parameters:
table (GPUdbTable)
A GPUdbTable object or a name of a table
offset (int)
An integer value greater than or equal to 0.
limit (int)
An integer value greater than or equal to 1.
db (GPUdb)
Optional GPUdb object
"""
# Validate and set the offset
if not isinstance( offset, (int, long) ) or (offset < 0):
raise GPUdbException( "Offset must be >= 0; given {0}"
"".format( offset ) )
self.offset = offset
if not isinstance( limit, (int, long) ) or (limit < 1):
raise GPUdbException( "Limit must be >= 1; given {0}"
"".format( limit ) )
self.limit = limit
# Save the table name and the GPUdb object
if isinstance( table, GPUdbTable ):
self.table = table
elif isinstance( table, (str, unicode) ):
if not isinstance( db, GPUdb ):
raise GPUdbException( "Argument 'db' must be a GPUdb object "
"if 'table' is the table name; given "
"{0}".format( type( db ) ) )
# Create the table object
self.table = GPUdbTable( None, table, db = db )
else:
raise GPUdbException( "Argument 'table' must be a GPUdbTable object"
" or a string; given {0}".format( table ) )
self.cursor = 0
# Call /get/records to get the batch of records
records = self.table.get_records( offset = self.offset,
limit = self.limit )
self.records = records
# end __init__
def __iter__( self ):
return self
def next( self ):
return self.__next__()
# end next
def __next__( self ): # For python3
if (self.cursor == len( self.records ) ):
raise StopIteration()
cursor = self.cursor
self.cursor += 1
return self.records[ cursor ]
# end __next__
# end class GPUdbTableIterator
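# Illustrative usage sketch (not part of the generated API). Assumes a GPUdb
# connection 'db' and a table named 'my_table' (hypothetical names); iterates
# over a single chunk of up to 100 records, in no guaranteed order:
#
#   for record in GPUdbTableIterator( "my_table", offset = 0, limit = 100,
#                                     db = db ):
#       print( record )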
# ---------------------------------------------------------------------------
# GPUdbTableOptions - Class to handle GPUdb table creation options
# ---------------------------------------------------------------------------
class GPUdbTableOptions(object):
"""
Encapsulates the various options used to create a table. The same object
can be used on multiple tables and state modifications are chained together:
::
opts = GPUdbTableOptions.default().collection_name('coll_name')
table1 = GPUdbTable( None, options = opts )
table2 = GPUdbTable( None, options = opts.is_replicated( True ) )
"""
__no_error_if_exists = "no_error_if_exists"
__collection_name = "collection_name"
__is_collection = "is_collection"
__disallow_homogeneous_tables = "disallow_homogeneous_tables"
__is_replicated = "is_replicated"
__foreign_keys = "foreign_keys"
__foreign_shard_key = "foreign_shard_key"
__partition_type = "partition_type"
__partition_keys = "partition_keys"
__partition_definitions = "partition_definitions"
__is_automatic_partition = "is_automatic_partition"
__ttl = "ttl"
__chunk_size = "chunk_size"
__chunk_column_max_memory = "chunk_column_max_memory"
__chunk_max_memory = "chunk_max_memory"
__strategy_definition = "strategy_definition"
__is_result_table = "is_result_table"
__create_temp_table = "create_temp_table"
_supported_options = [ __no_error_if_exists,
__collection_name,
__is_collection,
__disallow_homogeneous_tables,
__is_replicated,
__foreign_keys,
__foreign_shard_key,
__partition_type,
__partition_keys,
__partition_definitions,
__is_automatic_partition,
__ttl,
__chunk_size,
__strategy_definition,
__is_result_table,
__create_temp_table
]
@staticmethod
def default():
return GPUdbTableOptions()
def __init__(self, _dict = None):
"""Create a default set of options for create_table().
Parameters:
_dict (dict)
Optional dictionary with options already loaded.
Returns:
A GPUdbTableOptions object.
"""
# Set default values
self._no_error_if_exists = False
self._collection_name = None
self._is_collection = False
self._disallow_homogeneous_tables = False
self._is_replicated = False
self._foreign_keys = None
self._foreign_shard_key = None
self._partition_type = None
self._partition_keys = None
self._partition_definitions = None
self._is_automatic_partition = None
self._ttl = None
self._chunk_size = None
self._strategy_definition = None
self._is_result_table = None
self._create_temp_table = False
if (_dict is None):
return # nothing to do
if not isinstance( _dict, dict ):
raise GPUdbException( "Argument '_dict' must be a dict; given '%s'."
% type( _dict ) )
# Else,_dict is a dict; extract options from within it
# Check for invalid options
unsupported_options = set( _dict.keys() ).difference( self._supported_options )
if unsupported_options:
raise GPUdbException( "Invalid options: %s" % unsupported_options )
# Extract and save each option
for (key, val) in _dict.items():
getattr( self, key )( val )
# end __init__
def as_json(self):
"""Return the options as a JSON for using directly in create_table()"""
result = {}
if self._is_replicated is not None:
result[ self.__is_replicated ] = "true" if self._is_replicated else "false"
if self._is_result_table is not None:
result[ self.__is_result_table ] = "true" if self._is_result_table else "false"
if self._collection_name is not None:
result[ self.__collection_name ] = str( self._collection_name )
if self._no_error_if_exists is not None:
result[ self.__no_error_if_exists ] = "true" if self._no_error_if_exists else "false"
if self._chunk_size is not None:
result[ self.__chunk_size ] = str( self._chunk_size )
if self._is_collection is not None:
result[ self.__is_collection ] = "true" if self._is_collection else "false"
if self._foreign_keys is not None:
result[ self.__foreign_keys ] = str( self._foreign_keys )
if self._foreign_shard_key is not None:
result[ self.__foreign_shard_key ] = str( self._foreign_shard_key )
if self._partition_type is not None:
result[ self.__partition_type ] = str( self._partition_type )
if self._partition_keys is not None:
result[ self.__partition_keys ] = str( self._partition_keys )
if self._partition_definitions is not None:
result[ self.__partition_definitions ] = str( self._partition_definitions )
if self._is_automatic_partition is not None:
result[ self.__is_automatic_partition ] = "true" if self._is_automatic_partition else "false"
if self._ttl is not None:
result[ self.__ttl ] = str( self._ttl )
if self._strategy_definition is not None:
result[ self.__strategy_definition ] = str( self._strategy_definition )
if self._disallow_homogeneous_tables is not None:
result[ self.__disallow_homogeneous_tables ] = "true" if self._disallow_homogeneous_tables else "false"
if self._create_temp_table is not None and self._create_temp_table:
result[ self.__create_temp_table ] = "true"
return result
# end as_json
def as_dict(self):
"""Return the options as a dict for using directly in create_table()"""
return self.as_json()
# end as_dict
def no_error_if_exists(self, val):
if isinstance( val, bool ):
self._no_error_if_exists = val
elif val.lower() in ["true", "false"]:
self._no_error_if_exists = True if (val == "true") else False
else:
raise GPUdbException( "Value for 'no_error_if_exists' must be "
"boolean or one of ['true', 'false']; "
"given " + repr( val ) )
return self
# end no_error_if_exists
def collection_name(self, val):
"""When creating a new table, sets the name of the collection which is
to contain the table. If the collection specified is non-existent, the
collection will be automatically created. If not specified, the newly
created table will be a top-level table.
"""
if (val and not isinstance( val, basestring )):
raise GPUdbException( "'collection_name' must be a string value; given '%s'" % val )
self._collection_name = val
return self
# end collection_name
def is_collection(self, val):
"""When creating a new entity, sets whether the entity is a collection
or a table (the default).
"""
if isinstance( val, bool ):
self._is_collection = val
elif val.lower() in ["true", "false"]:
self._is_collection = True if (val == "true") else False
else:
raise GPUdbException( "Value for 'is_collection' must be "
"boolean or one of ['true', 'false']; "
"given " + repr( val ) )
return self
# end is_collection
def disallow_homogeneous_tables(self, val):
"""When creating a new collection, sets whether the collection prohibits
containment of multiple tables of exactly the same type.
"""
if isinstance( val, bool ):
self._disallow_homogeneous_tables = val
elif val.lower() in ["true", "false"]:
self._disallow_homogeneous_tables = True if (val == "true") else False
else:
raise GPUdbException( "Value for 'disallow_homogeneous_tables' must be "
"boolean or one of ['true', 'false']; "
"given " + repr( val ) )
return self
# end disallow_homogeneous_tables
def is_replicated(self, val):
"""When creating a new table, sets whether the table is replicated or
not (the default).
"""
if isinstance( val, bool ):
self._is_replicated = val
elif val.lower() in ["true", "false"]:
self._is_replicated = True if (val == "true") else False
else:
raise GPUdbException( "Value for 'is_replicated' must be "
"boolean or one of ['true', 'false']; "
"given " + repr( val ) )
return self
# end is_replicated
def is_result_table(self, val):
"""When creating a new table, sets whether the table is an in-memory
table or not (the default). An in-memory table cannot contain *store-only*,
*text-searchable*, or unrestricted length string columns; and it will
not be retained if the server is restarted.
"""
if isinstance( val, bool ):
self._is_result_table = val
elif val.lower() in ["true", "false"]:
self._is_result_table = True if (val == "true") else False
else:
raise GPUdbException( "Value for 'is_result_table' must be "
"boolean or one of ['true', 'false']; "
"given " + repr( val ) )
return self
# end is_result_table
def is_automatic_partition(self, val):
"""When creating a new table, sets whether the a new partition will be
created for values which don't fall into any existing partition.
"""
if isinstance( val, bool ):
self._is_automatic_partition = val
elif val.lower() in ["true", "false"]:
self._is_automatic_partition = True if (val == "true") else False
else:
raise GPUdbException( "Value for 'is_automatic_partition' must be "
"boolean or one of ['true', 'false']; "
"given " + repr( val ) )
return self
# end is_automatic_partition
def foreign_keys(self, val):
self._foreign_keys = val
return self
# end foreign_keys
def foreign_shard_key(self, val):
self._foreign_shard_key = val
return self
# end foreign_shard_key
def partition_type(self, val):
self._partition_type = val
return self
# end partition_type
def partition_keys(self, val):
self._partition_keys = val
return self
# end partition_keys
def partition_definitions(self, val):
self._partition_definitions = val
return self
# end partition_definitions
def ttl(self, val):
self._ttl = val
return self
# end ttl
def chunk_size(self, val):
self._chunk_size = val
return self
# end chunk_size
def strategy_definition(self, val):
self._strategy_definition = val
return self
# end strategy_definition
def create_temp_table(self, val):
if isinstance( val, bool ):
self._create_temp_table = val
elif val.lower() in ["true", "false"]:
self._create_temp_table = True if (val.lower() == "true") else False
else:
raise GPUdbException( "Value for 'create_temp_table' must be "
"boolean or one of ['true', 'false']; "
"given " + repr( val ) )
return self
# end create_temp_table
# end class GPUdbTableOptions
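# Illustrative usage sketch (not part of the generated API). Options can be
# chained fluently and then converted to the plain dict expected by
# create_table(); the ttl value is hypothetical:
#
#   opts = GPUdbTableOptions.default().is_replicated( True ).ttl( 20 )
#   create_table_options = opts.as_dict()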