Create Data Sources

Copy-paste examples of how to create data sources with SQL

Several authentication schemes across multiple providers are supported. For a detailed overview of all of the provider-specific options, see the SQL documentation.

Azure BLOB

Credential
1
2
3
4
5
6
7
CREATE DATA SOURCE azure_ds
LOCATION = 'AZURE'
WITH OPTIONS
(
    CREDENTIAL = 'azure_cred',
    CONTAINER NAME = 'samplecontainer'
)
Public (No Auth)
1
2
3
4
5
6
7
CREATE DATA SOURCE azure_ds
LOCATION = 'AZURE'
USER = 'sampleacc'
WITH OPTIONS
(
    CONTAINER NAME = 'samplecontainer'
)
Password
1
2
3
4
5
6
7
8
CREATE DATA SOURCE azure_ds
LOCATION = 'AZURE'
USER = 'sampleacc'
PASSWORD = 'foobaz123'
WITH OPTIONS
(
    CONTAINER NAME = 'samplecontainer'
)
SAS Token
1
2
3
4
5
6
7
8
CREATE DATA SOURCE azure_ds
LOCATION = 'AZURE'
USER = 'sampleacc'
WITH OPTIONS
(
    CONTAINER NAME = 'samplecontainer',
    SAS TOKEN = 'sv=2015-07-08&sr=b&sig=39Up0JzHkxhUlhFEjEH9673DJxe7w6clRCg0V6lCgSo%3D&se=2016-10-18T21%A51%A337Z&sp=rcw'
)
Active Directory
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
CREATE DATA SOURCE azure_ds
LOCATION = 'AZURE'
USER = 'jdoe'
PASSWORD = 'foobaz123'
WITH OPTIONS
(
    STORAGE ACCOUNT NAME = 'sampelacc',
    CONTAINER NAME = 'samplecontainer',
    TENANT ID = 'x0xxx10-00x0-0x01-0xxx-x0x0x01xx100'
)

CData

Credential
1
2
3
CREATE DATA SOURCE cdata_ds
LOCATION = 'jdbc:postgresql:Server=my.server.com;Port=5432;Database=mydb'
WITH OPTIONS (CREDENTIAL = 'cdata_cred')
Password in URL
1
2
CREATE DATA SOURCE cdata_ds
LOCATION = 'jdbc:postgresql:Server=my.server.com;Port=5432;Database=mydb;User=auser;Password=Passw0rd!'
Password as Parameter
1
2
3
4
CREATE DATA SOURCE cdata_ds
LOCATION = 'jdbc:postgresql:Server=my.server.com;Port=5432;Database=mydb'
USER = 'auser'
PASSWORD = 'Passw0rd!'

Google Cloud Storage

Credential
1
2
3
4
5
6
7
CREATE DATA SOURCE gcs_ds
LOCATION = 'GCS'
WITH OPTIONS
(
      CREDENTIAL = 'gcs_cred',
      GCS_BUCKET_NAME = 'gcs-public'
)
Public (No Auth)
1
2
3
4
5
6
CREATE DATA SOURCE gcs_ds
LOCATION = 'GCS'
WITH OPTIONS
(
      GCS_BUCKET_NAME = 'gcs-public'
)
User ID & Key
1
2
3
4
5
6
7
8
CREATE DATA SOURCE gcs_ds
LOCATION = 'GCS'
USER = 'auser@auser.iam.gserviceaccount.com'
PASSWORD = '-----BEGIN PRIVATE KEY-----\nABCDEFG=\n-----END PRIVATE KEY-----\n'
WITH OPTIONS
(
      GCS_BUCKET_NAME = 'gcs-public'
)
JSON Key
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
CREATE DATA SOURCE gcs_ds
LOCATION = 'GCS'
WITH OPTIONS
(
      GCS_SERVICE_ACCOUNT_KEYS = '
      {
            "type": "service_account",
            "project_id": "auser",
            "private_key_id": "abcdef1234567890",
            "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEFG=\n-----END PRIVATE KEY-----\n",
            "client_email": "auser@auser.iam.gserviceaccount.com",
            "client_id": "1234567890",
            "auth_uri": "https://accounts.google.com/o/oauth2/auth",
            "token_uri": "https://oauth2.googleapis.com/token",
            "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
            "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/auser%40auser.iam.gserviceaccount.com"
      }',
      GCS_BUCKET_NAME = 'gcs-public'
)

HDFS

Credential
1
2
3
4
5
6
CREATE DATA SOURCE hdfs_ds
LOCATION = 'HDFS://example.com:8020'
WITH OPTIONS
(
    CREDENTIAL = 'hdfs_cred'
)
Password
1
2
3
4
CREATE DATA SOURCE hdfs_ds
LOCATION = 'HDFS://example.com:8020'
USER = 'jdoe'
PASSWORD = 'foobaz123'
Kerberos Keytab
1
2
3
4
5
6
7
CREATE DATA SOURCE hdfs_ds
LOCATION = 'HDFS://example.com:8020'
USER = 'jdoe'
WITH OPTIONS
(
    KERBEROS KEYTAB = '/path/to/jdoe.keytab'
)
Kerberos Token
1
2
3
4
5
6
7
CREATE DATA SOURCE hdfs_ds
LOCATION = 'HDFS://example.com:8020'
USER = 'jdoe'
WITH OPTIONS
(
    USE KERBEROS = true
)

JDBC

Credential
1
2
3
4
5
6
7
8
CREATE DATA SOURCE jdbc_ds
LOCATION = 'jdbc:postgresql://localhost:5432/ki_home'
WITH OPTIONS
(
      CREDENTIAL = 'jdbc_cred',
      JDBC_DRIVER_CLASS_NAME = 'org.postgresql.Driver',
      JDBC_DRIVER_JAR_PATH = 'kifs://drivers/postgresql.jar'
)
Password
1
2
3
4
5
6
7
8
9
CREATE DATA SOURCE jdbc_ds
LOCATION = 'jdbc:postgresql://localhost:5432/ki_home'
USER = 'auser'
PASSWORD = 'Passw0rd!'
WITH OPTIONS
(
      JDBC_DRIVER_CLASS_NAME = 'org.postgresql.Driver',
      JDBC_DRIVER_JAR_PATH = 'kifs://drivers/postgresql.jar'
)

Kafka (Apache)

Credential
1
2
3
4
5
6
7
CREATE DATA SOURCE kafka_ds
LOCATION = 'KAFKA://example.com:9092'
WITH OPTIONS
(
    KAFKA_TOPIC_NAME = 'sample',
    CREDENTIAL = 'kafka_cred'
)
Public (No Auth)
1
2
3
4
5
6
CREATE DATA SOURCE kafka_ds
LOCATION = 'KAFKA://example.com:9092'
WITH OPTIONS
(
    KAFKA_TOPIC_NAME = 'sample'
)

Note

Creating an authenticated Kafka data source requires creating a corresponding credential object to store the Kafka credentials and then referencing that object when creating the data source. See Kafka (Apache) for examples.

Kafka (Confluent)

Credential
1
2
3
4
5
6
7
CREATE DATA SOURCE confluent_ds
LOCATION = 'CONFLUENT://example.com:9092'
WITH OPTIONS
(
    KAFKA_TOPIC_NAME = 'sample',
    CREDENTIAL = 'confluent_cred'
)
Public (No Auth)
1
2
3
4
5
6
CREATE DATA SOURCE confluent_ds
LOCATION = 'CONFLUENT://example.com:9092'
WITH OPTIONS
(
    KAFKA_TOPIC_NAME = 'sample'
)

Note

Creating an authenticated Confluent data source requires creating a corresponding credential object to store the Confluent credentials and then referencing that object when creating the data source. See Kafka (Confluent) for examples.

S3 (Amazon)

Credential
1
2
3
4
5
6
7
8
CREATE DATA SOURCE s3_ds
LOCATION = 'S3'
WITH OPTIONS
(
    CREDENTIAL = 's3_cred',
    BUCKET NAME = 'samplebucket',
    REGION = 'us-east-2'
)
Public (No Auth)
1
2
3
4
5
6
7
CREATE OR REPLACE DATA SOURCE s3_ds
LOCATION = 'S3'
WITH OPTIONS
(
    BUCKET NAME = 'quickstartpublic',
    REGION = 'us-west-1'
)
Access Key
1
2
3
4
5
6
7
8
9
CREATE DATA SOURCE s3_ds
LOCATION = 'S3'
USER = 'AKIAIOSFODNN7EXAMPLE'
PASSWORD = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
WITH OPTIONS
(
    BUCKET NAME = 'samplebucket',
    REGION = 'us-east-2'
)
IAM Role
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
CREATE DATA SOURCE s3_ds
LOCATION = 'S3'
USER = 'AKIAIOSFODNN7EXAMPLE'
PASSWORD = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
WITH OPTIONS
(
    BUCKET NAME = 'samplebucket',
    REGION = 'us-east-2',
    S3_AWS_ROLE_ARN = 'arn:aws:iam::123456789012:user/JohnDoe'
)