Loading Data

Copy-paste examples of loading data with SQL

LOAD INTO

Single & Multiple Files

Single File
1
2
LOAD DATA INTO example.product
FROM FILE PATHS 'kifs://data/products.csv'
Multiple Files by List
1
2
LOAD DATA INTO example.product
FROM FILE PATHS 'kifs://data/products.csv', 'kifs://data/products.kh.csv'

Data Sources

Batch Load
1
2
3
LOAD DATA INTO example.product
FROM FILE PATHS 'products.csv'
WITH OPTIONS (DATA SOURCE = 'product_ds')
Streaming Load
1
2
3
LOAD DATA INTO example.orders
FORMAT JSON
WITH OPTIONS (DATA SOURCE = 'kafka_ds', SUBSCRIBE = TRUE)
JDBC Query Load
1
2
3
LOAD INTO example.employee_dept2
FROM REMOTE QUERY 'SELECT * FROM example.employee WHERE dept_id = 2'
WITH OPTIONS (DATA SOURCE = 'example.jdbc_ds')

Delimited Text Files

CSV
1
2
LOAD DATA INTO example.product
FROM FILE PATHS 'kifs://data/products.nh.csv'
Forced Format
1
2
3
LOAD DATA INTO example.product
FROM FILE PATHS 'kifs://data/products.nh.dat'
FORMAT TEXT
No Header
1
2
3
LOAD DATA INTO example.product
FROM FILE PATHS 'kifs://data/products.nh.csv'
FORMAT TEXT (INCLUDES HEADER = FALSE)
Columns Mapped by Name
1
2
3
LOAD DATA INTO example.product_name_stock
FROM FILE PATHS 'kifs://data/products.title-case.csv'
WITH OPTIONS (FIELDS MAPPED BY NAME(ID, Name, Stock))
Columns Ignored by Position
1
2
3
LOAD DATA INTO example.product_name_stock
FROM FILE PATHS 'kifs://data/products.title-case.csv'
WITH OPTIONS (FIELDS IGNORED BY POSITION(2, 4))
Text Options
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
LOAD DATA INTO example.product
FROM FILE PATHS 'kifs://data/products.ssv'
FORMAT TEXT
(
    COMMENT = '--',
    DELIMITER = ';',
    ESCAPE = '`',
    INCLUDES HEADER = TRUE,
    NULL = '<null>',
    QUOTE = ''''
)

JSON/GeoJSON Files

JSON File
1
2
LOAD DATA INTO example.product
FROM FILE PATHS 'kifs://data/products.json'
GeoJSON File
1
2
LOAD DATA INTO example.events
FROM FILE PATHS 'kifs://data/events.json'
Forced Format
1
2
3
LOAD DATA INTO example.product
FROM FILE PATHS 'kifs://data/products.dat'
FORMAT JSON
Properties Mapped by Name
1
2
3
LOAD DATA INTO example.product_name_stock
FROM FILE PATHS 'kifs://data/products.json'
WITH OPTIONS (FIELDS MAPPED BY NAME(ID, Name, Stock))
Properties Ignored by Name
1
2
3
LOAD DATA INTO example.product_name_stock
FROM FILE PATHS 'kifs://data/products.json'
WITH OPTIONS (FIELDS IGNORED BY NAME(Category, Description))

Parquet Files

Implicit Format
1
2
LOAD DATA INTO example.ext_employee
FROM FILE PATHS 'kifs://data/employee.parquet'
Forced Format
1
2
3
CREATE EXTERNAL TABLE example.ext_employee
FILE PATHS 'kifs://data/employee.dat'
FORMAT PARQUET
Properties Mapped By Name
1
2
3
LOAD DATA INTO example.product_name_stock
FROM FILE PATHS 'kifs://data/products.parquet'
WITH OPTIONS (FIELDS MAPPED BY NAME(ID, Name, Stock))
Properties Ignored By Name
1
2
3
LOAD DATA INTO example.product_name_stock
FROM FILE PATHS 'kifs://data/products.parquet'
WITH OPTIONS (FIELDS IGNORED BY NAME(Category, Description))

Shapefile Files

Implicit Format
1
2
LOAD DATA INTO example.events
FROM FILE PATHS 'kifs://data/events.shp'
Forced Format
1
2
3
LOAD DATA INTO example.events
FROM FILE PATHS 'kifs://data/events.shp'
FORMAT SHAPEFILE
Properties Mapped by Name
1
2
3
LOAD DATA INTO example.events
FROM FILE PATHS 'kifs://data/events.shp'
WITH OPTIONS (FIELDS MAPPED BY NAME(ID, kiGeometry))
Properties Ignored by Name
1
2
3
LOAD DATA INTO example.events
FROM FILE PATHS 'kifs://data/events.shp'
WITH OPTIONS (FIELDS IGNORED BY NAME(type))

Primary/Shard Keys

Primary Key
1
2
3
LOAD INTO example.employee
FROM FILE PATHS 'kifs://data/employee.parquet'
WITH OPTIONS (PRIMARY KEY = (id))
Primary & Shard Key
1
2
3
4
5
6
7
LOAD INTO example.employee
FROM FILE PATHS 'kifs://data/employee.csv'
WITH OPTIONS
(
    PRIMARY KEY = (id, dept_id),
    SHARD KEY = (id)
)

External Tables

Refresh on Start
1
2
3
4
5
6
7
CREATE EXTERNAL TABLE example.ext_product
FILE PATHS 'products.csv'
WITH OPTIONS
(
    DATA SOURCE = 'product_ds',
    REFRESH ON START = TRUE
)
Change Data Capture Subscription
1
2
3
4
5
6
7
8
CREATE EXTERNAL TABLE example.ext_product
FILE PATHS 'products.csv'
WITH OPTIONS
(
    DATA SOURCE = 'product_ds',
    SUBSCRIBE = TRUE,
    POLL_INTERVAL = '60'
)
JDBC Data Source Remote Query
1
2
3
4
5
6
7
CREATE EXTERNAL TABLE example.ext_employee_dept2
REMOTE QUERY 'SELECT * FROM example.ext_employee WHERE dept_id = 2'
WITH OPTIONS
(
    DATA SOURCE = 'example.jdbc_ds',
    REFRESH ON START = TRUE
)